yamildiego committed
Commit 12d0a16 · Parent: 303dddc
This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50)
  1. .DS_Store +0 -0
  2. handler.py +5 -1
  3. insightface/.gitignore +103 -0
  4. insightface/CODE_OF_CONDUCT.md +128 -0
  5. insightface/README.md +258 -0
  6. insightface/alignment/README.md +42 -0
  7. insightface/alignment/_datasets_/README.md +57 -0
  8. insightface/alignment/coordinate_reg/README.md +58 -0
  9. insightface/alignment/coordinate_reg/image_infer.py +23 -0
  10. insightface/alignment/heatmap/README.md +10 -0
  11. insightface/alignment/heatmap/data.py +354 -0
  12. insightface/alignment/heatmap/img_helper.py +86 -0
  13. insightface/alignment/heatmap/metric.py +107 -0
  14. insightface/alignment/heatmap/optimizer.py +65 -0
  15. insightface/alignment/heatmap/sample_config.py +98 -0
  16. insightface/alignment/heatmap/symbol/sym_heatmap.py +1085 -0
  17. insightface/alignment/heatmap/test.py +100 -0
  18. insightface/alignment/heatmap/test_rec_nme.py +71 -0
  19. insightface/alignment/heatmap/train.py +236 -0
  20. insightface/alignment/synthetics/README.md +63 -0
  21. insightface/alignment/synthetics/datasets/augs.py +40 -0
  22. insightface/alignment/synthetics/datasets/dataset_synthetics.py +163 -0
  23. insightface/alignment/synthetics/test_synthetics.py +104 -0
  24. insightface/alignment/synthetics/tools/prepare_synthetics.py +70 -0
  25. insightface/alignment/synthetics/trainer_synthetics.py +140 -0
  26. insightface/attribute/README.md +33 -0
  27. insightface/attribute/_datasets_/README.md +15 -0
  28. insightface/attribute/gender_age/test.py +24 -0
  29. insightface/benchmarks/train/nvidia_a10.md +48 -0
  30. insightface/benchmarks/train/nvidia_a100.md +53 -0
  31. insightface/benchmarks/train/nvidia_a30.md +52 -0
  32. insightface/benchmarks/train/nvidia_rtx3080.md +58 -0
  33. insightface/benchmarks/train/nvidia_rtx3090.md +57 -0
  34. insightface/benchmarks/train/nvidia_v100.md +54 -0
  35. insightface/body/human_pose/ambiguity_aware/README.md +94 -0
  36. insightface/body/human_pose/ambiguity_aware/cfg/h36m_gt_adv.yaml +36 -0
  37. insightface/body/human_pose/ambiguity_aware/cfg/h36m_gt_scale.yaml +40 -0
  38. insightface/body/human_pose/ambiguity_aware/cfg/pre_adv.yaml +36 -0
  39. insightface/body/human_pose/ambiguity_aware/cfg/pre_tmc_klbone.yaml +39 -0
  40. insightface/body/human_pose/ambiguity_aware/requirements.txt +12 -0
  41. insightface/body/human_pose/ambiguity_aware/scripts/_init_paths.py +13 -0
  42. insightface/body/human_pose/ambiguity_aware/scripts/demo.sh +5 -0
  43. insightface/body/human_pose/ambiguity_aware/scripts/demo_input/0.jpg +0 -0
  44. insightface/body/human_pose/ambiguity_aware/scripts/demo_input/0.pkl +3 -0
  45. insightface/body/human_pose/ambiguity_aware/scripts/demo_input/1.jpg +0 -0
  46. insightface/body/human_pose/ambiguity_aware/scripts/demo_input/1.pkl +3 -0
  47. insightface/body/human_pose/ambiguity_aware/scripts/demo_input/10.jpg +0 -0
  48. insightface/body/human_pose/ambiguity_aware/scripts/demo_input/10.pkl +3 -0
  49. insightface/body/human_pose/ambiguity_aware/scripts/demo_input/11.jpg +0 -0
  50. insightface/body/human_pose/ambiguity_aware/scripts/demo_input/11.pkl +3 -0
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
handler.py CHANGED
@@ -14,6 +14,10 @@ from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
 
 from huggingface_hub import hf_hub_download
 
+import sys
+root_local = './'
+sys.path.insert(0, root_local)
+
 from insightface.app import FaceAnalysis
 
 from style_template import styles
@@ -48,7 +52,7 @@ class EndpointHandler():
         # providers=["CPUExecutionProvider"],
         # )
         self.app = FaceAnalysis(
-            name="antelopev2",
+            name="buffalo_l",
             root="./",
             providers=["CPUExecutionProvider"],
         )
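The net effect of this change: the endpoint now puts the repository root on `sys.path` so the vendored `insightface/` sources are imported, and `FaceAnalysis` loads the `buffalo_l` model pack instead of `antelopev2`. A minimal sketch of the resulting initialization follows; the `det_size` value and the image-loading line are illustrative assumptions, not part of this diff.

```python
import sys
sys.path.insert(0, './')        # prefer the vendored insightface/ package over any installed copy

import cv2
from insightface.app import FaceAnalysis

# "buffalo_l" is typically resolved (and downloaded if missing) under <root>/models/buffalo_l.
app = FaceAnalysis(
    name="buffalo_l",
    root="./",
    providers=["CPUExecutionProvider"],
)
app.prepare(ctx_id=0, det_size=(640, 640))   # det_size assumed; not set in this diff

img = cv2.imread("example.jpg")              # placeholder input image (BGR)
faces = app.get(img)                         # detected faces with landmarks and embeddings
```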
insightface/.gitignore ADDED
@@ -0,0 +1,103 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ env/
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ .hypothesis/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # pyenv
+ .python-version
+
+ # celery beat schedule file
+ celerybeat-schedule
+
+ # SageMath parsed files
+ *.sage.py
+
+ # dotenv
+ .env
+
+ # virtualenv
+ .venv
+ venv/
+ ENV/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+
+ .DS_Store
insightface/CODE_OF_CONDUCT.md ADDED
@@ -0,0 +1,128 @@
+ # Contributor Covenant Code of Conduct
+
+ ## Our Pledge
+
+ We as members, contributors, and leaders pledge to make participation in our
+ community a harassment-free experience for everyone, regardless of age, body
+ size, visible or invisible disability, ethnicity, sex characteristics, gender
+ identity and expression, level of experience, education, socio-economic status,
+ nationality, personal appearance, race, religion, or sexual identity
+ and orientation.
+
+ We pledge to act and interact in ways that contribute to an open, welcoming,
+ diverse, inclusive, and healthy community.
+
+ ## Our Standards
+
+ Examples of behavior that contributes to a positive environment for our
+ community include:
+
+ * Demonstrating empathy and kindness toward other people
+ * Being respectful of differing opinions, viewpoints, and experiences
+ * Giving and gracefully accepting constructive feedback
+ * Accepting responsibility and apologizing to those affected by our mistakes,
+   and learning from the experience
+ * Focusing on what is best not just for us as individuals, but for the
+   overall community
+
+ Examples of unacceptable behavior include:
+
+ * The use of sexualized language or imagery, and sexual attention or
+   advances of any kind
+ * Trolling, insulting or derogatory comments, and personal or political attacks
+ * Public or private harassment
+ * Publishing others' private information, such as a physical or email
+   address, without their explicit permission
+ * Other conduct which could reasonably be considered inappropriate in a
+   professional setting
+
+ ## Enforcement Responsibilities
+
+ Community leaders are responsible for clarifying and enforcing our standards of
+ acceptable behavior and will take appropriate and fair corrective action in
+ response to any behavior that they deem inappropriate, threatening, offensive,
+ or harmful.
+
+ Community leaders have the right and responsibility to remove, edit, or reject
+ comments, commits, code, wiki edits, issues, and other contributions that are
+ not aligned to this Code of Conduct, and will communicate reasons for moderation
+ decisions when appropriate.
+
+ ## Scope
+
+ This Code of Conduct applies within all community spaces, and also applies when
+ an individual is officially representing the community in public spaces.
+ Examples of representing our community include using an official e-mail address,
+ posting via an official social media account, or acting as an appointed
+ representative at an online or offline event.
+
+ ## Enforcement
+
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
+ reported to the community leaders responsible for enforcement at
+
+ All complaints will be reviewed and investigated promptly and fairly.
+
+ All community leaders are obligated to respect the privacy and security of the
+ reporter of any incident.
+
+ ## Enforcement Guidelines
+
+ Community leaders will follow these Community Impact Guidelines in determining
+ the consequences for any action they deem in violation of this Code of Conduct:
+
+ ### 1. Correction
+
+ **Community Impact**: Use of inappropriate language or other behavior deemed
+ unprofessional or unwelcome in the community.
+
+ **Consequence**: A private, written warning from community leaders, providing
+ clarity around the nature of the violation and an explanation of why the
+ behavior was inappropriate. A public apology may be requested.
+
+ ### 2. Warning
+
+ **Community Impact**: A violation through a single incident or series
+ of actions.
+
+ **Consequence**: A warning with consequences for continued behavior. No
+ interaction with the people involved, including unsolicited interaction with
+ those enforcing the Code of Conduct, for a specified period of time. This
+ includes avoiding interactions in community spaces as well as external channels
+ like social media. Violating these terms may lead to a temporary or
+ permanent ban.
+
+ ### 3. Temporary Ban
+
+ **Community Impact**: A serious violation of community standards, including
+ sustained inappropriate behavior.
+
+ **Consequence**: A temporary ban from any sort of interaction or public
+ communication with the community for a specified period of time. No public or
+ private interaction with the people involved, including unsolicited interaction
+ with those enforcing the Code of Conduct, is allowed during this period.
+ Violating these terms may lead to a permanent ban.
+
+ ### 4. Permanent Ban
+
+ **Community Impact**: Demonstrating a pattern of violation of community
+ standards, including sustained inappropriate behavior, harassment of an
+ individual, or aggression toward or disparagement of classes of individuals.
+
+ **Consequence**: A permanent ban from any sort of public interaction within
+ the community.
+
+ ## Attribution
+
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+ version 2.0, available at
+ https://www.contributor-covenant.org/version/2/0/code_of_conduct.html.
+
+ Community Impact Guidelines were inspired by [Mozilla's code of conduct
+ enforcement ladder](https://github.com/mozilla/diversity).
+
+ [homepage]: https://www.contributor-covenant.org
+
+ For answers to common questions about this code of conduct, see the FAQ at
+ https://www.contributor-covenant.org/faq. Translations are available at
+ https://www.contributor-covenant.org/translations.
insightface/README.md ADDED
@@ -0,0 +1,258 @@
+
+ # InsightFace: 2D and 3D Face Analysis Project
+
+ <div align="left">
+ <img src="https://insightface.ai/assets/img/custom/logo3.jpg" width="240"/>
+ </div>
+
+ The InsightFace project is mainly maintained by [Jia Guo](mailto:[email protected]?subject=[GitHub]%20InsightFace%20Project) and [Jiankang Deng](https://jiankangdeng.github.io/).
+
+ For all main contributors, please check [contributing](#contributing).
+
+ ## License
+
+ The code of InsightFace is released under the MIT License. There is no limitation for either academic or commercial usage.
+
+ The training data containing the annotations (and the models trained with these data) are available for non-commercial research purposes only.
+
+ Both manually downloaded models from our GitHub repo and models auto-downloaded by our [python-library](python-package) follow the above license policy (non-commercial research purposes only).
+
+ ## Top News
+
+ **`2023-08-08`**: We released the implementation of [Generalizing Gaze Estimation with Weak-Supervision from Synthetic Views](https://arxiv.org/abs/2212.02997) at [reconstruction/gaze](reconstruction/gaze).
+
+ **`2023-05-03`**: We have launched the ongoing version of the wild face anti-spoofing challenge. See details [here](https://github.com/deepinsight/insightface/tree/master/challenges/cvpr23-fas-wild#updates).
+
+ **`2023-04-01`**: We moved the swapping demo to a Discord bot, which supports editing Midjourney-generated images; see details at [web-demos/swapping_discord](web-demos/swapping_discord).
+
+ **`2023-02-13`**: We launched a large-scale in-the-wild face anti-spoofing challenge at the CVPR23 Workshop; see details at [challenges/cvpr23-fas-wild](challenges/cvpr23-fas-wild).
+
+ **`2022-11-28`**: Single-line code for facial identity swapping in our python package ver 0.7; please check the example [here](examples/in_swapper).
+
+ **`2022-10-28`**: The [MFR-Ongoing](http://iccv21-mfr.com) website has been refactored; please create an issue if you find any bugs.
+
+ **`2022-09-22`**: Now we have [web-demos](web-demos): [face-localization](http://demo.insightface.ai:7007/), [face-recognition](http://demo.insightface.ai:7008/), and [face-swapping](http://demo.insightface.ai:7009/).
+
+ **`2022-08-12`**: We achieved first place in the [Perspective Projection Based Monocular 3D Face Reconstruction Challenge](https://tianchi.aliyun.com/competition/entrance/531961/introduction) of the [ECCV-2022 WCPA Workshop](https://sites.google.com/view/wcpa2022); see the [paper](https://arxiv.org/abs/2208.07142) and [code](reconstruction/jmlr).
+
+ **`2022-03-30`**: [Partial FC](https://arxiv.org/abs/2203.15565) accepted by CVPR-2022.
+
+ **`2022-02-23`**: [SCRFD](detection/scrfd) accepted by [ICLR-2022](https://iclr.cc/Conferences/2022).
+
+ **`2021-11-30`**: The [MFR-Ongoing](challenges/mfr) challenge launched (same as IFRT); it is an extended version of [iccv21-mfr](challenges/iccv21-mfr).
+
+ **`2021-10-29`**: We achieved 1st place on the [VISA track](https://pages.nist.gov/frvt/plots/11/visa.html) of [NIST-FRVT 1:1](https://pages.nist.gov/frvt/html/frvt11.html) by using Partial FC (Xiang An, Jiankang Deng, Jia Guo).
+
+ **`2021-10-11`**: The [leaderboard](https://insightface.ai/mfr21) of the [ICCV21 - Masked Face Recognition Challenge](challenges/iccv21-mfr) has been released. Video: [YouTube](https://www.youtube.com/watch?v=lL-7l5t6x2w), [Bilibili](https://www.bilibili.com/video/BV15b4y1h79N/).
+
+ **`2021-06-05`**: We launched a [Masked Face Recognition Challenge & Workshop](challenges/iccv21-mfr) at ICCV 2021.
+
+
+
+ ## Introduction
+
+ [InsightFace](https://insightface.ai) is an open source 2D&3D deep face analysis toolbox, mainly based on PyTorch and MXNet.
+
+ Please check our [website](https://insightface.ai) for details.
+
+ The master branch works with **PyTorch 1.6+** and/or **MXNet=1.6-1.8**, with **Python 3.x**.
+
+ InsightFace efficiently implements a rich variety of state-of-the-art algorithms for face recognition, face detection and face alignment, which are optimized for both training and deployment.
+
+ ## Quick Start
+
+ Please start with our [python-package](python-package/) for testing detection, recognition and alignment models on input images.
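A minimal quick-start sketch of the python-package (assuming `pip install insightface onnxruntime`; the image path is a placeholder, and recent package versions default to the `buffalo_l` model pack):

```python
import cv2
from insightface.app import FaceAnalysis

app = FaceAnalysis(providers=['CPUExecutionProvider'])
app.prepare(ctx_id=0, det_size=(640, 640))

img = cv2.imread('sample.jpg')                    # placeholder: any BGR image with faces
faces = app.get(img)
for face in faces:
    print(face.bbox.astype(int), face.det_score)  # detection box and confidence
    print(face.normed_embedding.shape)            # 512-d recognition embedding
```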
+
+
+ ### ArcFace Video Demo
+
+
+ [<img src=https://insightface.ai/assets/img/github/facerecognitionfromvideo.PNG width="760" />](https://www.youtube.com/watch?v=y-D1tReryGA&t=81s)
+
+
+ Please click the image to watch the YouTube video. For Bilibili users, click [here](https://www.bilibili.com/video/av38041494?from=search&seid=11501833604850032313).
+
+
+
+ ## Projects
+
+ The [projects page](https://insightface.ai/projects) on the InsightFace website also describes all supported projects in InsightFace.
+
+ You may also be interested in some [challenges](https://insightface.ai/challenges) held by InsightFace.
+
+
+
+ ## Face Recognition
+
+ ### Introduction
+
+ In this module, we provide training data, network settings and loss designs for deep face recognition.
+
+ The supported methods are as follows:
+
+ - [x] [ArcFace_mxnet (CVPR'2019)](recognition/arcface_mxnet)
+ - [x] [ArcFace_torch (CVPR'2019)](recognition/arcface_torch)
+ - [x] [SubCenter ArcFace (ECCV'2020)](recognition/subcenter_arcface)
+ - [x] [PartialFC_mxnet (CVPR'2022)](recognition/partial_fc)
+ - [x] [PartialFC_torch (CVPR'2022)](recognition/arcface_torch)
+ - [x] [VPL (CVPR'2021)](recognition/vpl)
+ - [x] [Arcface_oneflow](recognition/arcface_oneflow)
+ - [x] [ArcFace_Paddle (CVPR'2019)](recognition/arcface_paddle)
+
+ Commonly used network backbones are included in most of the methods, such as IResNet, MobilefaceNet, MobileNet, InceptionResNet_v2, DenseNet, etc.
+
+
+ ### Datasets
+
+ The training data includes, but is not limited to, the cleaned MS1M, VGG2 and CASIA-Webface datasets, which are already packed in MXNet binary format. Please see the [dataset](recognition/_datasets_) page for details.
+
+ ### Evaluation
+
+ We provide standard IJB and MegaFace evaluation pipelines in [evaluation](recognition/_evaluation_).
+
+
+ ### Pretrained Models
+
+ **Please check [Model-Zoo](https://github.com/deepinsight/insightface/wiki/Model-Zoo) for more pretrained models.**
+
+ ### Third-party Re-implementation of ArcFace
+
+ - TensorFlow: [InsightFace_TF](https://github.com/auroua/InsightFace_TF)
+ - TensorFlow: [tf-insightface](https://github.com/AIInAi/tf-insightface)
+ - TensorFlow: [insightface](https://github.com/Fei-Wang/insightface)
+ - PyTorch: [InsightFace_Pytorch](https://github.com/TreB1eN/InsightFace_Pytorch)
+ - PyTorch: [arcface-pytorch](https://github.com/ronghuaiyang/arcface-pytorch)
+ - Caffe: [arcface-caffe](https://github.com/xialuxi/arcface-caffe)
+ - Caffe: [CombinedMargin-caffe](https://github.com/gehaocool/CombinedMargin-caffe)
+ - TensorFlow: [InsightFace-tensorflow](https://github.com/luckycallor/InsightFace-tensorflow)
+ - TensorRT: [wang-xinyu/tensorrtx](https://github.com/wang-xinyu/tensorrtx)
+ - TensorRT: [InsightFace-REST](https://github.com/SthPhoenix/InsightFace-REST)
+ - ONNXRuntime C++: [ArcFace-ONNXRuntime](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/ort/cv/glint_arcface.cpp)
+ - ONNXRuntime Go: [arcface-go](https://github.com/jack139/arcface-go)
+ - MNN: [ArcFace-MNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/mnn/cv/mnn_glint_arcface.cpp)
+ - TNN: [ArcFace-TNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/tnn/cv/tnn_glint_arcface.cpp)
+ - NCNN: [ArcFace-NCNN](https://github.com/DefTruth/lite.ai.toolkit/blob/main/lite/ncnn/cv/ncnn_glint_arcface.cpp)
+
+ ## Face Detection
+
+ ### Introduction
+
+ <div align="left">
+ <img src="https://insightface.ai/assets/img/github/11513D05.jpg" width="640"/>
+ </div>
+
+ In this module, we provide training data with annotations, network settings and loss designs for face detection training, evaluation and inference.
+
+ The supported methods are as follows:
+
+ - [x] [RetinaFace (CVPR'2020)](detection/retinaface)
+ - [x] [SCRFD (Arxiv'2021)](detection/scrfd)
+ - [x] [blazeface_paddle](detection/blazeface_paddle)
+
+ [RetinaFace](detection/retinaface) is a practical single-stage face detector which was accepted at [CVPR 2020](https://openaccess.thecvf.com/content_CVPR_2020/html/Deng_RetinaFace_Single-Shot_Multi-Level_Face_Localisation_in_the_Wild_CVPR_2020_paper.html). We provide training code, training dataset, pretrained models and evaluation scripts.
+
+ [SCRFD](detection/scrfd) is an efficient, high-accuracy face detection approach which was initially described in [Arxiv](https://arxiv.org/abs/2105.04714). We provide an easy-to-use pipeline to train high-efficiency face detectors with NAS support.
+
+
+ ## Face Alignment
+
+ ### Introduction
+
+ <div align="left">
+ <img src="https://insightface.ai/assets/img/custom/thumb_sdunet.png" width="600"/>
+ </div>
+
+ In this module, we provide datasets and training/inference pipelines for face alignment.
+
+ Supported methods:
+
+ - [x] [SDUNets (BMVC'2018)](alignment/heatmap)
+ - [x] [SimpleRegression](alignment/coordinate_reg)
+
+
+ [SDUNets](alignment/heatmap) is a heatmap-based method which was accepted at [BMVC](http://bmvc2018.org/contents/papers/0051.pdf).
+
+ [SimpleRegression](alignment/coordinate_reg) provides very lightweight facial landmark models with fast coordinate regression. The input of these models is a loosely cropped face image, while the output is the direct landmark coordinates.
+
+
+ ## Citation
+
+ If you find *InsightFace* useful in your research, please consider citing the following related papers:
+
+ ```
+ @inproceedings{ren2023pbidr,
+ title={Facial Geometric Detail Recovery via Implicit Representation},
+ author={Ren, Xingyu and Lattas, Alexandros and Gecer, Baris and Deng, Jiankang and Ma, Chao and Yang, Xiaokang},
+ booktitle={2023 IEEE 17th International Conference on Automatic Face and Gesture Recognition (FG)},
+ year={2023}
+ }
+
+ @article{guo2021sample,
+ title={Sample and Computation Redistribution for Efficient Face Detection},
+ author={Guo, Jia and Deng, Jiankang and Lattas, Alexandros and Zafeiriou, Stefanos},
+ journal={arXiv preprint arXiv:2105.04714},
+ year={2021}
+ }
+
+ @inproceedings{gecer2021ostec,
+ title={OSTeC: One-Shot Texture Completion},
+ author={Gecer, Baris and Deng, Jiankang and Zafeiriou, Stefanos},
+ booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+ year={2021}
+ }
+
+ @inproceedings{an2020partical_fc,
+ title={Partial FC: Training 10 Million Identities on a Single Machine},
+ author={An, Xiang and Zhu, Xuhan and Xiao, Yang and Wu, Lan and Zhang, Ming and Gao, Yuan and Qin, Bin and Zhang, Debing and Fu, Ying},
+ booktitle={Arxiv 2010.05222},
+ year={2020}
+ }
+
+ @inproceedings{deng2020subcenter,
+ title={Sub-center ArcFace: Boosting Face Recognition by Large-scale Noisy Web Faces},
+ author={Deng, Jiankang and Guo, Jia and Liu, Tongliang and Gong, Mingming and Zafeiriou, Stefanos},
+ booktitle={Proceedings of the IEEE Conference on European Conference on Computer Vision},
+ year={2020}
+ }
+
+ @inproceedings{Deng2020CVPR,
+ title = {RetinaFace: Single-Shot Multi-Level Face Localisation in the Wild},
+ author = {Deng, Jiankang and Guo, Jia and Ververas, Evangelos and Kotsia, Irene and Zafeiriou, Stefanos},
+ booktitle = {CVPR},
+ year = {2020}
+ }
+
+ @inproceedings{guo2018stacked,
+ title={Stacked Dense U-Nets with Dual Transformers for Robust Face Alignment},
+ author={Guo, Jia and Deng, Jiankang and Xue, Niannan and Zafeiriou, Stefanos},
+ booktitle={BMVC},
+ year={2018}
+ }
+
+ @article{deng2018menpo,
+ title={The Menpo benchmark for multi-pose 2D and 3D facial landmark localisation and tracking},
+ author={Deng, Jiankang and Roussos, Anastasios and Chrysos, Grigorios and Ververas, Evangelos and Kotsia, Irene and Shen, Jie and Zafeiriou, Stefanos},
+ journal={IJCV},
+ year={2018}
+ }
+
+ @inproceedings{deng2018arcface,
+ title={ArcFace: Additive Angular Margin Loss for Deep Face Recognition},
+ author={Deng, Jiankang and Guo, Jia and Niannan, Xue and Zafeiriou, Stefanos},
+ booktitle={CVPR},
+ year={2019}
+ }
+ ```
+
+ ## Contributing
+
+ Main contributors:
+
+ - [Jia Guo](https://github.com/nttstar), ``guojia[at]gmail.com``
+ - [Jiankang Deng](https://github.com/jiankangdeng) ``jiankangdeng[at]gmail.com``
+ - [Xiang An](https://github.com/anxiangsir) ``anxiangsir[at]gmail.com``
+ - [Jack Yu](https://github.com/szad670401) ``jackyu961127[at]gmail.com``
+ - [Baris Gecer](https://barisgecer.github.io/) ``barisgecer[at]msn.com``
insightface/alignment/README.md ADDED
@@ -0,0 +1,42 @@
+ ## Face Alignment
+
+
+ <div align="left">
+ <img src="https://insightface.ai/assets/img/custom/logo3.jpg" width="240"/>
+ </div>
+
+
+ ## Introduction
+
+ These are the face alignment methods of [InsightFace](https://insightface.ai).
+
+
+ <div align="left">
+ <img src="https://insightface.ai/assets/img/custom/thumb_sdunet.png" width="600"/>
+ </div>
+
+
+ ### Datasets
+
+ Please refer to the [datasets](_datasets_) page for details of the face alignment datasets used for training and evaluation.
+
+ ### Evaluation
+
+ Please refer to the [evaluation](_evaluation_) page for details of face alignment evaluation.
+
+
+ ## Methods
+
+
+ Supported methods:
+
+ - [x] [SDUNets (BMVC'2018)](heatmap)
+ - [x] [SimpleRegression](coordinate_reg)
+ - [x] [Alignment By Face Synthetics](synthetics)
+
+
+ ## Contributing
+
+ We appreciate all contributions to improve the face alignment model zoo of InsightFace.
+
insightface/alignment/_datasets_/README.md ADDED
@@ -0,0 +1,57 @@
+ # Face Alignment Datasets
+
+ (Updating)
+
+ ## Training Datasets
+
+ ### Menpo2D-Train
+
+ https://ibug.doc.ic.ac.uk/resources/2nd-facial-landmark-tracking-competition-menpo-ben/
+
+ ### 300W-Train
+
+ https://ibug.doc.ic.ac.uk/resources/300-W/
+
+
+ ### LFPW
+
+ https://neerajkumar.org/databases/lfpw/
+
+ ### Helen
+
+ http://www.ifp.illinois.edu/~vuongle2/helen/
+
+ ### AFW
+
+ ### AFLW
+
+ https://www.tugraz.at/institute/icg/research/team-bischof/lrs/downloads/aflw/
+
+ ### FDDB
+
+
+ ### Face Synthetics
+
+ https://github.com/microsoft/FaceSynthetics
+
+ ### 300W-LP (3D annotation)
+
+ http://www.cbsr.ia.ac.cn/users/xiangyuzhu/projects/3DDFA/main.htm
+
+ ## Test Datasets
+
+ ### 300W-Test
+
+ https://ibug.doc.ic.ac.uk/resources/300-W/
+
+ ### COFW
+
+ http://www.vision.caltech.edu/xpburgos/ICCV13/#dataset
+
+ ### Menpo2D-Test
+
+ https://ibug.doc.ic.ac.uk/resources/2nd-facial-landmark-tracking-competition-menpo-ben/
+
+ ### AFLW2000-3D (3D annotation)
+
+ http://www.cbsr.ia.ac.cn/users/xiangyuzhu/projects/3DDFA/main.htm
insightface/alignment/coordinate_reg/README.md ADDED
@@ -0,0 +1,58 @@
+ ### Introduction
+
+ Here we provide some lightweight facial landmark models with fast coordinate regression.
+ The input of these models is a loosely cropped face image, while the output is the direct landmark coordinates.
+
+
+ ### Pretrained Models
+
+ - **Model ``2d106det``**
+
+ **2021.07: We now support model inference with our `insightface` python package; please check [image_infer.py](image_infer.py) for details.**
+
+ Given a face detection bounding box, it predicts 2d-106 landmarks. Mainly used for static image inference.
+
+ Backbone: MobileNet-0.5, size 5MB.
+
+ Input: size 192x192, loosely cropped detection bounding box.
+
+ Download link:
+
+ [baidu cloud](https://pan.baidu.com/s/10m5GmtNV5snynDrq3KqIdg) (code: ``lqvv``)
+
+ [google drive](https://drive.google.com/file/d/13Pz8mH-a1s7RXpq_jFUXxaqCpDUE0oSr/view?usp=sharing)
+
+
+
+ - **Model ``2d106track``**
+
+ Given a landmark bounding box, it predicts 2d-106 landmarks. Used for video landmark tracking.
+
+ Download link: coming soon
+
+ ### Visualization
+
+
+ <p align="center">Points mark-up (ordered by point names):</p>
+
+ <div align="center">
+ <img src="https://github.com/nttstar/insightface-resources/blob/master/alignment/images/2d106markup.jpg" alt="markup" width="320">
+ </div>
+
+
+ <p align="center">Image result:</p>
+
+ <div align="center">
+ <img src="https://github.com/nttstar/insightface-resources/blob/master/alignment/images/t1_out.jpg" alt="imagevis" width="800">
+ </div>
+
+
+ <p align="center">Video result:</p>
+
+ <div align="center">
+ <img src="https://github.com/nttstar/insightface-resources/blob/master/alignment/images/C_jiaguo.gif" alt="videovis" width="240">
+ </div>
+
+
+ ### FAQ
+
insightface/alignment/coordinate_reg/image_infer.py ADDED
@@ -0,0 +1,23 @@
+ import cv2
+ import numpy as np
+ import os
+ import insightface
+ from insightface.app import FaceAnalysis
+ from insightface.data import get_image as ins_get_image
+
+ if __name__ == '__main__':
+     # Only load the detection and 2d-106 landmark modules.
+     app = FaceAnalysis(allowed_modules=['detection', 'landmark_2d_106'])
+     app.prepare(ctx_id=0, det_size=(640, 640))
+     img = ins_get_image('t1')
+     faces = app.get(img)
+     #assert len(faces)==6
+     tim = img.copy()
+     color = (200, 160, 75)
+     for face in faces:
+         lmk = face.landmark_2d_106
+         # np.int is removed in NumPy >= 1.20; use the builtin int instead.
+         lmk = np.round(lmk).astype(int)
+         for i in range(lmk.shape[0]):
+             p = tuple(lmk[i])
+             cv2.circle(tim, p, 1, color, 1, cv2.LINE_AA)
+     cv2.imwrite('./test_out.jpg', tim)
+
insightface/alignment/heatmap/README.md ADDED
@@ -0,0 +1,10 @@
+ We provide our implementation of ``Stacked Dense U-Nets with Dual Transformers for Robust Face Alignment`` here; see the paper at [BMVC](http://bmvc2018.org/contents/papers/0051.pdf) or on [Arxiv](https://arxiv.org/abs/1812.01936).
+
+ We also provide some popular heatmap-based approaches such as stacked hourglass. You can define the loss type, network structure and dataset in ``config.py`` (copied from ``sample_config.py``).
+
+ For example, by default, you can train our approach with ``train.py --network sdu`` or train the hourglass network with ``train.py --network hourglass``.
+
+ The 2D training/validation dataset is available at [baiducloud](https://pan.baidu.com/s/1kdquiIGTlK7l26SPWO_cmw) or [dropbox](https://www.dropbox.com/s/por6mbguegmywo6/bmvc_sdu_data2d.zip?dl=0).
+
+ The 3D training/validation dataset is available at [baiducloud](https://pan.baidu.com/s/1VjFWm6eEtIqGKk92GE2rgw) or [dropbox](https://www.dropbox.com/s/tjze176lh76nciw/bmvc_sdu_data3d.zip?dl=0).
+
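The network/dataset selection mentioned above is driven by `generate_config` in `sample_config.py` (included later in this diff), which merges the chosen network and dataset sub-configs into the global `config`; `train.py` is expected to call it with the command-line choices. A minimal sketch, assuming `sample_config.py` has been copied to `config.py` as described:

```python
# Hypothetical standalone use of generate_config; 'sdu' and 'i2d' are keys
# defined in sample_config.py (SDU network, 2D landmark dataset).
from config import config, generate_config

generate_config('sdu', 'i2d')

print(config.network, config.dataset)                    # 'sdu' 'i2d'
print(config.losstype, config.net_stacks)                # 'heatmap' 2
print(config.input_img_size, config.output_label_size)   # 128 64
```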
insightface/alignment/heatmap/data.py ADDED
@@ -0,0 +1,354 @@
1
+ # pylint: skip-file
2
+ import mxnet as mx
3
+ import numpy as np
4
+ import sys, os
5
+ import random
6
+ import math
7
+ import scipy.misc
8
+ import cv2
9
+ import logging
10
+ import sklearn
11
+ import datetime
12
+ import img_helper
13
+ from mxnet.io import DataIter
14
+ from mxnet import ndarray as nd
15
+ from mxnet import io
16
+ from mxnet import recordio
17
+ from PIL import Image
18
+ from config import config
19
+ from skimage import transform as tf
20
+
21
+
22
+ class FaceSegIter(DataIter):
23
+ def __init__(self,
24
+ batch_size,
25
+ per_batch_size=0,
26
+ path_imgrec=None,
27
+ aug_level=0,
28
+ force_mirror=False,
29
+ exf=1,
30
+ use_coherent=0,
31
+ args=None,
32
+ data_name="data",
33
+ label_name="softmax_label"):
34
+ self.aug_level = aug_level
35
+ self.force_mirror = force_mirror
36
+ self.use_coherent = use_coherent
37
+ self.exf = exf
38
+ self.batch_size = batch_size
39
+ self.per_batch_size = per_batch_size
40
+ self.data_name = data_name
41
+ self.label_name = label_name
42
+ assert path_imgrec
43
+ logging.info('loading recordio %s...', path_imgrec)
44
+ path_imgidx = path_imgrec[0:-4] + ".idx"
45
+ self.imgrec = mx.recordio.MXIndexedRecordIO(path_imgidx, path_imgrec,
46
+ 'r') # pylint: disable=redefined-variable-type
47
+ self.oseq = list(self.imgrec.keys)
48
+ print('train size', len(self.oseq))
49
+ self.cur = 0
50
+ self.reset()
51
+ self.data_shape = (3, config.input_img_size, config.input_img_size)
52
+ self.num_classes = config.num_classes
53
+ self.input_img_size = config.input_img_size
54
+ #self.label_classes = self.num_classes
55
+ if config.losstype == 'heatmap':
56
+ if aug_level > 0:
57
+ self.output_label_size = config.output_label_size
58
+ self.label_shape = (self.num_classes, self.output_label_size,
59
+ self.output_label_size)
60
+ else:
61
+ self.output_label_size = self.input_img_size
62
+ #self.label_shape = (self.num_classes, 2)
63
+ self.label_shape = (self.num_classes, self.output_label_size,
64
+ self.output_label_size)
65
+ else:
66
+ if aug_level > 0:
67
+ self.output_label_size = config.output_label_size
68
+ self.label_shape = (self.num_classes, 2)
69
+ else:
70
+ self.output_label_size = self.input_img_size
71
+ #self.label_shape = (self.num_classes, 2)
72
+ self.label_shape = (self.num_classes, 2)
73
+ self.provide_data = [(data_name, (batch_size, ) + self.data_shape)]
74
+ self.provide_label = [(label_name, (batch_size, ) + self.label_shape)]
75
+ self.img_num = 0
76
+ self.invalid_num = 0
77
+ self.mode = 1
78
+ self.vis = 0
79
+ self.stats = [0, 0]
80
+ self.flip_order = [
81
+ 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 26, 25,
82
+ 24, 23, 22, 21, 20, 19, 18, 17, 27, 28, 29, 30, 35, 34, 33, 32, 31,
83
+ 45, 44, 43, 42, 47, 46, 39, 38, 37, 36, 41, 40, 54, 53, 52, 51, 50,
84
+ 49, 48, 59, 58, 57, 56, 55, 64, 63, 62, 61, 60, 67, 66, 65
85
+ ]
86
+ #self.mirror_set = [
87
+ # (22,23),
88
+ # (21,24),
89
+ # (20,25),
90
+ # (19,26),
91
+ # (18,27),
92
+ # (40,43),
93
+ # (39,44),
94
+ # (38,45),
95
+ # (37,46),
96
+ # (42,47),
97
+ # (41,48),
98
+ # (33,35),
99
+ # (32,36),
100
+ # (51,53),
101
+ # (50,54),
102
+ # (62,64),
103
+ # (61,65),
104
+ # (49,55),
105
+ # (49,55),
106
+ # (68,66),
107
+ # (60,56),
108
+ # (59,57),
109
+ # (1,17),
110
+ # (2,16),
111
+ # (3,15),
112
+ # (4,14),
113
+ # (5,13),
114
+ # (6,12),
115
+ # (7,11),
116
+ # (8,10),
117
+ # ]
118
+
119
+ def get_data_shape(self):
120
+ return self.data_shape
121
+
122
+ #def get_label_shape(self):
123
+ # return self.label_shape
124
+
125
+ def get_shape_dict(self):
126
+ D = {}
127
+ for (k, v) in self.provide_data:
128
+ D[k] = v
129
+ for (k, v) in self.provide_label:
130
+ D[k] = v
131
+ return D
132
+
133
+ def get_label_names(self):
134
+ D = []
135
+ for (k, v) in self.provide_label:
136
+ D.append(k)
137
+ return D
138
+
139
+ def reset(self):
140
+ #print('reset')
141
+ if self.aug_level == 0:
142
+ self.seq = self.oseq
143
+ else:
144
+ self.seq = []
145
+ for _ in range(self.exf):
146
+ _seq = self.oseq[:]
147
+ random.shuffle(_seq)
148
+ self.seq += _seq
149
+ print('train size after reset', len(self.seq))
150
+ self.cur = 0
151
+
152
+ def next_sample(self):
153
+ """Helper function for reading in next sample."""
154
+ if self.cur >= len(self.seq):
155
+ raise StopIteration
156
+ idx = self.seq[self.cur]
157
+ self.cur += 1
158
+ s = self.imgrec.read_idx(idx)
159
+ header, img = recordio.unpack(s)
160
+ img = mx.image.imdecode(img).asnumpy()
161
+ hlabel = np.array(header.label).reshape((self.num_classes, 2))
162
+ if not config.label_xfirst:
163
+ hlabel = hlabel[:, ::-1] #convert to X/W first
164
+ annot = {'scale': config.base_scale}
165
+
166
+ #ul = np.array( (50000,50000), dtype=np.int32)
167
+ #br = np.array( (0,0), dtype=np.int32)
168
+ #for i in range(hlabel.shape[0]):
169
+ # h = int(hlabel[i][0])
170
+ # w = int(hlabel[i][1])
171
+ # key = np.array((h,w))
172
+ # ul = np.minimum(key, ul)
173
+ # br = np.maximum(key, br)
174
+
175
+ return img, hlabel, annot
176
+
177
+ def get_flip(self, data, label):
178
+ data_flip = np.zeros_like(data)
179
+ label_flip = np.zeros_like(label)
180
+ for k in range(data_flip.shape[2]):
181
+ data_flip[:, :, k] = np.fliplr(data[:, :, k])
182
+ for k in range(label_flip.shape[0]):
183
+ label_flip[k, :] = np.fliplr(label[k, :])
184
+ #print(label[0,:].shape)
185
+ label_flip = label_flip[self.flip_order, :]
186
+ return data_flip, label_flip
187
+
188
+ def get_data(self, data, label, annot):
189
+ if self.vis:
190
+ self.img_num += 1
191
+ #if self.img_num<=self.vis:
192
+ # filename = './vis/raw_%d.jpg' % (self.img_num)
193
+ # print('save', filename)
194
+ # draw = data.copy()
195
+ # for i in range(label.shape[0]):
196
+ # cv2.circle(draw, (label[i][1], label[i][0]), 1, (0, 0, 255), 2)
197
+ # scipy.misc.imsave(filename, draw)
198
+
199
+ rotate = 0
200
+ #scale = 1.0
201
+ if 'scale' in annot:
202
+ scale = annot['scale']
203
+ else:
204
+ scale = max(data.shape[0], data.shape[1])
205
+ if 'center' in annot:
206
+ center = annot['center']
207
+ else:
208
+ center = np.array((data.shape[1] / 2, data.shape[0] / 2))
209
+ max_retry = 3
210
+ if self.aug_level == 0: #validation mode
211
+ max_retry = 6
212
+ retry = 0
213
+ found = False
214
+ base_scale = scale
215
+ while retry < max_retry:
216
+ retry += 1
217
+ succ = True
218
+ _scale = base_scale
219
+ if self.aug_level > 0:
220
+ rotate = np.random.randint(-40, 40)
221
+ scale_config = 0.2
222
+ #rotate = 0
223
+ #scale_config = 0.0
224
+ scale_ratio = min(
225
+ 1 + scale_config,
226
+ max(1 - scale_config,
227
+ (np.random.randn() * scale_config) + 1))
228
+ _scale = int(base_scale * scale_ratio)
229
+ #translate = np.random.randint(-5, 5, size=(2,))
230
+ #center += translate
231
+ data_out, trans = img_helper.transform(data, center,
232
+ self.input_img_size, _scale,
233
+ rotate)
234
+ #data_out = img_helper.crop2(data, center, _scale, (self.input_img_size, self.input_img_size), rot=rotate)
235
+ label_out = np.zeros(self.label_shape, dtype=np.float32)
236
+ #print('out shapes', data_out.shape, label_out.shape)
237
+ for i in range(label.shape[0]):
238
+ pt = label[i].copy()
239
+ #pt = pt[::-1]
240
+ npt = img_helper.transform_pt(pt, trans)
241
+ if npt[0] >= data_out.shape[1] or npt[1] >= data_out.shape[
242
+ 0] or npt[0] < 0 or npt[1] < 0:
243
+ succ = False
244
+ #print('err npt', npt)
245
+ break
246
+ if config.losstype == 'heatmap':
247
+ pt_scale = float(
248
+ self.output_label_size) / self.input_img_size
249
+ npt *= pt_scale
250
+ npt = npt.astype(np.int32)
251
+ img_helper.gaussian(label_out[i], npt, config.gaussian)
252
+ else:
253
+ label_out[i] = (npt / self.input_img_size)
254
+ #print('before gaussian', label_out[i].shape, pt.shape)
255
+ #trans = img_helper.transform(pt, center, _scale, (self.output_label_size, self.output_label_size), rot=rotate)
256
+ #print(trans.shape)
257
+ #if not img_helper.gaussian(label_out[i], trans, _g):
258
+ # succ = False
259
+ # break
260
+ if not succ:
261
+ if self.aug_level == 0:
262
+ base_scale += 20
263
+ continue
264
+
265
+ flip_data_out = None
266
+ flip_label_out = None
267
+ if config.net_coherent:
268
+ flip_data_out, flip_label_out = self.get_flip(
269
+ data_out, label_out)
270
+ elif ((self.aug_level > 0 and np.random.rand() < 0.5)
271
+ or self.force_mirror): #flip aug
272
+ flip_data_out, flip_label_out = self.get_flip(
273
+ data_out, label_out)
274
+ data_out, label_out = flip_data_out, flip_label_out
275
+
276
+ found = True
277
+ break
278
+
279
+ #self.stats[0]+=1
280
+ if not found:
281
+ #self.stats[1]+=1
282
+ #print('find aug error', retry)
283
+ #print(self.stats)
284
+ #print('!!!ERR')
285
+ return None
286
+ #print('found with scale', _scale, rotate)
287
+
288
+ if self.vis > 0 and self.img_num <= self.vis:
289
+ print('crop', data.shape, center, _scale, rotate, data_out.shape)
290
+ filename = './vis/cropped_%d.jpg' % (self.img_num)
291
+ print('save', filename)
292
+ draw = data_out.copy()
293
+ alabel = label_out.copy()
294
+ for i in range(label.shape[0]):
295
+ a = cv2.resize(alabel[i],
296
+ (self.input_img_size, self.input_img_size))
297
+ ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
298
+ cv2.circle(draw, (ind[1], ind[0]), 1, (0, 0, 255), 2)
299
+ scipy.misc.imsave(filename, draw)
300
+ filename = './vis/raw_%d.jpg' % (self.img_num)
301
+ scipy.misc.imsave(filename, data)
302
+
303
+ return data_out, label_out, flip_data_out, flip_label_out
304
+
305
+ def next(self):
306
+ """Returns the next batch of data."""
307
+ #print('next')
308
+ batch_size = self.batch_size
309
+ batch_data = nd.empty((batch_size, ) + self.data_shape)
310
+ batch_label = nd.empty((batch_size, ) + self.label_shape)
311
+ i = 0
312
+ #self.cutoff = random.randint(800,1280)
313
+ try:
314
+ while i < batch_size:
315
+ #print('N', i)
316
+ data, label, annot = self.next_sample()
317
+ R = self.get_data(data, label, annot)
318
+ if R is None:
319
+ continue
320
+ data_out, label_out, flip_data_out, flip_label_out = R
321
+ if not self.use_coherent:
322
+ data = nd.array(data_out)
323
+ data = nd.transpose(data, axes=(2, 0, 1))
324
+ label = nd.array(label_out)
325
+ #print(data.shape, label.shape)
326
+ batch_data[i][:] = data
327
+ batch_label[i][:] = label
328
+ i += 1
329
+ else:
330
+ data = nd.array(data_out)
331
+ data = nd.transpose(data, axes=(2, 0, 1))
332
+ label = nd.array(label_out)
333
+ data2 = nd.array(flip_data_out)
334
+ data2 = nd.transpose(data2, axes=(2, 0, 1))
335
+ label2 = nd.array(flip_label_out)
336
+ #M = nd.array(M)
337
+ #print(data.shape, label.shape)
338
+ batch_data[i][:] = data
339
+ batch_label[i][:] = label
340
+ #i+=1
341
+ j = i + self.per_batch_size // 2
342
+ batch_data[j][:] = data2
343
+ batch_label[j][:] = label2
344
+ i += 1
345
+ if j % self.per_batch_size == self.per_batch_size - 1:
346
+ i = j + 1
347
+ except StopIteration:
348
+ if i < batch_size:
349
+ raise StopIteration
350
+
351
+ #return {self.data_name : batch_data,
352
+ # self.label_name : batch_label}
353
+ #print(batch_data.shape, batch_label.shape)
354
+ return mx.io.DataBatch([batch_data], [batch_label], batch_size - i)
insightface/alignment/heatmap/img_helper.py ADDED
@@ -0,0 +1,86 @@
+ import numpy as np
+ import math
+ import cv2
+ from skimage import transform as stf
+
+
+ def transform(data, center, output_size, scale, rotation):
+     scale_ratio = float(output_size) / scale
+     rot = float(rotation) * np.pi / 180.0
+     #translation = (output_size/2-center[0]*scale_ratio, output_size/2-center[1]*scale_ratio)
+     t1 = stf.SimilarityTransform(scale=scale_ratio)
+     cx = center[0] * scale_ratio
+     cy = center[1] * scale_ratio
+     t2 = stf.SimilarityTransform(translation=(-1 * cx, -1 * cy))
+     t3 = stf.SimilarityTransform(rotation=rot)
+     t4 = stf.SimilarityTransform(translation=(output_size / 2, output_size / 2))
+     t = t1 + t2 + t3 + t4
+     trans = t.params[0:2]
+     #print('M', scale, rotation, trans)
+     cropped = cv2.warpAffine(data, trans, (output_size, output_size), borderValue=0.0)
+     return cropped, trans
+
+
+ def transform_pt(pt, trans):
+     new_pt = np.array([pt[0], pt[1], 1.]).T
+     new_pt = np.dot(trans, new_pt)
+     #print('new_pt', new_pt.shape, new_pt)
+     return new_pt[:2]
+
+
+ def gaussian(img, pt, sigma):
+     # Draw a 2D gaussian
+     assert (sigma >= 0)
+     if sigma == 0:
+         img[pt[1], pt[0]] = 1.0
+         return True
+     #assert pt[0]<=img.shape[1]
+     #assert pt[1]<=img.shape[0]
+
+     # Check that any part of the gaussian is in-bounds
+     ul = [int(pt[0] - 3 * sigma), int(pt[1] - 3 * sigma)]
+     br = [int(pt[0] + 3 * sigma + 1), int(pt[1] + 3 * sigma + 1)]
+     if (ul[0] > img.shape[1] or ul[1] >= img.shape[0] or br[0] < 0 or br[1] < 0):
+         # If not, just return the image as is
+         #print('gaussian error')
+         return False
+         #return img
+
+     # Generate gaussian
+     size = 6 * sigma + 1
+     x = np.arange(0, size, 1, float)
+     y = x[:, np.newaxis]
+     x0 = y0 = size // 2
+     # The gaussian is not normalized, we want the center value to equal 1
+     g = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))
+
+     # Usable gaussian range
+     g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
+     g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
+     # Image range
+     img_x = max(0, ul[0]), min(br[0], img.shape[1])
+     img_y = max(0, ul[1]), min(br[1], img.shape[0])
+
+     img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
+     return True
+     #return img
+
+
+ def estimate_trans_bbox(face, input_size, s=2.0):
+     w = face[2] - face[0]
+     h = face[3] - face[1]
+     wc = int((face[2] + face[0]) / 2)
+     hc = int((face[3] + face[1]) / 2)
+     im_size = max(w, h)
+     #size = int(im_size*1.2)
+     scale = input_size / (max(w, h) * s)
+     M = [
+         [scale, 0, input_size / 2 - wc * scale],
+         [0, scale, input_size / 2 - hc * scale],
+     ]
+     M = np.array(M)
+     return M
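A minimal sketch of how these helpers fit together when building a heatmap training label, mirroring what `data.py` does in `get_data`; the image size, center, landmark and sigma values below are illustrative assumptions:

```python
import numpy as np
import img_helper

img = np.zeros((384, 384, 3), dtype=np.uint8)   # stand-in for a record_img_size crop
center = np.array((192.0, 192.0))               # crop center
landmark = np.array((150.0, 200.0))             # one (x, y) ground-truth point

# Warp the crop to the 128x128 network input at scale 256 with no rotation.
cropped, trans = img_helper.transform(img, center, 128, 256, 0)

# Map the landmark through the same affine transform, then rescale to the 64x64 label map.
pt = img_helper.transform_pt(landmark, trans) * (64.0 / 128.0)

# Render an (unnormalized) gaussian peak at the transformed point.
heatmap = np.zeros((64, 64), dtype=np.float32)
img_helper.gaussian(heatmap, pt.astype(np.int32), 1)
```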
insightface/alignment/heatmap/metric.py ADDED
@@ -0,0 +1,107 @@
1
+ import mxnet as mx
2
+ import numpy as np
3
+ import math
4
+ import cv2
5
+ from config import config
6
+
7
+
8
+ class LossValueMetric(mx.metric.EvalMetric):
9
+ def __init__(self):
10
+ self.axis = 1
11
+ super(LossValueMetric, self).__init__('lossvalue',
12
+ axis=self.axis,
13
+ output_names=None,
14
+ label_names=None)
15
+ self.losses = []
16
+
17
+ def update(self, labels, preds):
18
+ loss = preds[0].asnumpy()[0]
19
+ self.sum_metric += loss
20
+ self.num_inst += 1.0
21
+
22
+
23
+ class NMEMetric(mx.metric.EvalMetric):
24
+ def __init__(self):
25
+ self.axis = 1
26
+ super(NMEMetric, self).__init__('NME',
27
+ axis=self.axis,
28
+ output_names=None,
29
+ label_names=None)
30
+ #self.losses = []
31
+ self.count = 0
32
+
33
+ def cal_nme(self, label, pred_label):
34
+ nme = []
35
+ for b in range(pred_label.shape[0]):
36
+ record = [None] * 6
37
+ item = []
38
+ if label.ndim == 4:
39
+ _heatmap = label[b][36]
40
+ if np.count_nonzero(_heatmap) == 0:
41
+ continue
42
+ else: #ndim==3
43
+ #print(label[b])
44
+ if np.count_nonzero(label[b]) == 0:
45
+ continue
46
+ for p in range(pred_label.shape[1]):
47
+ if label.ndim == 4:
48
+ heatmap_gt = label[b][p]
49
+ ind_gt = np.unravel_index(np.argmax(heatmap_gt, axis=None),
50
+ heatmap_gt.shape)
51
+ ind_gt = np.array(ind_gt)
52
+ else:
53
+ ind_gt = label[b][p]
54
+ #ind_gt = ind_gt.astype(np.int)
55
+ #print(ind_gt)
56
+ heatmap_pred = pred_label[b][p]
57
+ heatmap_pred = cv2.resize(
58
+ heatmap_pred,
59
+ (config.input_img_size, config.input_img_size))
60
+ ind_pred = np.unravel_index(np.argmax(heatmap_pred, axis=None),
61
+ heatmap_pred.shape)
62
+ ind_pred = np.array(ind_pred)
63
+ #print(ind_gt.shape)
64
+ #print(ind_pred)
65
+ if p == 36:
66
+ #print('b', b, p, ind_gt, np.count_nonzero(heatmap_gt))
67
+ record[0] = ind_gt
68
+ elif p == 39:
69
+ record[1] = ind_gt
70
+ elif p == 42:
71
+ record[2] = ind_gt
72
+ elif p == 45:
73
+ record[3] = ind_gt
74
+ if record[4] is None or record[5] is None:
75
+ record[4] = ind_gt
76
+ record[5] = ind_gt
77
+ else:
78
+ record[4] = np.minimum(record[4], ind_gt)
79
+ record[5] = np.maximum(record[5], ind_gt)
80
+ #print(ind_gt.shape, ind_pred.shape)
81
+ value = np.sqrt(np.sum(np.square(ind_gt - ind_pred)))
82
+ item.append(value)
83
+ _nme = np.mean(item)
84
+ if config.landmark_type == '2d':
85
+ left_eye = (record[0] + record[1]) / 2
86
+ right_eye = (record[2] + record[3]) / 2
87
+ _dist = np.sqrt(np.sum(np.square(left_eye - right_eye)))
88
+ #print('eye dist', _dist, left_eye, right_eye)
89
+ _nme /= _dist
90
+ else:
91
+ #_dist = np.sqrt(float(label.shape[2]*label.shape[3]))
92
+ _dist = np.sqrt(np.sum(np.square(record[5] - record[4])))
93
+ #print(_dist)
94
+ _nme /= _dist
95
+ nme.append(_nme)
96
+ return np.mean(nme)
97
+
98
+ def update(self, labels, preds):
99
+ self.count += 1
100
+ label = labels[0].asnumpy()
101
+ pred_label = preds[-1].asnumpy()
102
+ nme = self.cal_nme(label, pred_label)
103
+
104
+ #print('nme', nme)
105
+ #nme = np.mean(nme)
106
+ self.sum_metric += np.mean(nme)
107
+ self.num_inst += 1.0
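For reference, `NMEMetric` above computes the normalized mean error per image: the mean point-to-point distance between predicted and ground-truth landmark locations, normalized by the inter-ocular distance for 2D landmarks (eye centers from points 36/39 and 42/45) or by the diagonal of the ground-truth landmark bounding box for 3D landmarks. In formula form (my reading of `cal_nme`):

```latex
\mathrm{NME} = \frac{1}{N}\sum_{i=1}^{N} \frac{\lVert \hat{p}_i - p_i \rVert_2}{d},
\qquad
d = \begin{cases}
\lVert c_{\text{left eye}} - c_{\text{right eye}} \rVert_2 & \text{2D landmarks}\\
\lVert \max_i p_i - \min_i p_i \rVert_2 & \text{3D landmarks}
\end{cases}
```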
insightface/alignment/heatmap/optimizer.py ADDED
@@ -0,0 +1,65 @@
1
+ import mxnet as mx
2
+ import mxnet.optimizer as optimizer
3
+ from mxnet.ndarray import (NDArray, zeros, clip, sqrt, cast, maximum, abs as
4
+ NDabs)
5
+ #from mxnet.ndarray import (sgd_update, sgd_mom_update, adam_update, rmsprop_update, rmspropalex_update,
6
+ # mp_sgd_update, mp_sgd_mom_update, square, ftrl_update)
7
+
8
+
9
+ class ONadam(optimizer.Optimizer):
10
+ def __init__(self,
11
+ learning_rate=0.001,
12
+ beta1=0.9,
13
+ beta2=0.999,
14
+ epsilon=1e-8,
15
+ schedule_decay=0.004,
16
+ **kwargs):
17
+ super(ONadam, self).__init__(learning_rate=learning_rate, **kwargs)
18
+ self.beta1 = beta1
19
+ self.beta2 = beta2
20
+ self.epsilon = epsilon
21
+ self.schedule_decay = schedule_decay
22
+ self.m_schedule = 1.
23
+
24
+ def create_state(self, index, weight):
25
+ return (
26
+ zeros(weight.shape, weight.context, dtype=weight.dtype), # mean
27
+ zeros(weight.shape, weight.context,
28
+ dtype=weight.dtype)) # variance
29
+
30
+ def update(self, index, weight, grad, state):
31
+ assert (isinstance(weight, NDArray))
32
+ assert (isinstance(grad, NDArray))
33
+ self._update_count(index)
34
+ lr = self._get_lr(index)
35
+ wd = self._get_wd(index)
36
+
37
+ t = self._index_update_count[index]
38
+
39
+ # preprocess grad
40
+ #grad = grad * self.rescale_grad + wd * weight
41
+ grad *= self.rescale_grad + wd * weight
42
+ if self.clip_gradient is not None:
43
+ grad = clip(grad, -self.clip_gradient, self.clip_gradient)
44
+
45
+ # warming momentum schedule
46
+ momentum_t = self.beta1 * (1. - 0.5 *
47
+ (pow(0.96, t * self.schedule_decay)))
48
+ momentum_t_1 = self.beta1 * (1. - 0.5 *
49
+ (pow(0.96,
50
+ (t + 1) * self.schedule_decay)))
51
+ self.m_schedule = self.m_schedule * momentum_t
52
+ m_schedule_next = self.m_schedule * momentum_t_1
53
+
54
+ # update m_t and v_t
55
+ m_t, v_t = state
56
+ m_t[:] = self.beta1 * m_t + (1. - self.beta1) * grad
57
+ v_t[:] = self.beta2 * v_t + (1. - self.beta2) * grad * grad
58
+
59
+ grad_prime = grad / (1. - self.m_schedule)
60
+ m_t_prime = m_t / (1. - m_schedule_next)
61
+ v_t_prime = v_t / (1. - pow(self.beta2, t))
62
+ m_t_bar = (1. - momentum_t) * grad_prime + momentum_t_1 * m_t_prime
63
+
64
+ # update weight
65
+ weight[:] -= lr * m_t_bar / (sqrt(v_t_prime) + self.epsilon)
insightface/alignment/heatmap/sample_config.py ADDED
@@ -0,0 +1,98 @@
1
+ import numpy as np
2
+ from easydict import EasyDict as edict
3
+
4
+ config = edict()
5
+
6
+ #default training/dataset config
7
+ config.num_classes = 68
8
+ config.record_img_size = 384
9
+ config.base_scale = 256
10
+ config.input_img_size = 128
11
+ config.output_label_size = 64
12
+ config.label_xfirst = False
13
+ config.losstype = 'heatmap'
14
+ config.net_coherent = False
15
+ config.multiplier = 1.0
16
+
17
+ config.gaussian = 0
18
+
19
+ # network settings
20
+ network = edict()
21
+
22
+ network.hourglass = edict()
23
+ network.hourglass.net_coherent = False
24
+ network.hourglass.net_sta = 0
25
+ network.hourglass.net_n = 3
26
+ network.hourglass.net_dcn = 0
27
+ network.hourglass.net_stacks = 2
28
+ network.hourglass.net_block = 'resnet'
29
+ network.hourglass.net_binarize = False
30
+ network.hourglass.losstype = 'heatmap'
31
+
32
+ network.sdu = edict()
33
+ network.sdu.net_coherent = False
34
+ network.sdu.net_sta = 1
35
+ network.sdu.net_n = 3
36
+ network.sdu.net_dcn = 3
37
+ network.sdu.net_stacks = 2
38
+ network.sdu.net_block = 'cab'
39
+ network.sdu.net_binarize = False
40
+ network.sdu.losstype = 'heatmap'
41
+
42
+ # dataset settings
43
+ dataset = edict()
44
+
45
+ dataset.i2d = edict()
46
+ dataset.i2d.dataset = '2D'
47
+ dataset.i2d.landmark_type = '2d'
48
+ dataset.i2d.dataset_path = './data_2d'
49
+ dataset.i2d.num_classes = 68
50
+ dataset.i2d.record_img_size = 384
51
+ dataset.i2d.base_scale = 256
52
+ dataset.i2d.input_img_size = 128
53
+ dataset.i2d.output_label_size = 64
54
+ dataset.i2d.label_xfirst = False
55
+ dataset.i2d.val_targets = ['ibug', 'cofw_testset', '300W']
56
+
57
+ dataset.i3d = edict()
58
+ dataset.i3d.dataset = '3D'
59
+ dataset.i3d.landmark_type = '3d'
60
+ dataset.i3d.dataset_path = './data_3d'
61
+ dataset.i3d.num_classes = 68
62
+ dataset.i3d.record_img_size = 384
63
+ dataset.i3d.base_scale = 256
64
+ dataset.i3d.input_img_size = 128
65
+ dataset.i3d.output_label_size = 64
66
+ dataset.i3d.label_xfirst = False
67
+ dataset.i3d.val_targets = ['AFLW2000-3D']
68
+
69
+ # default settings
70
+ default = edict()
71
+
72
+ # default network
73
+ default.network = 'hourglass'
74
+ default.pretrained = ''
75
+ default.pretrained_epoch = 0
76
+ # default dataset
77
+ default.dataset = 'i2d'
78
+ default.frequent = 20
79
+ default.verbose = 200
80
+ default.kvstore = 'device'
81
+
82
+ default.prefix = 'model/A'
83
+ default.end_epoch = 10000
84
+ default.lr = 0.00025
85
+ default.wd = 0.0
86
+ default.per_batch_size = 20
87
+ default.lr_step = '16000,24000,30000'
88
+
89
+
90
+ def generate_config(_network, _dataset):
91
+ for k, v in network[_network].items():
92
+ config[k] = v
93
+ default[k] = v
94
+ for k, v in dataset[_dataset].items():
95
+ config[k] = v
96
+ default[k] = v
97
+ config.network = _network
98
+ config.dataset = _dataset
insightface/alignment/heatmap/symbol/sym_heatmap.py ADDED
@@ -0,0 +1,1085 @@
1
+ from __future__ import absolute_import
2
+ from __future__ import division
3
+ from __future__ import print_function
4
+ import mxnet as mx
5
+ import numpy as np
6
+ from config import config
7
+
8
+ ACT_BIT = 1
9
+ bn_mom = 0.9
10
+ workspace = 256
11
+ memonger = False
12
+
13
+
14
+ def Conv(**kwargs):
15
+ body = mx.sym.Convolution(**kwargs)
16
+ return body
17
+
18
+
19
+ def Act(data, act_type, name):
20
+ if act_type == 'prelu':
21
+ body = mx.sym.LeakyReLU(data=data, act_type='prelu', name=name)
22
+ else:
23
+ body = mx.symbol.Activation(data=data, act_type=act_type, name=name)
24
+ return body
25
+
26
+
27
+ #def lin(data, num_filter, workspace, name, binarize, dcn):
28
+ # bit = 1
29
+ # if not binarize:
30
+ # if not dcn:
31
+ # conv1 = Conv(data=data, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0),
32
+ # no_bias=True, workspace=workspace, name=name + '_conv')
33
+ # bn1 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn')
34
+ # act1 = Act(data=bn1, act_type='relu', name=name + '_relu')
35
+ # return act1
36
+ # else:
37
+ # bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn')
38
+ # act1 = Act(data=bn1, act_type='relu', name=name + '_relu')
39
+ # conv1_offset = mx.symbol.Convolution(name=name+'_conv_offset', data = act1,
40
+ # num_filter=18, pad=(1, 1), kernel=(3, 3), stride=(1, 1))
41
+ # conv1 = mx.contrib.symbol.DeformableConvolution(name=name+"_conv", data=act1, offset=conv1_offset,
42
+ # num_filter=num_filter, pad=(1,1), kernel=(3, 3), num_deformable_group=1, stride=(1, 1), dilate=(1, 1), no_bias=False)
43
+ # #conv1 = Conv(data=act1, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1,1),
44
+ # # no_bias=False, workspace=workspace, name=name + '_conv')
45
+ # return conv1
46
+ # else:
47
+ # bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn')
48
+ # act1 = Act(data=bn1, act_type='relu', name=name + '_relu')
49
+ # conv1 = mx.sym.QConvolution_v1(data=act1, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0),
50
+ # no_bias=True, workspace=workspace, name=name + '_conv', act_bit=ACT_BIT, weight_bit=bit)
51
+ # conv1 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn2')
52
+ # return conv1
53
+
54
+
55
+ def lin3(data, num_filter, workspace, name, k, g=1, d=1):
56
+ if k != 3:
57
+ conv1 = Conv(data=data,
58
+ num_filter=num_filter,
59
+ kernel=(k, k),
60
+ stride=(1, 1),
61
+ pad=((k - 1) // 2, (k - 1) // 2),
62
+ num_group=g,
63
+ no_bias=True,
64
+ workspace=workspace,
65
+ name=name + '_conv')
66
+ else:
67
+ conv1 = Conv(data=data,
68
+ num_filter=num_filter,
69
+ kernel=(k, k),
70
+ stride=(1, 1),
71
+ pad=(d, d),
72
+ num_group=g,
73
+ dilate=(d, d),
74
+ no_bias=True,
75
+ workspace=workspace,
76
+ name=name + '_conv')
77
+ bn1 = mx.sym.BatchNorm(data=conv1,
78
+ fix_gamma=False,
79
+ momentum=bn_mom,
80
+ eps=2e-5,
81
+ name=name + '_bn')
82
+ act1 = Act(data=bn1, act_type='relu', name=name + '_relu')
83
+ ret = act1
84
+ return ret
85
+
86
+
87
+ def ConvFactory(data,
88
+ num_filter,
89
+ kernel,
90
+ stride=(1, 1),
91
+ pad=(0, 0),
92
+ act_type="relu",
93
+ mirror_attr={},
94
+ with_act=True,
95
+ dcn=False,
96
+ name=''):
97
+ if not dcn:
98
+ conv = mx.symbol.Convolution(data=data,
99
+ num_filter=num_filter,
100
+ kernel=kernel,
101
+ stride=stride,
102
+ pad=pad,
103
+ no_bias=True,
104
+ workspace=workspace,
105
+ name=name + '_conv')
106
+ else:
107
+ conv_offset = mx.symbol.Convolution(name=name + '_conv_offset',
108
+ data=data,
109
+ num_filter=18,
110
+ pad=(1, 1),
111
+ kernel=(3, 3),
112
+ stride=(1, 1))
113
+ conv = mx.contrib.symbol.DeformableConvolution(name=name + "_conv",
114
+ data=data,
115
+ offset=conv_offset,
116
+ num_filter=num_filter,
117
+ pad=(1, 1),
118
+ kernel=(3, 3),
119
+ num_deformable_group=1,
120
+ stride=stride,
121
+ dilate=(1, 1),
122
+ no_bias=False)
123
+ bn = mx.symbol.BatchNorm(data=conv,
124
+ fix_gamma=False,
125
+ momentum=bn_mom,
126
+ eps=2e-5,
127
+ name=name + '_bn')
128
+ if with_act:
129
+ act = Act(bn, act_type, name=name + '_relu')
130
+ #act = mx.symbol.Activation(
131
+ # data=bn, act_type=act_type, attr=mirror_attr, name=name+'_relu')
132
+ return act
133
+ else:
134
+ return bn
135
+
136
+
137
+ class CAB:
138
+ def __init__(self, data, nFilters, nModules, n, workspace, name, dilate,
139
+ group):
140
+ self.data = data
141
+ self.nFilters = nFilters
142
+ self.nModules = nModules
143
+ self.n = n
144
+ self.workspace = workspace
145
+ self.name = name
146
+ self.dilate = dilate
147
+ self.group = group
148
+ self.sym_map = {}
149
+
150
+ def get_output(self, w, h):
151
+ key = (w, h)
152
+ if key in self.sym_map:
153
+ return self.sym_map[key]
154
+ ret = None
155
+ if h == self.n:
156
+ if w == self.n:
157
+ ret = (self.data, self.nFilters)
158
+ else:
159
+ x = self.get_output(w + 1, h)
160
+ f = int(x[1] * 0.5)
161
+ if w != self.n - 1:
162
+ body = lin3(x[0], f, self.workspace,
163
+ "%s_w%d_h%d_1" % (self.name, w, h), 3,
164
+ self.group, 1)
165
+ else:
166
+ body = lin3(x[0], f, self.workspace,
167
+ "%s_w%d_h%d_1" % (self.name, w, h), 3,
168
+ self.group, self.dilate)
169
+ ret = (body, f)
170
+ else:
171
+ x = self.get_output(w + 1, h + 1)
172
+ y = self.get_output(w, h + 1)
173
+ if h % 2 == 1 and h != w:
174
+ xbody = lin3(x[0], x[1], self.workspace,
175
+ "%s_w%d_h%d_2" % (self.name, w, h), 3, x[1])
176
+ #xbody = xbody+x[0]
177
+ else:
178
+ xbody = x[0]
179
+ #xbody = x[0]
180
+ #xbody = lin3(x[0], x[1], self.workspace, "%s_w%d_h%d_2"%(self.name, w, h), 3, x[1])
181
+ if w == 0:
182
+ ybody = lin3(y[0], y[1], self.workspace,
183
+ "%s_w%d_h%d_3" % (self.name, w, h), 3, self.group)
184
+ else:
185
+ ybody = y[0]
186
+ ybody = mx.sym.concat(y[0], ybody, dim=1)
187
+ body = mx.sym.add_n(xbody,
188
+ ybody,
189
+ name="%s_w%d_h%d_add" % (self.name, w, h))
190
+ body = body / 2
191
+ ret = (body, x[1])
192
+ self.sym_map[key] = ret
193
+ return ret
194
+
195
+ def get(self):
196
+ return self.get_output(1, 1)[0]
197
+
198
+
199
+ def conv_resnet(data, num_filter, stride, dim_match, name, binarize, dcn,
200
+ dilate, **kwargs):
201
+ bit = 1
202
+ #print('in unit2')
203
+ # the same as https://github.com/facebook/fb.resnet.torch#notes, a bit difference with origin paper
204
+ bn1 = mx.sym.BatchNorm(data=data,
205
+ fix_gamma=False,
206
+ eps=2e-5,
207
+ momentum=bn_mom,
208
+ name=name + '_bn1')
209
+ if not binarize:
210
+ act1 = Act(data=bn1, act_type='relu', name=name + '_relu1')
211
+ conv1 = Conv(data=act1,
212
+ num_filter=int(num_filter * 0.5),
213
+ kernel=(1, 1),
214
+ stride=(1, 1),
215
+ pad=(0, 0),
216
+ no_bias=True,
217
+ workspace=workspace,
218
+ name=name + '_conv1')
219
+ else:
220
+ act1 = mx.sym.QActivation(data=bn1,
221
+ act_bit=ACT_BIT,
222
+ name=name + '_relu1',
223
+ backward_only=True)
224
+ conv1 = mx.sym.QConvolution(data=act1,
225
+ num_filter=int(num_filter * 0.5),
226
+ kernel=(1, 1),
227
+ stride=(1, 1),
228
+ pad=(0, 0),
229
+ no_bias=True,
230
+ workspace=workspace,
231
+ name=name + '_conv1',
232
+ act_bit=ACT_BIT,
233
+ weight_bit=bit)
234
+ bn2 = mx.sym.BatchNorm(data=conv1,
235
+ fix_gamma=False,
236
+ eps=2e-5,
237
+ momentum=bn_mom,
238
+ name=name + '_bn2')
239
+ if not binarize:
240
+ act2 = Act(data=bn2, act_type='relu', name=name + '_relu2')
241
+ conv2 = Conv(data=act2,
242
+ num_filter=int(num_filter * 0.5),
243
+ kernel=(3, 3),
244
+ stride=(1, 1),
245
+ pad=(1, 1),
246
+ no_bias=True,
247
+ workspace=workspace,
248
+ name=name + '_conv2')
249
+ else:
250
+ act2 = mx.sym.QActivation(data=bn2,
251
+ act_bit=ACT_BIT,
252
+ name=name + '_relu2',
253
+ backward_only=True)
254
+ conv2 = mx.sym.QConvolution(data=act2,
255
+ num_filter=int(num_filter * 0.5),
256
+ kernel=(3, 3),
257
+ stride=(1, 1),
258
+ pad=(1, 1),
259
+ no_bias=True,
260
+ workspace=workspace,
261
+ name=name + '_conv2',
262
+ act_bit=ACT_BIT,
263
+ weight_bit=bit)
264
+ bn3 = mx.sym.BatchNorm(data=conv2,
265
+ fix_gamma=False,
266
+ eps=2e-5,
267
+ momentum=bn_mom,
268
+ name=name + '_bn3')
269
+ if not binarize:
270
+ act3 = Act(data=bn3, act_type='relu', name=name + '_relu3')
271
+ conv3 = Conv(data=act3,
272
+ num_filter=num_filter,
273
+ kernel=(1, 1),
274
+ stride=(1, 1),
275
+ pad=(0, 0),
276
+ no_bias=True,
277
+ workspace=workspace,
278
+ name=name + '_conv3')
279
+ else:
280
+ act3 = mx.sym.QActivation(data=bn3,
281
+ act_bit=ACT_BIT,
282
+ name=name + '_relu3',
283
+ backward_only=True)
284
+ conv3 = mx.sym.QConvolution(data=act3,
285
+ num_filter=num_filter,
286
+ kernel=(1, 1),
287
+ stride=(1, 1),
288
+ pad=(0, 0),
289
+ no_bias=True,
290
+ workspace=workspace,
291
+ name=name + '_conv3',
292
+ act_bit=ACT_BIT,
293
+ weight_bit=bit)
294
+ #if binarize:
295
+ # conv3 = mx.sym.BatchNorm(data=conv3, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn4')
296
+ if dim_match:
297
+ shortcut = data
298
+ else:
299
+ if not binarize:
300
+ shortcut = Conv(data=act1,
301
+ num_filter=num_filter,
302
+ kernel=(1, 1),
303
+ stride=stride,
304
+ no_bias=True,
305
+ workspace=workspace,
306
+ name=name + '_sc')
307
+ else:
308
+ shortcut = mx.sym.QConvolution(data=act1,
309
+ num_filter=num_filter,
310
+ kernel=(1, 1),
311
+ stride=stride,
312
+ pad=(0, 0),
313
+ no_bias=True,
314
+ workspace=workspace,
315
+ name=name + '_sc',
316
+ act_bit=ACT_BIT,
317
+ weight_bit=bit)
318
+ if memonger:
319
+ shortcut._set_attr(mirror_stage='True')
320
+ return conv3 + shortcut
321
+
322
+
323
+ def conv_hpm(data, num_filter, stride, dim_match, name, binarize, dcn,
324
+ dilation, **kwargs):
325
+ bit = 1
326
+ #print('in unit2')
327
+ # the same as https://github.com/facebook/fb.resnet.torch#notes, a bit difference with origin paper
328
+ bn1 = mx.sym.BatchNorm(data=data,
329
+ fix_gamma=False,
330
+ eps=2e-5,
331
+ momentum=bn_mom,
332
+ name=name + '_bn1')
333
+ if not binarize:
334
+ act1 = Act(data=bn1, act_type='relu', name=name + '_relu1')
335
+ if not dcn:
336
+ conv1 = Conv(data=act1,
337
+ num_filter=int(num_filter * 0.5),
338
+ kernel=(3, 3),
339
+ stride=(1, 1),
340
+ pad=(dilation, dilation),
341
+ dilate=(dilation, dilation),
342
+ no_bias=True,
343
+ workspace=workspace,
344
+ name=name + '_conv1')
345
+ else:
346
+ conv1_offset = mx.symbol.Convolution(name=name + '_conv1_offset',
347
+ data=act1,
348
+ num_filter=18,
349
+ pad=(1, 1),
350
+ kernel=(3, 3),
351
+ stride=(1, 1))
352
+ conv1 = mx.contrib.symbol.DeformableConvolution(
353
+ name=name + '_conv1',
354
+ data=act1,
355
+ offset=conv1_offset,
356
+ num_filter=int(num_filter * 0.5),
357
+ pad=(1, 1),
358
+ kernel=(3, 3),
359
+ num_deformable_group=1,
360
+ stride=(1, 1),
361
+ dilate=(1, 1),
362
+ no_bias=True)
363
+ else:
364
+ act1 = mx.sym.QActivation(data=bn1,
365
+ act_bit=ACT_BIT,
366
+ name=name + '_relu1',
367
+ backward_only=True)
368
+ conv1 = mx.sym.QConvolution_v1(data=act1,
369
+ num_filter=int(num_filter * 0.5),
370
+ kernel=(3, 3),
371
+ stride=(1, 1),
372
+ pad=(1, 1),
373
+ no_bias=True,
374
+ workspace=workspace,
375
+ name=name + '_conv1',
376
+ act_bit=ACT_BIT,
377
+ weight_bit=bit)
378
+ bn2 = mx.sym.BatchNorm(data=conv1,
379
+ fix_gamma=False,
380
+ eps=2e-5,
381
+ momentum=bn_mom,
382
+ name=name + '_bn2')
383
+ if not binarize:
384
+ act2 = Act(data=bn2, act_type='relu', name=name + '_relu2')
385
+ if not dcn:
386
+ conv2 = Conv(data=act2,
387
+ num_filter=int(num_filter * 0.25),
388
+ kernel=(3, 3),
389
+ stride=(1, 1),
390
+ pad=(dilation, dilation),
391
+ dilate=(dilation, dilation),
392
+ no_bias=True,
393
+ workspace=workspace,
394
+ name=name + '_conv2')
395
+ else:
396
+ conv2_offset = mx.symbol.Convolution(name=name + '_conv2_offset',
397
+ data=act2,
398
+ num_filter=18,
399
+ pad=(1, 1),
400
+ kernel=(3, 3),
401
+ stride=(1, 1))
402
+ conv2 = mx.contrib.symbol.DeformableConvolution(
403
+ name=name + '_conv2',
404
+ data=act2,
405
+ offset=conv2_offset,
406
+ num_filter=int(num_filter * 0.25),
407
+ pad=(1, 1),
408
+ kernel=(3, 3),
409
+ num_deformable_group=1,
410
+ stride=(1, 1),
411
+ dilate=(1, 1),
412
+ no_bias=True)
413
+ else:
414
+ act2 = mx.sym.QActivation(data=bn2,
415
+ act_bit=ACT_BIT,
416
+ name=name + '_relu2',
417
+ backward_only=True)
418
+ conv2 = mx.sym.QConvolution_v1(data=act2,
419
+ num_filter=int(num_filter * 0.25),
420
+ kernel=(3, 3),
421
+ stride=(1, 1),
422
+ pad=(1, 1),
423
+ no_bias=True,
424
+ workspace=workspace,
425
+ name=name + '_conv2',
426
+ act_bit=ACT_BIT,
427
+ weight_bit=bit)
428
+ bn3 = mx.sym.BatchNorm(data=conv2,
429
+ fix_gamma=False,
430
+ eps=2e-5,
431
+ momentum=bn_mom,
432
+ name=name + '_bn3')
433
+ if not binarize:
434
+ act3 = Act(data=bn3, act_type='relu', name=name + '_relu3')
435
+ if not dcn:
436
+ conv3 = Conv(data=act3,
437
+ num_filter=int(num_filter * 0.25),
438
+ kernel=(3, 3),
439
+ stride=(1, 1),
440
+ pad=(dilation, dilation),
441
+ dilate=(dilation, dilation),
442
+ no_bias=True,
443
+ workspace=workspace,
444
+ name=name + '_conv3')
445
+ else:
446
+ conv3_offset = mx.symbol.Convolution(name=name + '_conv3_offset',
447
+ data=act3,
448
+ num_filter=18,
449
+ pad=(1, 1),
450
+ kernel=(3, 3),
451
+ stride=(1, 1))
452
+ conv3 = mx.contrib.symbol.DeformableConvolution(
453
+ name=name + '_conv3',
454
+ data=act3,
455
+ offset=conv3_offset,
456
+ num_filter=int(num_filter * 0.25),
457
+ pad=(1, 1),
458
+ kernel=(3, 3),
459
+ num_deformable_group=1,
460
+ stride=(1, 1),
461
+ dilate=(1, 1),
462
+ no_bias=True)
463
+ else:
464
+ act3 = mx.sym.QActivation(data=bn3,
465
+ act_bit=ACT_BIT,
466
+ name=name + '_relu3',
467
+ backward_only=True)
468
+ conv3 = mx.sym.QConvolution_v1(data=act3,
469
+ num_filter=int(num_filter * 0.25),
470
+ kernel=(3, 3),
471
+ stride=(1, 1),
472
+ pad=(1, 1),
473
+ no_bias=True,
474
+ workspace=workspace,
475
+ name=name + '_conv3',
476
+ act_bit=ACT_BIT,
477
+ weight_bit=bit)
478
+ conv4 = mx.symbol.Concat(*[conv1, conv2, conv3])
479
+ if binarize:
480
+ conv4 = mx.sym.BatchNorm(data=conv4,
481
+ fix_gamma=False,
482
+ eps=2e-5,
483
+ momentum=bn_mom,
484
+ name=name + '_bn4')
485
+ if dim_match:
486
+ shortcut = data
487
+ else:
488
+ if not binarize:
489
+ shortcut = Conv(data=act1,
490
+ num_filter=num_filter,
491
+ kernel=(1, 1),
492
+ stride=stride,
493
+ no_bias=True,
494
+ workspace=workspace,
495
+ name=name + '_sc')
496
+ else:
497
+ #assert(False)
498
+ shortcut = mx.sym.QConvolution_v1(data=act1,
499
+ num_filter=num_filter,
500
+ kernel=(1, 1),
501
+ stride=stride,
502
+ pad=(0, 0),
503
+ no_bias=True,
504
+ workspace=workspace,
505
+ name=name + '_sc',
506
+ act_bit=ACT_BIT,
507
+ weight_bit=bit)
508
+ shortcut = mx.sym.BatchNorm(data=shortcut,
509
+ fix_gamma=False,
510
+ eps=2e-5,
511
+ momentum=bn_mom,
512
+ name=name + '_sc_bn')
513
+ if memonger:
514
+ shortcut._set_attr(mirror_stage='True')
515
+ return conv4 + shortcut
516
+ #return bn4 + shortcut
517
+ #return act4 + shortcut
518
+
519
+
520
+ def block17(net,
521
+ input_num_channels,
522
+ scale=1.0,
523
+ with_act=True,
524
+ act_type='relu',
525
+ mirror_attr={},
526
+ name=''):
527
+ tower_conv = ConvFactory(net, 192, (1, 1), name=name + '_conv')
528
+ tower_conv1_0 = ConvFactory(net, 129, (1, 1), name=name + '_conv1_0')
529
+ tower_conv1_1 = ConvFactory(tower_conv1_0,
530
+ 160, (1, 7),
531
+ pad=(1, 2),
532
+ name=name + '_conv1_1')
533
+ tower_conv1_2 = ConvFactory(tower_conv1_1,
534
+ 192, (7, 1),
535
+ pad=(2, 1),
536
+ name=name + '_conv1_2')
537
+ tower_mixed = mx.symbol.Concat(*[tower_conv, tower_conv1_2])
538
+ tower_out = ConvFactory(tower_mixed,
539
+ input_num_channels, (1, 1),
540
+ with_act=False,
541
+ name=name + '_conv_out')
542
+ net = net + scale * tower_out
543
+ if with_act:
544
+ act = mx.symbol.Activation(data=net,
545
+ act_type=act_type,
546
+ attr=mirror_attr)
547
+ return act
548
+ else:
549
+ return net
550
+
551
+
552
+ def block35(net,
553
+ input_num_channels,
554
+ scale=1.0,
555
+ with_act=True,
556
+ act_type='relu',
557
+ mirror_attr={},
558
+ name=''):
559
+ M = 1.0
560
+ tower_conv = ConvFactory(net,
561
+ int(input_num_channels * 0.25 * M), (1, 1),
562
+ name=name + '_conv')
563
+ tower_conv1_0 = ConvFactory(net,
564
+ int(input_num_channels * 0.25 * M), (1, 1),
565
+ name=name + '_conv1_0')
566
+ tower_conv1_1 = ConvFactory(tower_conv1_0,
567
+ int(input_num_channels * 0.25 * M), (3, 3),
568
+ pad=(1, 1),
569
+ name=name + '_conv1_1')
570
+ tower_conv2_0 = ConvFactory(net,
571
+ int(input_num_channels * 0.25 * M), (1, 1),
572
+ name=name + '_conv2_0')
573
+ tower_conv2_1 = ConvFactory(tower_conv2_0,
574
+ int(input_num_channels * 0.375 * M), (3, 3),
575
+ pad=(1, 1),
576
+ name=name + '_conv2_1')
577
+ tower_conv2_2 = ConvFactory(tower_conv2_1,
578
+ int(input_num_channels * 0.5 * M), (3, 3),
579
+ pad=(1, 1),
580
+ name=name + '_conv2_2')
581
+ tower_mixed = mx.symbol.Concat(*[tower_conv, tower_conv1_1, tower_conv2_2])
582
+ tower_out = ConvFactory(tower_mixed,
583
+ input_num_channels, (1, 1),
584
+ with_act=False,
585
+ name=name + '_conv_out')
586
+
587
+ net = net + scale * tower_out
588
+ if with_act:
589
+ act = mx.symbol.Activation(data=net,
590
+ act_type=act_type,
591
+ attr=mirror_attr)
592
+ return act
593
+ else:
594
+ return net
595
+
596
+
597
+ def conv_inception(data, num_filter, stride, dim_match, name, binarize, dcn,
598
+ dilate, **kwargs):
599
+ assert not binarize
600
+ if stride[0] > 1 or not dim_match:
601
+ return conv_resnet(data, num_filter, stride, dim_match, name, binarize,
602
+ dcn, dilate, **kwargs)
603
+ conv4 = block35(data, num_filter, name=name + '_block35')
604
+ return conv4
605
+
606
+
607
+ def conv_cab(data, num_filter, stride, dim_match, name, binarize, dcn, dilate,
608
+ **kwargs):
609
+ if stride[0] > 1 or not dim_match:
610
+ return conv_hpm(data, num_filter, stride, dim_match, name, binarize,
611
+ dcn, dilate, **kwargs)
612
+ cab = CAB(data, num_filter, 1, 4, workspace, name, dilate, 1)
613
+ return cab.get()
614
+
615
+
616
+ def conv_block(data, num_filter, stride, dim_match, name, binarize, dcn,
617
+ dilate):
618
+ if config.net_block == 'resnet':
619
+ return conv_resnet(data, num_filter, stride, dim_match, name, binarize,
620
+ dcn, dilate)
621
+ elif config.net_block == 'inception':
622
+ return conv_inception(data, num_filter, stride, dim_match, name,
623
+ binarize, dcn, dilate)
624
+ elif config.net_block == 'hpm':
625
+ return conv_hpm(data, num_filter, stride, dim_match, name, binarize,
626
+ dcn, dilate)
627
+ elif config.net_block == 'cab':
628
+ return conv_cab(data, num_filter, stride, dim_match, name, binarize,
629
+ dcn, dilate)
630
+
631
+
632
+ def hourglass(data, nFilters, nModules, n, workspace, name, binarize, dcn):
633
+ s = 2
634
+ _dcn = False
635
+ up1 = data
636
+ for i in range(nModules):
637
+ up1 = conv_block(up1, nFilters, (1, 1), True, "%s_up1_%d" % (name, i),
638
+ binarize, _dcn, 1)
639
+ low1 = mx.sym.Pooling(data=data,
640
+ kernel=(s, s),
641
+ stride=(s, s),
642
+ pad=(0, 0),
643
+ pool_type='max')
644
+ for i in range(nModules):
645
+ low1 = conv_block(low1, nFilters, (1, 1), True,
646
+ "%s_low1_%d" % (name, i), binarize, _dcn, 1)
647
+ if n > 1:
648
+ low2 = hourglass(low1, nFilters, nModules, n - 1, workspace,
649
+ "%s_%d" % (name, n - 1), binarize, dcn)
650
+ else:
651
+ low2 = low1
652
+ for i in range(nModules):
653
+ low2 = conv_block(low2, nFilters, (1, 1), True,
654
+ "%s_low2_%d" % (name, i), binarize, _dcn,
655
+ 1) #TODO
656
+ low3 = low2
657
+ for i in range(nModules):
658
+ low3 = conv_block(low3, nFilters, (1, 1), True,
659
+ "%s_low3_%d" % (name, i), binarize, _dcn, 1)
660
+ up2 = mx.symbol.UpSampling(low3,
661
+ scale=s,
662
+ sample_type='nearest',
663
+ workspace=512,
664
+ name='%s_upsampling_%s' % (name, n),
665
+ num_args=1)
666
+ return mx.symbol.add_n(up1, up2)
667
+
668
+
669
+ class STA:
670
+ def __init__(self, data, nFilters, nModules, n, workspace, name):
671
+ self.data = data
672
+ self.nFilters = nFilters
673
+ self.nModules = nModules
674
+ self.n = n
675
+ self.workspace = workspace
676
+ self.name = name
677
+ self.sym_map = {}
678
+
679
+ def get_conv(self, data, name, dilate=1, group=1):
680
+ cab = CAB(data, self.nFilters, self.nModules, 4, self.workspace, name,
681
+ dilate, group)
682
+ return cab.get()
683
+
684
+ def get_output(self, w, h):
685
+ #print(w,h)
686
+ assert w >= 1 and w <= config.net_n + 1
687
+ assert h >= 1 and h <= config.net_n + 1
688
+ s = 2
689
+ bn_mom = 0.9
690
+ key = (w, h)
691
+ if key in self.sym_map:
692
+ return self.sym_map[key]
693
+ ret = None
694
+ if h == self.n:
695
+ if w == self.n:
696
+ ret = self.data, 64
697
+ else:
698
+ x = self.get_output(w + 1, h)
699
+ body = self.get_conv(x[0], "%s_w%d_h%d_1" % (self.name, w, h))
700
+ body = mx.sym.Pooling(data=body,
701
+ kernel=(s, s),
702
+ stride=(s, s),
703
+ pad=(0, 0),
704
+ pool_type='max')
705
+ body = self.get_conv(body, "%s_w%d_h%d_2" % (self.name, w, h))
706
+ ret = body, x[1] // 2
707
+ else:
708
+ x = self.get_output(w + 1, h + 1)
709
+ y = self.get_output(w, h + 1)
710
+
711
+ HC = False
712
+
713
+ if h % 2 == 1 and h != w:
714
+ xbody = lin3(x[0], self.nFilters, self.workspace,
715
+ "%s_w%d_h%d_x" % (self.name, w, h), 3,
716
+ self.nFilters, 1)
717
+ HC = True
718
+ #xbody = x[0]
719
+ else:
720
+ xbody = x[0]
721
+ if x[1] // y[1] == 2:
722
+ if w > 1:
723
+ ybody = mx.symbol.Deconvolution(
724
+ data=y[0],
725
+ num_filter=self.nFilters,
726
+ kernel=(s, s),
727
+ stride=(s, s),
728
+ name='%s_upsampling_w%d_h%d' % (self.name, w, h),
729
+ attr={'lr_mult': '1.0'},
730
+ workspace=self.workspace)
731
+ ybody = mx.sym.BatchNorm(data=ybody,
732
+ fix_gamma=False,
733
+ momentum=bn_mom,
734
+ eps=2e-5,
735
+ name="%s_w%d_h%d_y_bn" %
736
+ (self.name, w, h))
737
+ ybody = Act(data=ybody,
738
+ act_type='relu',
739
+ name="%s_w%d_h%d_y_act" % (self.name, w, h))
740
+ else:
741
+ if h >= 1:
742
+ ybody = mx.symbol.UpSampling(
743
+ y[0],
744
+ scale=s,
745
+ sample_type='nearest',
746
+ workspace=512,
747
+ name='%s_upsampling_w%d_h%d' % (self.name, w, h),
748
+ num_args=1)
749
+ ybody = self.get_conv(
750
+ ybody, "%s_w%d_h%d_4" % (self.name, w, h))
751
+ else:
752
+ ybody = mx.symbol.Deconvolution(
753
+ data=y[0],
754
+ num_filter=self.nFilters,
755
+ kernel=(s, s),
756
+ stride=(s, s),
757
+ name='%s_upsampling_w%d_h%d' % (self.name, w, h),
758
+ attr={'lr_mult': '1.0'},
759
+ workspace=self.workspace)
760
+ ybody = mx.sym.BatchNorm(data=ybody,
761
+ fix_gamma=False,
762
+ momentum=bn_mom,
763
+ eps=2e-5,
764
+ name="%s_w%d_h%d_y_bn" %
765
+ (self.name, w, h))
766
+ ybody = Act(data=ybody,
767
+ act_type='relu',
768
+ name="%s_w%d_h%d_y_act" %
769
+ (self.name, w, h))
770
+ ybody = Conv(data=ybody,
771
+ num_filter=self.nFilters,
772
+ kernel=(3, 3),
773
+ stride=(1, 1),
774
+ pad=(1, 1),
775
+ no_bias=True,
776
+ name="%s_w%d_h%d_y_conv2" %
777
+ (self.name, w, h),
778
+ workspace=self.workspace)
779
+ ybody = mx.sym.BatchNorm(data=ybody,
780
+ fix_gamma=False,
781
+ momentum=bn_mom,
782
+ eps=2e-5,
783
+ name="%s_w%d_h%d_y_bn2" %
784
+ (self.name, w, h))
785
+ ybody = Act(data=ybody,
786
+ act_type='relu',
787
+ name="%s_w%d_h%d_y_act2" %
788
+ (self.name, w, h))
789
+ else:
790
+ ybody = self.get_conv(y[0], "%s_w%d_h%d_5" % (self.name, w, h))
791
+ #if not HC:
792
+ if config.net_sta == 2 and h == 3 and w == 2:
793
+ z = self.get_output(w + 1, h)
794
+ zbody = z[0]
795
+ zbody = mx.sym.Pooling(data=zbody,
796
+ kernel=(z[1], z[1]),
797
+ stride=(z[1], z[1]),
798
+ pad=(0, 0),
799
+ pool_type='avg')
800
+ body = xbody + ybody
801
+ body = body / 2
802
+ body = mx.sym.broadcast_mul(body, zbody)
803
+ else: #sta==1
804
+ body = xbody + ybody
805
+ body = body / 2
806
+ ret = body, x[1]
807
+
808
+ assert ret is not None
809
+ self.sym_map[key] = ret
810
+ return ret
811
+
812
+ def get(self):
813
+ return self.get_output(1, 1)[0]
814
+
815
+
816
+ class SymCoherent:
817
+ def __init__(self, per_batch_size):
818
+ self.per_batch_size = per_batch_size
819
+ self.flip_order = [
820
+ 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 26, 25,
821
+ 24, 23, 22, 21, 20, 19, 18, 17, 27, 28, 29, 30, 35, 34, 33, 32, 31,
822
+ 45, 44, 43, 42, 47, 46, 39, 38, 37, 36, 41, 40, 54, 53, 52, 51, 50,
823
+ 49, 48, 59, 58, 57, 56, 55, 64, 63, 62, 61, 60, 67, 66, 65
824
+ ]
825
+
826
+ def get(self, data):
827
+ #data.shape[0]==per_batch_size
828
+ b = self.per_batch_size // 2
829
+ ux = mx.sym.slice_axis(data, axis=0, begin=0, end=b)
830
+ dx = mx.sym.slice_axis(data, axis=0, begin=b, end=b * 2)
831
+ ux = mx.sym.flip(ux, axis=3)
832
+ #ux = mx.sym.take(ux, indices = self.flip_order, axis=0)
833
+ ux_list = []
834
+ for o in self.flip_order:
835
+ _ux = mx.sym.slice_axis(ux, axis=1, begin=o, end=o + 1)
836
+ ux_list.append(_ux)
837
+ ux = mx.sym.concat(*ux_list, dim=1)
838
+ return ux, dx
839
+
840
+
841
+ def l2_loss(x, y):
842
+ loss = x - y
843
+ loss = mx.symbol.smooth_l1(loss, scalar=1.0)
844
+ #loss = loss*loss
845
+ loss = mx.symbol.mean(loss)
846
+ return loss
847
+
848
+
849
+ def ce_loss(x, y):
850
+ #loss = mx.sym.SoftmaxOutput(data = x, label = y, normalization='valid', multi_output=True)
851
+ x_max = mx.sym.max(x, axis=[2, 3], keepdims=True)
852
+ x = mx.sym.broadcast_minus(x, x_max)
853
+ body = mx.sym.exp(x)
854
+ sums = mx.sym.sum(body, axis=[2, 3], keepdims=True)
855
+ body = mx.sym.broadcast_div(body, sums)
856
+ loss = mx.sym.log(body)
857
+ loss = loss * y * -1.0
858
+ loss = mx.symbol.mean(loss, axis=[1, 2, 3])
859
+ #loss = mx.symbol.mean(loss)
860
+ return loss
861
+
862
+
863
+ def get_symbol(num_classes):
864
+ m = config.multiplier
865
+ sFilters = max(int(64 * m), 32)
866
+ mFilters = max(int(128 * m), 32)
867
+ nFilters = int(256 * m)
868
+
869
+ nModules = 1
870
+ nStacks = config.net_stacks
871
+ binarize = config.net_binarize
872
+ input_size = config.input_img_size
873
+ label_size = config.output_label_size
874
+ use_coherent = config.net_coherent
875
+ use_STA = config.net_sta
876
+ N = config.net_n
877
+ DCN = config.net_dcn
878
+ per_batch_size = config.per_batch_size
879
+ print('binarize', binarize)
880
+ print('use_coherent', use_coherent)
881
+ print('use_STA', use_STA)
882
+ print('use_N', N)
883
+ print('use_DCN', DCN)
884
+ print('per_batch_size', per_batch_size)
885
+ #assert(label_size==64 or label_size==32)
886
+ #assert(input_size==128 or input_size==256)
887
+ coherentor = SymCoherent(per_batch_size)
888
+ D = input_size // label_size
889
+ print(input_size, label_size, D)
890
+ data = mx.sym.Variable(name='data')
891
+ data = data - 127.5
892
+ data = data * 0.0078125
893
+ gt_label = mx.symbol.Variable(name='softmax_label')
894
+ losses = []
895
+ closses = []
896
+ ref_label = gt_label
897
+ if D == 4:
898
+ body = Conv(data=data,
899
+ num_filter=sFilters,
900
+ kernel=(7, 7),
901
+ stride=(2, 2),
902
+ pad=(3, 3),
903
+ no_bias=True,
904
+ name="conv0",
905
+ workspace=workspace)
906
+ else:
907
+ body = Conv(data=data,
908
+ num_filter=sFilters,
909
+ kernel=(3, 3),
910
+ stride=(1, 1),
911
+ pad=(1, 1),
912
+ no_bias=True,
913
+ name="conv0",
914
+ workspace=workspace)
915
+ body = mx.sym.BatchNorm(data=body,
916
+ fix_gamma=False,
917
+ eps=2e-5,
918
+ momentum=bn_mom,
919
+ name='bn0')
920
+ body = Act(data=body, act_type='relu', name='relu0')
921
+
922
+ dcn = False
923
+ body = conv_block(body, mFilters, (1, 1), sFilters == mFilters, 'res0',
924
+ False, dcn, 1)
925
+
926
+ body = mx.sym.Pooling(data=body,
927
+ kernel=(2, 2),
928
+ stride=(2, 2),
929
+ pad=(0, 0),
930
+ pool_type='max')
931
+
932
+ body = conv_block(body, mFilters, (1, 1), True, 'res1', False, dcn,
933
+ 1) #TODO
934
+ body = conv_block(body, nFilters, (1, 1), mFilters == nFilters, 'res2',
935
+ binarize, dcn, 1) #binarize=True?
936
+
937
+ heatmap = None
938
+
939
+ for i in range(nStacks):
940
+ shortcut = body
941
+ if config.net_sta > 0:
942
+ sta = STA(body, nFilters, nModules, config.net_n + 1, workspace,
943
+ 'sta%d' % (i))
944
+ body = sta.get()
945
+ else:
946
+ body = hourglass(body, nFilters, nModules, config.net_n, workspace,
947
+ 'stack%d_hg' % (i), binarize, dcn)
948
+ for j in range(nModules):
949
+ body = conv_block(body, nFilters, (1, 1), True,
950
+ 'stack%d_unit%d' % (i, j), binarize, dcn, 1)
951
+ _dcn = True if config.net_dcn >= 2 else False
952
+ ll = ConvFactory(body,
953
+ nFilters, (1, 1),
954
+ dcn=_dcn,
955
+ name='stack%d_ll' % (i))
956
+ _name = "heatmap%d" % (i) if i < nStacks - 1 else "heatmap"
957
+ _dcn = True if config.net_dcn >= 2 else False
958
+ if not _dcn:
959
+ out = Conv(data=ll,
960
+ num_filter=num_classes,
961
+ kernel=(1, 1),
962
+ stride=(1, 1),
963
+ pad=(0, 0),
964
+ name=_name,
965
+ workspace=workspace)
966
+ else:
967
+ out_offset = mx.symbol.Convolution(name=_name + '_offset',
968
+ data=ll,
969
+ num_filter=18,
970
+ pad=(1, 1),
971
+ kernel=(3, 3),
972
+ stride=(1, 1))
973
+ out = mx.contrib.symbol.DeformableConvolution(
974
+ name=_name,
975
+ data=ll,
976
+ offset=out_offset,
977
+ num_filter=num_classes,
978
+ pad=(1, 1),
979
+ kernel=(3, 3),
980
+ num_deformable_group=1,
981
+ stride=(1, 1),
982
+ dilate=(1, 1),
983
+ no_bias=False)
984
+ #out = Conv(data=ll, num_filter=num_classes, kernel=(3,3), stride=(1,1), pad=(1,1),
985
+ # name=_name, workspace=workspace)
986
+ if i == nStacks - 1:
987
+ heatmap = out
988
+ loss = ce_loss(out, ref_label)
989
+ #loss = loss/nStacks
990
+ #loss = l2_loss(out, ref_label)
991
+ losses.append(loss)
992
+ if config.net_coherent > 0:
993
+ ux, dx = coherentor.get(out)
994
+ closs = l2_loss(ux, dx)
995
+ closs = closs / nStacks
996
+ closses.append(closs)
997
+
998
+ if i < nStacks - 1:
999
+ ll2 = Conv(data=ll,
1000
+ num_filter=nFilters,
1001
+ kernel=(1, 1),
1002
+ stride=(1, 1),
1003
+ pad=(0, 0),
1004
+ name="stack%d_ll2" % (i),
1005
+ workspace=workspace)
1006
+ out2 = Conv(data=out,
1007
+ num_filter=nFilters,
1008
+ kernel=(1, 1),
1009
+ stride=(1, 1),
1010
+ pad=(0, 0),
1011
+ name="stack%d_out2" % (i),
1012
+ workspace=workspace)
1013
+ body = mx.symbol.add_n(shortcut, ll2, out2)
1014
+ _dcn = True if (config.net_dcn == 1
1015
+ or config.net_dcn == 3) else False
1016
+ if _dcn:
1017
+ _name = "stack%d_out3" % (i)
1018
+ out3_offset = mx.symbol.Convolution(name=_name + '_offset',
1019
+ data=body,
1020
+ num_filter=18,
1021
+ pad=(1, 1),
1022
+ kernel=(3, 3),
1023
+ stride=(1, 1))
1024
+ out3 = mx.contrib.symbol.DeformableConvolution(
1025
+ name=_name,
1026
+ data=body,
1027
+ offset=out3_offset,
1028
+ num_filter=nFilters,
1029
+ pad=(1, 1),
1030
+ kernel=(3, 3),
1031
+ num_deformable_group=1,
1032
+ stride=(1, 1),
1033
+ dilate=(1, 1),
1034
+ no_bias=False)
1035
+ body = out3
1036
+
1037
+ pred = mx.symbol.BlockGrad(heatmap)
1038
+ #loss = mx.symbol.add_n(*losses)
1039
+ #loss = mx.symbol.MakeLoss(loss)
1040
+ #syms = [loss]
1041
+ syms = []
1042
+ for loss in losses:
1043
+ loss = mx.symbol.MakeLoss(loss)
1044
+ syms.append(loss)
1045
+ if len(closses) > 0:
1046
+ coherent_weight = 0.0001
1047
+ closs = mx.symbol.add_n(*closses)
1048
+ closs = mx.symbol.MakeLoss(closs, grad_scale=coherent_weight)
1049
+ syms.append(closs)
1050
+ syms.append(pred)
1051
+ sym = mx.symbol.Group(syms)
1052
+ return sym
1053
+
1054
+
1055
+ def init_weights(sym, data_shape_dict):
1056
+ #print('in hg')
1057
+ arg_name = sym.list_arguments()
1058
+ aux_name = sym.list_auxiliary_states()
1059
+ arg_shape, _, aux_shape = sym.infer_shape(**data_shape_dict)
1060
+ arg_shape_dict = dict(zip(arg_name, arg_shape))
1061
+ aux_shape_dict = dict(zip(aux_name, aux_shape))
1062
+ #print(aux_shape)
1063
+ #print(aux_params)
1064
+ #print(arg_shape_dict)
1065
+ arg_params = {}
1066
+ aux_params = {}
1067
+ for k in arg_shape_dict:
1068
+ v = arg_shape_dict[k]
1069
+ #print(k,v)
1070
+ if k.endswith('offset_weight') or k.endswith('offset_bias'):
1071
+ print('initializing', k)
1072
+ arg_params[k] = mx.nd.zeros(shape=v)
1073
+ elif k.startswith('fc6_'):
1074
+ if k.endswith('_weight'):
1075
+ print('initializing', k)
1076
+ arg_params[k] = mx.random.normal(0, 0.01, shape=v)
1077
+ elif k.endswith('_bias'):
1078
+ print('initializing', k)
1079
+ arg_params[k] = mx.nd.zeros(shape=v)
1080
+ elif k.find('upsampling') >= 0:
1081
+ print('initializing upsampling_weight', k)
1082
+ arg_params[k] = mx.nd.zeros(shape=arg_shape_dict[k])
1083
+ init = mx.init.Initializer()
1084
+ init._init_bilinear(k, arg_params[k])
1085
+ return arg_params, aux_params
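As a reading aid for the symbol code above: `ce_loss` builds a spatial softmax over each landmark's heatmap and then a cross-entropy against the target heatmap, averaged per sample. A NumPy sketch of the same computation (array shapes assumed to be `(batch, num_classes, H, W)`):

```python
import numpy as np

def ce_loss_np(x, y):
    """NumPy restatement of ce_loss: per-channel spatial softmax + cross-entropy."""
    x = x - x.max(axis=(2, 3), keepdims=True)   # subtract the max for numerical stability
    p = np.exp(x)
    p = p / p.sum(axis=(2, 3), keepdims=True)   # softmax over the H*W heatmap grid
    loss = -(y * np.log(p))                     # cross-entropy against the target heatmap
    return loss.mean(axis=(1, 2, 3))            # one loss value per sample
```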
insightface/alignment/heatmap/test.py ADDED
@@ -0,0 +1,100 @@
1
+ import argparse
2
+ import cv2
3
+ import sys
4
+ import numpy as np
5
+ import os
6
+ import mxnet as mx
7
+ import datetime
8
+ import img_helper
9
+ sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'deploy'))
10
+ from mtcnn_detector import MtcnnDetector
11
+
12
+
13
+ class Handler:
14
+ def __init__(self, prefix, epoch, ctx_id=0):
15
+ print('loading', prefix, epoch)
16
+ if ctx_id >= 0:
17
+ ctx = mx.gpu(ctx_id)
18
+ else:
19
+ ctx = mx.cpu()
20
+ sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
21
+ all_layers = sym.get_internals()
22
+ sym = all_layers['heatmap_output']
23
+ image_size = (128, 128)
24
+ self.image_size = image_size
25
+ model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
26
+ #model = mx.mod.Module(symbol=sym, context=ctx)
27
+ model.bind(for_training=False,
28
+ data_shapes=[('data', (1, 3, image_size[0], image_size[1]))
29
+ ])
30
+ model.set_params(arg_params, aux_params)
31
+ self.model = model
32
+ mtcnn_path = os.path.join(os.path.dirname(__file__), '..', 'deploy',
33
+ 'mtcnn-model')
34
+ self.det_threshold = [0.6, 0.7, 0.8]
35
+ self.detector = MtcnnDetector(model_folder=mtcnn_path,
36
+ ctx=ctx,
37
+ num_worker=1,
38
+ accurate_landmark=True,
39
+ threshold=self.det_threshold)
40
+
41
+ def get(self, img):
42
+ ret = self.detector.detect_face(img, det_type=0)
43
+ if ret is None:
44
+ return None
45
+ bbox, points = ret
46
+ if bbox.shape[0] == 0:
47
+ return None
48
+ bbox = bbox[0, 0:4]
49
+ points = points[0, :].reshape((2, 5)).T
50
+ M = img_helper.estimate_trans_bbox(bbox, self.image_size[0], s=2.0)
51
+ rimg = cv2.warpAffine(img, M, self.image_size, borderValue=0.0)
52
+ img = cv2.cvtColor(rimg, cv2.COLOR_BGR2RGB)
53
+ img = np.transpose(img, (2, 0, 1)) #3*112*112, RGB
54
+ input_blob = np.zeros((1, 3, self.image_size[1], self.image_size[0]),
55
+ dtype=np.uint8)
56
+ input_blob[0] = img
57
+ ta = datetime.datetime.now()
58
+ data = mx.nd.array(input_blob)
59
+ db = mx.io.DataBatch(data=(data, ))
60
+ self.model.forward(db, is_train=False)
61
+ alabel = self.model.get_outputs()[-1].asnumpy()[0]
62
+ tb = datetime.datetime.now()
63
+ print('module time cost', (tb - ta).total_seconds())
64
+ ret = np.zeros((alabel.shape[0], 2), dtype=np.float32)
65
+ for i in range(alabel.shape[0]):
66
+ a = cv2.resize(alabel[i], (self.image_size[1], self.image_size[0]))
67
+ ind = np.unravel_index(np.argmax(a, axis=None), a.shape)
68
+ #ret[i] = (ind[0], ind[1]) #h, w
69
+ ret[i] = (ind[1], ind[0]) #w, h
70
+ return ret, M
71
+
72
+
73
+ ctx_id = 4
74
+ img_path = '../deploy/Tom_Hanks_54745.png'
75
+ img = cv2.imread(img_path)
76
+ #img = np.zeros( (128,128,3), dtype=np.uint8 )
77
+
78
+ handler = Handler('./model/HG', 1, ctx_id)
79
+ for _ in range(10):
80
+ ta = datetime.datetime.now()
81
+ landmark, M = handler.get(img)
82
+ tb = datetime.datetime.now()
83
+ print('get time cost', (tb - ta).total_seconds())
84
+ #visualize landmark
85
+ IM = cv2.invertAffineTransform(M)
86
+ for i in range(landmark.shape[0]):
87
+ p = landmark[i]
88
+ point = np.ones((3, ), dtype=np.float32)
89
+ point[0:2] = p
90
+ point = np.dot(IM, point)
91
+ landmark[i] = point[0:2]
92
+
93
+ for i in range(landmark.shape[0]):
94
+ p = landmark[i]
95
+ point = (int(p[0]), int(p[1]))
96
+ cv2.circle(img, point, 1, (0, 255, 0), 2)
97
+
98
+ filename = './landmark_test.png'
99
+ print('writing', filename)
100
+ cv2.imwrite(filename, img)
insightface/alignment/heatmap/test_rec_nme.py ADDED
@@ -0,0 +1,71 @@
1
+ import argparse
2
+ import cv2
3
+ import sys
4
+ import numpy as np
5
+ import os
6
+ import mxnet as mx
7
+ import datetime
8
+ import img_helper
9
+ from config import config
10
+ from data import FaceSegIter
11
+ from metric import LossValueMetric, NMEMetric
12
+
13
+ parser = argparse.ArgumentParser(description='test nme on rec data')
14
+ # general
15
+ parser.add_argument('--rec',
16
+ default='./data_2d/ibug.rec',
17
+ help='rec data path')
18
+ parser.add_argument('--prefix', default='', help='model prefix')
19
+ parser.add_argument('--epoch', type=int, default=1, help='model epoch')
20
+ parser.add_argument('--gpu', type=int, default=0, help='')
21
+ parser.add_argument('--landmark-type', default='2d', help='')
22
+ parser.add_argument('--image-size', type=int, default=128, help='')
23
+ args = parser.parse_args()
24
+
25
+ rec_path = args.rec
26
+ ctx_id = args.gpu
27
+ prefix = args.prefix
28
+ epoch = args.epoch
29
+ image_size = (args.image_size, args.image_size)
30
+ config.landmark_type = args.landmark_type
31
+ config.input_img_size = image_size[0]
32
+
33
+ if ctx_id >= 0:
34
+ ctx = mx.gpu(ctx_id)
35
+ else:
36
+ ctx = mx.cpu()
37
+ sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
38
+ all_layers = sym.get_internals()
39
+ sym = all_layers['heatmap_output']
40
+ #model = mx.mod.Module(symbol=sym, context=ctx, data_names=['data'], label_names=['softmax_label'])
41
+ model = mx.mod.Module(symbol=sym,
42
+ context=ctx,
43
+ data_names=['data'],
44
+ label_names=None)
45
+ #model = mx.mod.Module(symbol=sym, context=ctx)
46
+ model.bind(for_training=False,
47
+ data_shapes=[('data', (1, 3, image_size[0], image_size[1]))])
48
+ model.set_params(arg_params, aux_params)
49
+
50
+ val_iter = FaceSegIter(
51
+ path_imgrec=rec_path,
52
+ batch_size=1,
53
+ aug_level=0,
54
+ )
55
+ _metric = NMEMetric()
56
+ #val_metric = mx.metric.create(_metric)
57
+ #val_metric.reset()
58
+ #val_iter.reset()
59
+ nme = []
60
+ for i, eval_batch in enumerate(val_iter):
61
+ if i % 10 == 0:
62
+ print('processing', i)
63
+ #print(eval_batch.data[0].shape, eval_batch.label[0].shape)
64
+ batch_data = mx.io.DataBatch(eval_batch.data)
65
+ model.forward(batch_data, is_train=False)
66
+ #model.update_metric(val_metric, eval_batch.label, True)
67
+ pred_label = model.get_outputs()[-1].asnumpy()
68
+ label = eval_batch.label[0].asnumpy()
69
+ _nme = _metric.cal_nme(label, pred_label)
70
+ nme.append(_nme)
71
+ print(np.mean(nme))
insightface/alignment/heatmap/train.py ADDED
@@ -0,0 +1,236 @@
1
+ from __future__ import absolute_import
2
+ from __future__ import division
3
+ from __future__ import print_function
4
+
5
+ import logging
6
+ import argparse
7
+ from data import FaceSegIter
8
+ import mxnet as mx
9
+ import mxnet.optimizer as optimizer
10
+ import numpy as np
11
+ import os
12
+ import sys
13
+ import math
14
+ import random
15
+ import cv2
16
+ from config import config, default, generate_config
17
+ from optimizer import ONadam
18
+ from metric import LossValueMetric, NMEMetric
19
+ sys.path.append(os.path.join(os.path.dirname(__file__), 'symbol'))
20
+ import sym_heatmap
21
+ #import sym_fc
22
+ #from symbol import fc
23
+
24
+ args = None
25
+ logger = logging.getLogger()
26
+ logger.setLevel(logging.INFO)
27
+
28
+
29
+ def main(args):
30
+ _seed = 727
31
+ random.seed(_seed)
32
+ np.random.seed(_seed)
33
+ mx.random.seed(_seed)
34
+ ctx = []
35
+ cvd = os.environ.get('CUDA_VISIBLE_DEVICES', '').strip()  # fall back to CPU when the variable is unset
36
+ if len(cvd) > 0:
37
+ for i in range(len(cvd.split(','))):
38
+ ctx.append(mx.gpu(i))
39
+ if len(ctx) == 0:
40
+ ctx = [mx.cpu()]
41
+ print('use cpu')
42
+ else:
43
+ print('gpu num:', len(ctx))
44
+ #ctx = [mx.gpu(0)]
45
+ args.ctx_num = len(ctx)
46
+
47
+ args.batch_size = args.per_batch_size * args.ctx_num
48
+ config.per_batch_size = args.per_batch_size
49
+
50
+ print('Call with', args, config)
51
+ train_iter = FaceSegIter(
52
+ path_imgrec=os.path.join(config.dataset_path, 'train.rec'),
53
+ batch_size=args.batch_size,
54
+ per_batch_size=args.per_batch_size,
55
+ aug_level=1,
56
+ exf=args.exf,
57
+ args=args,
58
+ )
59
+
60
+ data_shape = train_iter.get_data_shape()
61
+ #label_shape = train_iter.get_label_shape()
62
+ sym = sym_heatmap.get_symbol(num_classes=config.num_classes)
63
+ if len(args.pretrained) == 0:
64
+ #data_shape_dict = {'data' : (args.per_batch_size,)+data_shape, 'softmax_label' : (args.per_batch_size,)+label_shape}
65
+ data_shape_dict = train_iter.get_shape_dict()
66
+ arg_params, aux_params = sym_heatmap.init_weights(sym, data_shape_dict)
67
+ else:
68
+ vec = args.pretrained.split(',')
69
+ print('loading', vec)
70
+ _, arg_params, aux_params = mx.model.load_checkpoint(
71
+ vec[0], int(vec[1]))
72
+ #sym, arg_params, aux_params = get_symbol(args, arg_params, aux_params)
73
+
74
+ model = mx.mod.Module(
75
+ context=ctx,
76
+ symbol=sym,
77
+ label_names=train_iter.get_label_names(),
78
+ )
79
+ #lr = 1.0e-3
80
+ #lr = 2.5e-4
81
+ _rescale_grad = 1.0 / args.ctx_num
82
+ #_rescale_grad = 1.0/args.batch_size
83
+ #lr = args.lr
84
+ #opt = optimizer.Nadam(learning_rate=args.lr, wd=args.wd, rescale_grad=_rescale_grad, clip_gradient=5.0)
85
+ if args.optimizer == 'onadam':
86
+ opt = ONadam(learning_rate=args.lr,
87
+ wd=args.wd,
88
+ rescale_grad=_rescale_grad,
89
+ clip_gradient=5.0)
90
+ elif args.optimizer == 'nadam':
91
+ opt = optimizer.Nadam(learning_rate=args.lr,
92
+ rescale_grad=_rescale_grad)
93
+ elif args.optimizer == 'rmsprop':
94
+ opt = optimizer.RMSProp(learning_rate=args.lr,
95
+ rescale_grad=_rescale_grad)
96
+ elif args.optimizer == 'adam':
97
+ opt = optimizer.Adam(learning_rate=args.lr, rescale_grad=_rescale_grad)
98
+ else:
99
+ opt = optimizer.SGD(learning_rate=args.lr,
100
+ momentum=0.9,
101
+ wd=args.wd,
102
+ rescale_grad=_rescale_grad)
103
+ initializer = mx.init.Xavier(rnd_type='gaussian',
104
+ factor_type="in",
105
+ magnitude=2)
106
+ _cb = mx.callback.Speedometer(args.batch_size, args.frequent)
107
+ _metric = LossValueMetric()
108
+ #_metric = NMEMetric()
109
+ #_metric2 = AccMetric()
110
+ #eval_metrics = [_metric, _metric2]
111
+ eval_metrics = [_metric]
112
+ lr_steps = [int(x) for x in args.lr_step.split(',')]
113
+ print('lr-steps', lr_steps)
114
+ global_step = [0]
115
+
116
+ def val_test():
117
+ all_layers = sym.get_internals()
118
+ vsym = all_layers['heatmap_output']
119
+ vmodel = mx.mod.Module(symbol=vsym, context=ctx, label_names=None)
120
+ #model.bind(data_shapes=[('data', (args.batch_size, 3, image_size[0], image_size[1]))], label_shapes=[('softmax_label', (args.batch_size,))])
121
+ vmodel.bind(data_shapes=[('data', (args.batch_size, ) + data_shape)])
122
+ arg_params, aux_params = model.get_params()
123
+ vmodel.set_params(arg_params, aux_params)
124
+ for target in config.val_targets:
125
+ _file = os.path.join(config.dataset_path, '%s.rec' % target)
126
+ if not os.path.exists(_file):
127
+ continue
128
+ val_iter = FaceSegIter(
129
+ path_imgrec=_file,
130
+ batch_size=args.batch_size,
131
+ #batch_size = 4,
132
+ aug_level=0,
133
+ args=args,
134
+ )
135
+ _metric = NMEMetric()
136
+ val_metric = mx.metric.create(_metric)
137
+ val_metric.reset()
138
+ val_iter.reset()
139
+ for i, eval_batch in enumerate(val_iter):
140
+ #print(eval_batch.data[0].shape, eval_batch.label[0].shape)
141
+ batch_data = mx.io.DataBatch(eval_batch.data)
142
+ model.forward(batch_data, is_train=False)
143
+ model.update_metric(val_metric, eval_batch.label)
144
+ nme_value = val_metric.get_name_value()[0][1]
145
+ print('[%d][%s]NME: %f' % (global_step[0], target, nme_value))
146
+
147
+ def _batch_callback(param):
148
+ _cb(param)
149
+ global_step[0] += 1
150
+ mbatch = global_step[0]
151
+ for _lr in lr_steps:
152
+ if mbatch == _lr:
153
+ opt.lr *= 0.2
154
+ print('lr change to', opt.lr)
155
+ break
156
+ if mbatch % 1000 == 0:
157
+ print('lr-batch-epoch:', opt.lr, param.nbatch, param.epoch)
158
+ if mbatch > 0 and mbatch % args.verbose == 0:
159
+ val_test()
160
+ if args.ckpt == 1:
161
+ msave = mbatch // args.verbose
162
+ print('saving', msave)
163
+ arg, aux = model.get_params()
164
+ mx.model.save_checkpoint(args.prefix, msave, model.symbol, arg,
165
+ aux)
166
+ if mbatch == lr_steps[-1]:
167
+ if args.ckpt == 2:
168
+ #msave = mbatch//args.verbose
169
+ msave = 1
170
+ print('saving', msave)
171
+ arg, aux = model.get_params()
172
+ mx.model.save_checkpoint(args.prefix, msave, model.symbol, arg,
173
+ aux)
174
+ sys.exit(0)
175
+
176
+ train_iter = mx.io.PrefetchingIter(train_iter)
177
+
178
+ model.fit(
179
+ train_iter,
180
+ begin_epoch=0,
181
+ num_epoch=9999,
182
+ #eval_data = val_iter,
183
+ eval_data=None,
184
+ eval_metric=eval_metrics,
185
+ kvstore='device',
186
+ optimizer=opt,
187
+ initializer=initializer,
188
+ arg_params=arg_params,
189
+ aux_params=aux_params,
190
+ allow_missing=True,
191
+ batch_end_callback=_batch_callback,
192
+ epoch_end_callback=None,
193
+ )
194
+
195
+
196
+ if __name__ == '__main__':
197
+ parser = argparse.ArgumentParser(description='Train face alignment')
198
+ # general
199
+ parser.add_argument('--network',
200
+ help='network name',
201
+ default=default.network,
202
+ type=str)
203
+ parser.add_argument('--dataset',
204
+ help='dataset name',
205
+ default=default.dataset,
206
+ type=str)
207
+ args, rest = parser.parse_known_args()
208
+ generate_config(args.network, args.dataset)
209
+ parser.add_argument('--prefix',
210
+ default=default.prefix,
211
+ help='directory to save model.')
212
+ parser.add_argument('--pretrained', default=default.pretrained, help='')
213
+ parser.add_argument('--optimizer', default='nadam', help='')
214
+ parser.add_argument('--lr', type=float, default=default.lr, help='')
215
+ parser.add_argument('--wd', type=float, default=default.wd, help='')
216
+ parser.add_argument('--per-batch-size',
217
+ type=int,
218
+ default=default.per_batch_size,
219
+ help='')
220
+ parser.add_argument('--lr-step',
221
+ help='learning rate steps (in batches)',
222
+ default=default.lr_step,
223
+ type=str)
224
+ parser.add_argument('--ckpt', type=int, default=1, help='')
225
+ parser.add_argument('--norm', type=int, default=0, help='')
226
+ parser.add_argument('--exf', type=int, default=1, help='')
227
+ parser.add_argument('--frequent',
228
+ type=int,
229
+ default=default.frequent,
230
+ help='')
231
+ parser.add_argument('--verbose',
232
+ type=int,
233
+ default=default.verbose,
234
+ help='')
235
+ args = parser.parse_args()
236
+ main(args)
insightface/alignment/synthetics/README.md ADDED
@@ -0,0 +1,63 @@
1
+ # Introduction
2
+
3
+ We provide training and testing tools for synthetic face data.
4
+
5
+
6
+ ## Dataset
7
+
8
+ ### Training dataset
9
+
10
+ Download the `Face Synthetics dataset` from [https://github.com/microsoft/FaceSynthetics](https://github.com/microsoft/FaceSynthetics) and extract it to a local directory.
11
+
12
+ <div align="left">
13
+ <img src="https://github.com/microsoft/FaceSynthetics/raw/main/docs/img/dataset_samples_2.jpg" width="640"/>
14
+ </div>
15
+ <br/>
16
+
17
+ Then use [tools/prepare_synthetics.py](tools/prepare_synthetics.py) for training data preparation.
18
+
19
+
20
+ ### Testing dataset
21
+
22
+ [300-W](https://ibug.doc.ic.ac.uk/resources/300-W/)
23
+
24
+
25
+ ## Pretrained Model
26
+
27
+ [ResNet50d](https://drive.google.com/file/d/1kNP7qEl3AYNbaHFUg_ZiyRB1CtfDWXR4/view?usp=sharing)
28
+
29
+
30
+ ## Train and Test
31
+
32
+ ### Prerequisites
33
+
34
+ - pytorch_lightning
35
+ - timm
36
+ - albumentations
37
+
38
+ ### Training
39
+
40
+ `` python -u trainer_synthetics.py ``
41
+
42
+ which uses `resnet50d` as the backbone by default; please check the [code](trainer_synthetics.py) for details.
43
+
44
+ ### Testing
45
+
46
+ Please check [test_synthetics.py](test_synthetics.py) for details.
47
+
48
+
49
+ ## Result Visualization (3D 68 Keypoints)
50
+
51
+ <div align="left">
52
+ <img src="https://github.com/nttstar/insightface-resources/blob/master/alignment/images/image_008_1.jpg?raw=true" width="320"/>
53
+ </div>
54
+
55
+ <div align="left">
56
+ <img src="https://github.com/nttstar/insightface-resources/blob/master/alignment/images/image_017_1.jpg?raw=true" width="320"/>
57
+ </div>
58
+
59
+ <div align="left">
60
+ <img src="https://github.com/nttstar/insightface-resources/blob/master/alignment/images/image_039.jpg?raw=true" width="320"/>
61
+ </div>
62
+
63
+
insightface/alignment/synthetics/datasets/augs.py ADDED
@@ -0,0 +1,40 @@
1
+ import numpy as np
2
+ import albumentations as A
3
+ from albumentations.core.transforms_interface import ImageOnlyTransform
4
+
5
+ class RectangleBorderAugmentation(ImageOnlyTransform):
6
+
7
+ def __init__(
8
+ self,
9
+ fill_value = 0,
10
+ limit = 0.3,
11
+ always_apply=False,
12
+ p=1.0,
13
+ ):
14
+ super(RectangleBorderAugmentation, self).__init__(always_apply, p)
15
+ assert limit>0.0 and limit<1.0
16
+ self.fill_value = fill_value  # honor the constructor argument
17
+ self.limit = limit
18
+
19
+
20
+ def apply(self, image, border_size_limit, **params):
21
+ assert len(border_size_limit)==4
22
+ border_size = border_size_limit.copy()
23
+ border_size[0] *= image.shape[1]
24
+ border_size[2] *= image.shape[1]
25
+ border_size[1] *= image.shape[0]
26
+ border_size[3] *= image.shape[0]
27
+ border_size = border_size.astype(int)
28
+ image[:,:border_size[0],:] = self.fill_value
29
+ image[:border_size[1],:,:] = self.fill_value
30
+ image[:,-border_size[2]:,:] = self.fill_value
31
+ image[-border_size[3]:,:,:] = self.fill_value
32
+ return image
33
+
34
+ def get_params(self):
35
+ border_size_limit = np.random.uniform(0.0, self.limit, size=4)
36
+ return {'border_size_limit': border_size_limit}
37
+
38
+ def get_transform_init_args_names(self):
39
+ return ('fill_value', 'limit')
40
+
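A minimal usage sketch of the transform above inside an albumentations pipeline; the limit/fill values mirror how dataset_synthetics.py uses it, while the import path and image path are only placeholders:

```python
import cv2
import albumentations as A
from augs import RectangleBorderAugmentation  # assumed import path; the class defined above

# Blank a random-width border on each side of the image (up to 33% per side).
aug = A.Compose([RectangleBorderAugmentation(limit=0.33, fill_value=0, p=1.0)])

img = cv2.imread('sample.jpg')        # placeholder path
out = aug(image=img)['image']         # same shape as img, with the borders zeroed out
```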
insightface/alignment/synthetics/datasets/dataset_synthetics.py ADDED
@@ -0,0 +1,163 @@
1
+ import os
2
+ import os.path as osp
3
+ import queue as Queue
4
+ import pickle
5
+ import threading
6
+ import logging
7
+ import numpy as np
8
+ import torch
9
+ from torch.utils.data import DataLoader, Dataset
10
+ from torchvision import transforms
11
+ import cv2
12
+ import albumentations as A
13
+ from albumentations.pytorch import ToTensorV2
14
+ from .augs import RectangleBorderAugmentation
15
+
16
+ class BackgroundGenerator(threading.Thread):
17
+ def __init__(self, generator, local_rank, max_prefetch=6):
18
+ super(BackgroundGenerator, self).__init__()
19
+ self.queue = Queue.Queue(max_prefetch)
20
+ self.generator = generator
21
+ self.local_rank = local_rank
22
+ self.daemon = True
23
+ self.start()
24
+
25
+ def run(self):
26
+ torch.cuda.set_device(self.local_rank)
27
+ for item in self.generator:
28
+ self.queue.put(item)
29
+ self.queue.put(None)
30
+
31
+ def next(self):
32
+ next_item = self.queue.get()
33
+ if next_item is None:
34
+ raise StopIteration
35
+ return next_item
36
+
37
+ def __next__(self):
38
+ return self.next()
39
+
40
+ def __iter__(self):
41
+ return self
42
+
43
+
44
+ class DataLoaderX(DataLoader):
45
+ def __init__(self, local_rank, **kwargs):
46
+ super(DataLoaderX, self).__init__(**kwargs)
47
+ self.stream = torch.cuda.Stream(local_rank)
48
+ self.local_rank = local_rank
49
+
50
+ def __iter__(self):
51
+ self.iter = super(DataLoaderX, self).__iter__()
52
+ self.iter = BackgroundGenerator(self.iter, self.local_rank)
53
+ self.preload()
54
+ return self
55
+
56
+ def preload(self):
57
+ self.batch = next(self.iter, None)
58
+ if self.batch is None:
59
+ return None
60
+ with torch.cuda.stream(self.stream):
61
+ for k in range(len(self.batch)):
62
+ self.batch[k] = self.batch[k].to(device=self.local_rank,
63
+ non_blocking=True)
64
+
65
+ def __next__(self):
66
+ torch.cuda.current_stream().wait_stream(self.stream)
67
+ batch = self.batch
68
+ if batch is None:
69
+ raise StopIteration
70
+ self.preload()
71
+ return batch
72
+
73
+
74
+
75
+ class FaceDataset(Dataset):
76
+ def __init__(self, root_dir, is_train):
77
+ super(FaceDataset, self).__init__()
78
+
79
+ #self.local_rank = local_rank
80
+ self.is_train = is_train
81
+ self.input_size = 256
82
+ self.num_kps = 68
83
+ transform_list = []
84
+ if is_train:
85
+ transform_list += \
86
+ [
87
+ A.ColorJitter(brightness=0.8, contrast=0.5, p=0.5),
88
+ A.ToGray(p=0.1),
89
+ A.ISONoise(p=0.1),
90
+ A.MedianBlur(blur_limit=(1,7), p=0.1),
91
+ A.GaussianBlur(blur_limit=(1,7), p=0.1),
92
+ A.MotionBlur(blur_limit=(5,12), p=0.1),
93
+ A.ImageCompression(quality_lower=50, quality_upper=90, p=0.05),
94
+ A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.2, rotate_limit=40, interpolation=cv2.INTER_LINEAR,
95
+ border_mode=cv2.BORDER_CONSTANT, value=0, mask_value=0, p=0.8),
96
+ A.HorizontalFlip(p=0.5),
97
+ RectangleBorderAugmentation(limit=0.33, fill_value=0, p=0.2),
98
+ ]
99
+ transform_list += \
100
+ [
101
+ A.geometric.resize.Resize(self.input_size, self.input_size, interpolation=cv2.INTER_LINEAR, always_apply=True),
102
+ A.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
103
+ ToTensorV2(),
104
+ ]
105
+ self.transform = A.ReplayCompose(
106
+ transform_list,
107
+ keypoint_params=A.KeypointParams(format='xy', remove_invisible=False)
108
+ )
109
+ self.root_dir = root_dir
110
+ with open(osp.join(root_dir, 'annot.pkl'), 'rb') as f:
111
+ annot = pickle.load(f)
112
+ self.X, self.Y = annot
113
+ train_size = int(len(self.X)*0.99)
114
+
115
+ if is_train:
116
+ self.X = self.X[:train_size]
117
+ self.Y = self.Y[:train_size]
118
+ else:
119
+ self.X = self.X[train_size:]
120
+ self.Y = self.Y[train_size:]
121
+ #if local_rank==0:
122
+ # logging.info('data_transform_list:%s'%transform_list)
123
+ flip_parts = ([1, 17], [2, 16], [3, 15], [4, 14], [5, 13], [6, 12], [7, 11], [8, 10],
124
+ [18, 27], [19, 26], [20, 25], [21, 24], [22, 23],
125
+ [32, 36], [33, 35],
126
+ [37, 46], [38, 45], [39, 44], [40, 43], [41, 48], [42, 47],
127
+ [49, 55], [50, 54], [51, 53], [62, 64], [61, 65], [68, 66], [59, 57], [60, 56])
128
+ self.flip_order = np.arange(self.num_kps)
129
+ for pair in flip_parts:
130
+ self.flip_order[pair[1]-1] = pair[0]-1
131
+ self.flip_order[pair[0]-1] = pair[1]-1
132
+ logging.info('len:%d'%len(self.X))
133
+ print('!!!len:%d'%len(self.X))
134
+
135
+ def __getitem__(self, index):
136
+ x = self.X[index]
137
+ y = self.Y[index]
138
+ image_path = os.path.join(self.root_dir, x)
139
+ img = cv2.imread(image_path)[:,:,::-1]
140
+ label = y
141
+ if self.transform is not None:
142
+ t = self.transform(image=img, keypoints=label)
143
+ flipped = False
144
+ for trans in t["replay"]["transforms"]:
145
+ if trans["__class_fullname__"].endswith('HorizontalFlip'):
146
+ if trans["applied"]:
147
+ flipped = True
148
+ img = t['image']
149
+ label = t['keypoints']
150
+ label = np.array(label, dtype=np.float32)
151
+ #print(img.shape)
152
+ if flipped:
153
+ #label[:, 0] = self.input_size - 1 - label[:, 0]  #already applied in horizontal flip aug
154
+ label = label[self.flip_order,:]
155
+ label /= (self.input_size/2)
156
+ label -= 1.0
157
+ label = label.flatten()
158
+ label = torch.tensor(label, dtype=torch.float32)
159
+ return img, label
160
+
161
+ def __len__(self):
162
+ return len(self.X)
163
+
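+ 
+ # Usage sketch (illustrative only, not part of the original file). Assuming the
+ # synthetics data was prepared into `data/synthetics` by tools/prepare_synthetics.py:
+ #
+ #   train_set = FaceDataset(root_dir='data/synthetics', is_train=True)
+ #   loader = DataLoaderX(local_rank=0, dataset=train_set, batch_size=64,
+ #                        shuffle=True, num_workers=3, pin_memory=True)
+ #   images, labels = next(iter(loader))   # (B, 3, 256, 256), (B, 136)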
insightface/alignment/synthetics/test_synthetics.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from trainer_synthetics import FaceSynthetics
3
+ import sys
4
+ import glob
5
+ import torch
6
+ import os
7
+ import numpy as np
8
+ import cv2
9
+ import os.path as osp
10
+ import insightface
11
+ from insightface.app import FaceAnalysis
12
+ from insightface.utils import face_align
13
+
14
+ flip_parts = ([1, 17], [2, 16], [3, 15], [4, 14], [5, 13], [6, 12], [7, 11], [8, 10],
15
+ [18, 27], [19, 26], [20, 25], [21, 24], [22, 23],
16
+ [32, 36], [33, 35],
17
+ [37, 46], [38, 45], [39, 44], [40, 43], [41, 48], [42, 47],
18
+ [49, 55], [50, 54], [51, 53], [62, 64], [61, 65], [68, 66], [59, 57], [60, 56])
19
+
20
+ app = FaceAnalysis()
21
+ app.prepare(ctx_id=0, det_size=(224, 224))
22
+ input_size = 256
23
+ USE_FLIP = False
24
+
25
+ root = 'data/300W/Validation'
26
+ output_dir = 'outputs/'
27
+
28
+ if not osp.exists(output_dir):
29
+ os.makedirs(output_dir)
30
+
31
+ outf = open(osp.join(output_dir, 'pred.txt'), 'w')
32
+
33
+ model = FaceSynthetics.load_from_checkpoint(sys.argv[1]).cuda()
34
+ model.eval()
35
+ for line in open(osp.join(root, '300W_validation.txt'), 'r'):
36
+ line = line.strip().split()
37
+ img_path = osp.join(root, line[0])
38
+ gt = line[1:]
39
+ #print(len(gt))
40
+ name = img_path.split('/')[-1]
41
+ img = cv2.imread(img_path)
42
+ dimg = img.copy()
43
+ faces = app.get(img, max_num=1)
44
+ if len(faces)!=1:
45
+ continue
46
+ bbox = faces[0].bbox
47
+ w, h = (bbox[2] - bbox[0]), (bbox[3] - bbox[1])
48
+ center = (bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2
49
+ rotate = 0
50
+ _scale = input_size / (max(w, h)*1.5)
51
+ aimg, M = face_align.transform(img, center, input_size, _scale, rotate)
52
+ #cv2.imwrite("outputs/a_%s"%name, aimg)
53
+ aimg = cv2.cvtColor(aimg, cv2.COLOR_BGR2RGB)
54
+ kps = None
55
+ flips = [0, 1] if USE_FLIP else [0]
56
+ for flip in flips:
57
+ input = aimg.copy()
58
+ if flip:
59
+ input = input[:,::-1,:].copy()
60
+ input = np.transpose(input, (2, 0, 1))
61
+ input = np.expand_dims(input, 0)
62
+ imgs = torch.Tensor(input).cuda()
63
+ imgs.div_(255).sub_(0.5).div_(0.5)
64
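+ # The network predicts 68 (x, y) points normalized to [-1, 1] over the aligned
+ # crop; the next two lines map them back to pixel coordinates in [0, input_size).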
+ pred = model(imgs).detach().cpu().numpy().flatten().reshape( (-1, 2) )
65
+ pred[:, 0:2] += 1
66
+ pred[:, 0:2] *= (input_size // 2)
67
+ if flip:
68
+ pred_flip = pred.copy()
69
+ pred_flip[:, 0] = input_size - 1 - pred_flip[:, 0]
70
+ for pair in flip_parts:
71
+ tmp = pred_flip[pair[0] - 1, :].copy()
72
+ pred_flip[pair[0] - 1, :] = pred_flip[pair[1] - 1, :]
73
+ pred_flip[pair[1] - 1, :] = tmp
74
+ pred = pred_flip
75
+ if kps is None:
76
+ kps = pred
77
+ else:
78
+ kps += pred
79
+ kps /= 2.0
80
+ #print(pred.shape)
81
+
82
+ IM = cv2.invertAffineTransform(M)
83
+ kps = face_align.trans_points(kps, IM)
84
+ outf.write(line[0])
85
+ outf.write(' ')
86
+ outf.write(' '.join(["%.5f"%x for x in kps.flatten()]))
87
+ outf.write("\n")
88
+ box = bbox.astype(int)
89
+ color = (0, 0, 255)
90
+ cv2.rectangle(dimg, (box[0], box[1]), (box[2], box[3]), color, 2)
91
+ kps = kps.astype(int)
92
+ #print(landmark.shape)
93
+ for l in range(kps.shape[0]):
94
+ color = (0, 0, 255)
95
+ cv2.circle(dimg, (kps[l][0], kps[l][1]), 1, color, 2)
96
+
97
+ cv2.imwrite("outputs/%s"%name, dimg)
98
+
99
+ #ret = np.argmax(feat)
100
+ #print(feat)
101
+ #outf.write("%s %.4f %.4f %.4f\n"%(line[0], feat[0], feat[1], feat[2]))
102
+
103
+ outf.close()
104
+
insightface/alignment/synthetics/tools/prepare_synthetics.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import sys
3
+ import glob
4
+ import torch
5
+ import pickle
6
+ import os
7
+ import numpy as np
8
+ import cv2
9
+ import os.path as osp
10
+ import insightface
11
+ from insightface.app import FaceAnalysis
12
+ from insightface.utils import face_align
13
+
14
+ app = FaceAnalysis()
15
+ app.prepare(ctx_id=0, det_size=(224, 224))
16
+ output_size = 384
17
+
18
+ input_dir = '/root/codebase/FaceSynthetics'
19
+ output_dir = 'data/synthetics'
20
+
21
+ if not osp.exists(output_dir):
22
+ os.makedirs(output_dir)
23
+
24
+ X = []
25
+ Y = []
26
+
27
+ for i in range(0, 100000):
28
+ if i%1000==0:
29
+ print('loading', i)
30
+ x = "%06d.png"%i
31
+ img_path = osp.join(input_dir, x)
32
+ img = cv2.imread(img_path)
33
+ dimg = img.copy()
34
+ ylines = open(osp.join(input_dir, "%06d_ldmks.txt"%i)).readlines()
35
+ ylines = ylines[:68]
36
+ y = []
37
+ for yline in ylines:
38
+ lmk = [float(x) for x in yline.strip().split()]
39
+ y.append( tuple(lmk) )
40
+ pred = np.array(y)
41
+ faces = app.get(img, max_num=1)
42
+ if len(faces)!=1:
43
+ continue
44
+ bbox = faces[0].bbox
45
+ w, h = (bbox[2] - bbox[0]), (bbox[3] - bbox[1])
46
+ center = (bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2
47
+ rotate = 0
48
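+ # Scale so that the detected face box, padded by a 1.5x margin, fills the
+ # output_size x output_size crop produced by face_align.transform below.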
+ _scale = output_size / (max(w, h)*1.5)
49
+ aimg, M = face_align.transform(dimg, center, output_size, _scale, rotate)
50
+ pred = face_align.trans_points(pred, M)
51
+ #box = bbox.astype(np.int)
52
+ #color = (0, 0, 255)
53
+ #cv2.rectangle(dimg, (box[0], box[1]), (box[2], box[3]), color, 2)
54
+
55
+ #kps = pred.astype(np.int)
56
+ #for l in range(kps.shape[0]):
57
+ # color = (0, 0, 255)
58
+ # cv2.circle(aimg, (kps[l][0], kps[l][1]), 1, color, 2)
59
+ x = x.replace('png', 'jpg')
60
+ X.append(x)
61
+ y = []
62
+ for k in range(pred.shape[0]):
63
+ y.append( (pred[k][0], pred[k][1]) )
64
+ Y.append(y)
65
+ cv2.imwrite("%s/%s"%(output_dir, x), aimg)
66
+
67
+
68
+ with open(osp.join(output_dir, 'annot.pkl'), 'wb') as pfile:
69
+ pickle.dump((X, Y), pfile, protocol=pickle.HIGHEST_PROTOCOL)
70
+
insightface/alignment/synthetics/trainer_synthetics.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from argparse import ArgumentParser
2
+
3
+ import os
4
+ import os.path as osp
5
+ import torch
6
+ import torch.nn as nn
7
+ from torch.nn import functional as F
8
+ from torch.utils.data import DataLoader
9
+ import pytorch_lightning as pl
10
+ from pytorch_lightning.callbacks import ModelCheckpoint
11
+ from pytorch_lightning.callbacks import LearningRateMonitor
12
+ from pytorch_lightning.loggers import TensorBoardLogger
13
+ import timm
14
+ from datasets.dataset_synthetics import FaceDataset, DataLoaderX
15
+
16
+
17
+ class FaceSynthetics(pl.LightningModule):
18
+ def __init__(self, backbone):
19
+ super().__init__()
20
+ self.save_hyperparameters()
21
+ backbone = timm.create_model(backbone, num_classes=68*2)
22
+ self.backbone = backbone
23
+ self.loss = nn.L1Loss(reduction='mean')
24
+ self.hard_mining = False
25
+
26
+ def forward(self, x):
27
+ # use forward for inference/predictions
28
+ y = self.backbone(x)
29
+ return y
30
+
31
+ def training_step(self, batch, batch_idx):
32
+ x, y = batch
33
+ y_hat = self.backbone(x)
34
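+ # Optional online hard example mining: keep only the hardest 50% of samples
+ # in the batch (by mean L1 error) when computing the loss.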
+ if self.hard_mining:
35
+ loss = torch.abs(y_hat - y) #(B,K)
36
+ loss = torch.mean(loss, dim=1) #(B,)
37
+ B = len(loss)
38
+ S = int(B*0.5)
39
+ loss, _ = torch.sort(loss, descending=True)
40
+ loss = loss[:S]
41
+ loss = torch.mean(loss) * 5.0
42
+ else:
43
+ loss = self.loss(y_hat, y) * 5.0
44
+ self.log('train_loss', loss, on_epoch=True)
45
+ return loss
46
+
47
+ def validation_step(self, batch, batch_idx):
48
+ x, y = batch
49
+ y_hat = self.backbone(x)
50
+ loss = self.loss(y_hat, y)
51
+ self.log('val_loss', loss, on_step=True)
52
+
53
+ def test_step(self, batch, batch_idx):
54
+ x, y = batch
55
+ y_hat = self.backbone(x)
56
+ loss = self.loss(y_hat, y)
57
+ self.log('test_loss', loss)
58
+
59
+ def configure_optimizers(self):
60
+ #return torch.optim.Adam(self.parameters(), lr=0.0002)
61
+ opt = torch.optim.SGD(self.parameters(), lr = 0.1, momentum=0.9, weight_decay = 0.0005)
62
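+ # Step decay: multiply the learning rate by 0.1 at epochs 15, 25 and 28.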
+ def lr_step_func(epoch):
63
+ return 0.1 ** len([m for m in [15, 25, 28] if m <= epoch])
64
+ scheduler = torch.optim.lr_scheduler.LambdaLR(
65
+ optimizer=opt, lr_lambda=lr_step_func)
66
+ lr_scheduler = {
67
+ 'scheduler': scheduler,
68
+ 'name': 'learning_rate',
69
+ 'interval':'epoch',
70
+ 'frequency': 1}
71
+ return [opt], [lr_scheduler]
72
+
73
+
74
+
75
+ def cli_main():
76
+ pl.seed_everything(727)
77
+
78
+ # ------------
79
+ # args
80
+ # ------------
81
+ parser = ArgumentParser()
82
+ parser.add_argument('--backbone', default='resnet50d', type=str)
83
+ parser.add_argument('--batch_size', default=64, type=int)
84
+ parser.add_argument('--root', default='data/synthetics', type=str)
85
+ parser.add_argument('--num-gpus', default=2, type=int)
86
+ parser.add_argument('--tf32', action='store_true')
87
+ parser = pl.Trainer.add_argparse_args(parser)
88
+ args = parser.parse_args()
89
+
90
+ if not args.tf32:
91
+ torch.backends.cuda.matmul.allow_tf32 = False
92
+ torch.backends.cudnn.allow_tf32 = False
93
+ else:
94
+ torch.backends.cuda.matmul.allow_tf32 = True
95
+ torch.backends.cudnn.allow_tf32 = True
96
+ torch.backends.cudnn.benchmark = True
97
+
98
+ # ------------
99
+ # data
100
+ # ------------
101
+ train_set = FaceDataset(root_dir=args.root, is_train=True)
102
+ val_set = FaceDataset(root_dir=args.root, is_train=False)
103
+
104
+ train_loader = DataLoader(train_set, batch_size=args.batch_size, shuffle=True, num_workers=3, pin_memory=True)
105
+ val_loader = DataLoader(val_set, batch_size=args.batch_size, shuffle=False)
106
+
107
+ # ------------
108
+ # model
109
+ # ------------
110
+ model = FaceSynthetics(backbone=args.backbone)
111
+ ckpt_path = 'work_dirs/synthetics'
112
+ if not os.path.exists(ckpt_path):
113
+ os.makedirs(ckpt_path)
114
+
115
+ # ------------
116
+ # training
117
+ # ------------
118
+ checkpoint_callback = ModelCheckpoint(
119
+ monitor='val_loss',
120
+ dirpath=ckpt_path,
121
+ filename='{epoch:02d}-{val_loss:.6f}',
122
+ save_top_k=10,
123
+ mode='min',
124
+ )
125
+ lr_monitor = LearningRateMonitor(logging_interval='step')
126
+ trainer = pl.Trainer(
127
+ gpus = args.num_gpus,
128
+ accelerator="ddp",
129
+ benchmark=True,
130
+ logger=TensorBoardLogger(osp.join(ckpt_path, 'logs')),
131
+ callbacks=[checkpoint_callback, lr_monitor],
132
+ check_val_every_n_epoch=1,
133
+ progress_bar_refresh_rate=1,
134
+ max_epochs=30,
135
+ )
136
+ trainer.fit(model, train_loader, val_loader)
137
+
138
+ if __name__ == '__main__':
139
+ cli_main()
140
+
insightface/attribute/README.md ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Face Attribute
2
+
3
+
4
+ <div align="left">
5
+ <img src="https://insightface.ai/assets/img/custom/logo3.jpg" width="320"/>
6
+ </div>
7
+
8
+
9
+ ## Introduction
10
+
11
+ These are the face attribute methods of [InsightFace](https://insightface.ai).
12
+
13
+
14
+ <div align="left">
15
+ <img src="https://insightface.ai/assets/img/github/t1_genderage.jpg" width="600"/>
16
+ </div>
17
+
18
+
19
+
20
+ ## Methods
21
+
22
+
23
+ Supported methods:
24
+
25
+ - [x] [Gender_Age](gender_age)
26
+
27
+
28
+
29
+ ## Contributing
30
+
31
+ We appreciate all contributions to improve the face attribute module of InsightFace.
32
+
33
+
insightface/attribute/_datasets_/README.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Face Attribute Datasets
2
+
3
+ (Updating)
4
+
5
+ ## Training Datasets
6
+
7
+ ### CelebA
8
+
9
+ https://mmlab.ie.cuhk.edu.hk/projects/CelebA.html
10
+
11
+
12
+
13
+ ## Test Datasets
14
+
15
+
insightface/attribute/gender_age/test.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import cv2
3
+ import sys
4
+ import numpy as np
5
+ import insightface
6
+ from insightface.app import FaceAnalysis
7
+ from insightface.data import get_image as ins_get_image
8
+
9
+
10
+ parser = argparse.ArgumentParser(description='insightface gender-age test')
11
+ # general
12
+ parser.add_argument('--ctx', default=0, type=int, help='ctx id, <0 means using cpu')
13
+ args = parser.parse_args()
14
+
15
+ app = FaceAnalysis(allowed_modules=['detection', 'genderage'])
16
+ app.prepare(ctx_id=args.ctx, det_size=(640,640))
17
+
18
+ img = ins_get_image('t1')
19
+ faces = app.get(img)
20
+ assert len(faces)==6
21
+ for face in faces:
22
+ print(face.bbox)
23
+ print(face.sex, face.age)
24
+
insightface/benchmarks/train/nvidia_a10.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Training performance report on NVIDIA A10
2
+
3
+ [NVIDIA A10 Tensor Core GPU](https://www.nvidia.com/en-us/data-center/products/a10-gpu/)
4
+
5
+ We can use the A10 to train deep learning models thanks to its FP16 and TF32 support.
6
+
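+ As a rough sketch (not part of this benchmark; `model`, `images` and `optimizer` are placeholders), TF32 and FP16 are typically enabled in PyTorch 1.9 like this:
+ 
+ ```python
+ import torch
+ 
+ # TF32 matmul/convolutions on Ampere GPUs such as the A10
+ torch.backends.cuda.matmul.allow_tf32 = True
+ torch.backends.cudnn.allow_tf32 = True
+ 
+ # FP16 via automatic mixed precision
+ scaler = torch.cuda.amp.GradScaler()
+ with torch.cuda.amp.autocast():
+     loss = model(images).sum()   # placeholder forward pass and loss
+ scaler.scale(loss).backward()
+ scaler.step(optimizer)
+ scaler.update()
+ ```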
7
+
8
+
9
+ ## Test Server Spec
10
+
11
+ | Key | Value |
12
+ | ------------ | ------------------------------------------------ |
13
+ | System | ServMax G408-X2 Rackmountable Server |
14
+ | CPU | 2 x Intel(R) Xeon(R) Gold 5220R CPU @ 2.20GHz |
15
+ | Memory | 384GB, 12 x Samsung 32GB DDR4-2933 |
16
+ | GPU | 8 x NVIDIA A10 22GB |
17
+ | Cooling | 2x Customized GPU Kit for GPU support FAN-1909L2 |
18
+ | Hard Drive | Intel SSD S4500 1.9TB/SATA/TLC/2.5" |
19
+ | OS | Ubuntu 16.04.7 LTS |
20
+ | Installation | CUDA 11.1, cuDNN 8.0.5 |
21
+ | Installation | Python 3.7.10 |
22
+ | Installation | PyTorch 1.9.0 (conda) |
23
+
24
+ This server is donated by [AMAX](https://www.amaxchina.com/), many thanks!
25
+
26
+
27
+
28
+ ## Experiments on arcface_torch
29
+
30
+ We report the training speed in the following table. Please also note that:
31
+
32
+ 1. The training dataset is in MXNet record format and located on an SSD hard drive.
33
+
34
+ 2. The embedding size is set to 512 in all experiments.
35
+
36
+ 3. We use a large dataset which contains about 618K identities to simulate real cases.
37
+
38
+ | Dataset | Classes | Backbone | Batch-size | FP16 | TF32 | Samples/sec |
39
+ | ----------- | ------- | ----------- | ---------- | ---- | ---- | ----------- |
40
+ | WebFace600K | 618K | IResNet-50 | 1024 | × | × | ~2040 |
41
+ | WebFace600K | 618K | IResNet-50 | 1024 | × | √ | ~2255 |
42
+ | WebFace600K | 618K | IResNet-50 | 1024 | √ | × | ~3300 |
43
+ | WebFace600K | 618K | IResNet-50 | 1024 | √ | √ | ~3360 |
44
+ | WebFace600K | 618K | IResNet-50 | 2048 | √ | √ | ~3940 |
45
+ | WebFace600K | 618K | IResNet-100 | 1024 | √ | √ | ~2210 |
46
+ | WebFace600K | 618K | IResNet-180 | 1024 | √ | √ | ~1410 |
47
+
48
+
insightface/benchmarks/train/nvidia_a100.md ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Training performance report on NVIDIA A100
2
+
3
+ [NVIDIA A100 Tensor Core GPU](https://www.nvidia.com/en-us/data-center/a100/)
4
+
5
+
6
+
7
+ ## Test Server Spec
8
+
9
+ | Key | Value |
10
+ | ------------ | ------------------------------------------------ |
11
+ | System | ServMax G408-X2 Rackmountable Server |
12
+ | CPU | 2 x Intel(R) Xeon(R) Gold 5220R CPU @ 2.20GHz |
13
+ | Memory | 384GB, 12 x Samsung 32GB DDR4-2933 |
14
+ | GPU | 8 x NVIDIA A100 80GB |
15
+ | Cooling | 2x Customized GPU Kit for GPU support FAN-1909L2 |
16
+ | Hard Drive | Intel SSD S4500 1.9TB/SATA/TLC/2.5" |
17
+ | OS | Ubuntu 16.04.7 LTS |
18
+ | Installation | CUDA 11.1, cuDNN 8.0.5 |
19
+ | Installation | Python 3.7.10 |
20
+ | Installation | PyTorch 1.9.0 (conda) |
21
+
22
+ This server is donated by [AMAX](https://www.amaxchina.com/), many thanks!
23
+
24
+
25
+
26
+ ## Experiments on arcface_torch
27
+
28
+ We report the training speed in the following table. Please also note that:
29
+
30
+ 1. The training dataset is in MXNet record format and located on an SSD hard drive.
31
+ 2. The embedding size is set to 512 in all experiments.
32
+ 3. We use large datasets with about 618K/2M identities to simulate real cases.
33
+ 4. We test a 10K batch size on the real dataset to take full advantage of the 80GB memory.
34
+ 5. We also test on huge synthetic datasets which include 50M~80M classes.
35
+
36
+ | Dataset | Classes | Backbone | Batch-size | PFC | FP16 | TF32 | Samples/sec | GPU Mem(GB) |
37
+ | ----------- | ------- | ----------- | ---------- | ---- | ---- | ---- | ----------- | ----------- |
38
+ | WebFace600K | 618K | IResNet-50 | 1024 | × | × | × | ~3670 | ~18.2 |
39
+ | WebFace600K | 618K | IResNet-50 | 1024 | × | × | √ | ~4760 | ~15.0 |
40
+ | WebFace600K | 618K | IResNet-50 | 1024 | × | √ | × | ~5170 | ~10.1 |
41
+ | WebFace600K | 618K | IResNet-50 | 1024 | × | √ | √ | ~5400 | ~10.1 |
42
+ | WebFace600K | 618K | IResNet-50 | 2048 | × | √ | √ | ~7780 | ~16.4 |
43
+ | WebFace600K | 618K | IResNet-50 | 10240 | × | √ | √ | ~9400 | ~66.7 |
44
+ | WebFace600K | 618K | IResNet-100 | 1024 | × | √ | √ | ~3700 | ~13.1 |
45
+ | WebFace600K | 618K | IResNet-180 | 1024 | × | √ | √ | ~2380 | ~17.5 |
46
+ | WebFace2M | 2M | IResNet-100 | 1024 | × | √ | √ | ~3480 | ~20.5 |
47
+ | WebFace2M | 2M | IResNet-180 | 1024 | × | √ | √ | ~2350 | ~25.0 |
48
+ | WebFace2M | 2M | IResNet-300 | 1024 | × | √ | √ | ~1541 | ~32.6 |
49
+ | Virtual | 50M | IResNet-50 | 1024 | 0.1 | √ | √ | ~2700 | ~54.1 |
50
+ | Virtual | 70M | IResNet-50 | 1024 | 0.1 | √ | √ | ~2170 | ~73.7 |
51
+ | Virtual | 80M | IResNet-50 | 1024 | 0.1 | √ | √ | ~1080 | ~79.6 |
52
+
53
+
insightface/benchmarks/train/nvidia_a30.md ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Training performance report on NVIDIA A30
2
+
3
+ [NVIDIA A30 Tensor Core GPU](https://www.nvidia.com/en-us/data-center/products/a30-gpu/) is the most versatile mainstream
4
+ compute GPU for AI inference and mainstream enterprise
5
+ workloads.
6
+
7
+ Besides, we can also use the A30 to train deep learning models thanks to its FP16 and TF32 support.
8
+
9
+
10
+
11
+ ## Test Server Spec
12
+
13
+ | Key | Value |
14
+ | ------------ | ------------------------------------------------ |
15
+ | System | ServMax G408-X2 Rackmountable Server |
16
+ | CPU | 2 x Intel(R) Xeon(R) Gold 5220R CPU @ 2.20GHz |
17
+ | Memory | 384GB, 12 x Samsung 32GB DDR4-2933 |
18
+ | GPU | 8 x NVIDIA A30 24GB |
19
+ | Cooling | 2x Customized GPU Kit for GPU support FAN-1909L2 |
20
+ | Hard Drive | Intel SSD S4500 1.9TB/SATA/TLC/2.5" |
21
+ | OS | Ubuntu 16.04.7 LTS |
22
+ | Installation | CUDA 11.1, cuDNN 8.0.5 |
23
+ | Installation | Python 3.7.10 |
24
+ | Installation | PyTorch 1.9 (conda) |
25
+
26
+ This server is donated by [AMAX](https://www.amaxchina.com/), many thanks!
27
+
28
+
29
+
30
+ ## Experiments on arcface_torch
31
+
32
+ We report the training speed in the following table. Please also note that:
33
+
34
+ 1. The training dataset is in MXNet record format and located on an SSD hard drive.
35
+
36
+ 2. The embedding size is set to 512 in all experiments.
37
+
38
+ 3. We use a large dataset which contains about 618K identities to simulate real cases.
39
+
40
+ | Dataset | Classes | Backbone | Batch-size | FP16 | TF32 | Samples/sec |
41
+ | ----------- | ------- | ----------- | ---------- | ---- | ---- | ----------- |
42
+ | WebFace600K | 618K | IResNet-50 | 1024 | × | × | ~2230 |
43
+ | WebFace600K | 618K | IResNet-50 | 1024 | × | √ | ~3200 |
44
+ | WebFace600K | 618K | IResNet-50 | 1024 | √ | × | ~3940 |
45
+ | WebFace600K | 618K | IResNet-50 | 1024 | √ | √ | ~4350 |
46
+ | WebFace600K | 618K | IResNet-50 | 2048 | √ | √ | ~5100 |
47
+ | WebFace600K | 618K | IResNet-100 | 1024 | √ | √ | ~2810 |
48
+ | WebFace600K | 618K | IResNet-180 | 1024 | √ | √ | ~1800 |
49
+
50
+
51
+
52
+
insightface/benchmarks/train/nvidia_rtx3080.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Training performance report on NVIDIA RTX3080
2
+
3
+ [GeForce RTX 3080](https://www.nvidia.com/en-us/geforce/graphics-cards/30-series/rtx-3080-3080ti/)
4
+ The GeForce RTX™ 3080 Ti and RTX 3080 graphics cards deliver the ultra performance that gamers crave, powered by Ampere—NVIDIA’s 2nd gen RTX architecture. They are built with enhanced RT Cores and Tensor Cores, new streaming multiprocessors, and superfast G6X memory for an amazing gaming experience.
5
+
6
+ Besides, we can also use the GeForce RTX™ 3080 to train deep learning models thanks to its FP16 and TF32 support.
7
+
8
+
9
+
10
+ ## Test Server Spec
11
+
12
+ | Key | Value |
13
+ |--------------|---------------------------------------------------|
14
+ | CPU | 2 x Intel(R) Xeon(R) Platinum 8255C CPU @ 2.50GHz |
15
+ | Memory | 384GB |
16
+ | GPU | 8 x GeForce RTX™ 3080 |
17
+ | OS | Ubuntu 18.04.4 LTS |
18
+ | Installation | CUDA 11.1 |
19
+ | Installation | Python 3.7.3 |
20
+ | Installation | PyTorch 1.9.0 (pip) |
21
+
22
+
23
+ ## Experiments on arcface_torch
24
+
25
+ We report the training speed in the following table. Please also note that:
26
+
27
+ 1. The training dataset is SyntheticDataset.
28
+
29
+ 2. The embedding size is set to 512 in all experiments.
30
+
31
+
32
+ ### 1. 2 Million Identities
33
+
34
+ We use a large dataset that contains about 2 million identities to simulate real cases.
35
+
36
+
37
+ | Dataset | Classes | Backbone | Batch-size | FP16 | Partial FC | Samples/sec |
38
+ |------------|------------|------------|------------|------|------------|-------------|
39
+ | WebFace40M | 2 Millions | IResNet-50 | 512 | × | × | Fail |
40
+ | WebFace40M | 2 Millions | IResNet-50 | 512 | x | √ | ~2190 |
41
+ | WebFace40M | 2 Millions | IResNet-50 | 512 | √ | × | Fail |
42
+ | WebFace40M | 2 Millions | IResNet-50 | 512 | √ | √ | ~2620 |
43
+ | WebFace40M | 2 Millions | IResNet-50 | 1024 | × | × | Fail |
44
+ | WebFace40M | 2 Millions | IResNet-50 | 1024 | x | √ | Fail |
45
+ | WebFace40M | 2 Millions | IResNet-50 | 1024 | √ | × | Fail |
46
+ | WebFace40M | 2 Millions | IResNet-50 | 1024 | √ | √ | ~3800 |
47
+
48
+ ### 2. 600K Identities
49
+
50
+ We use a large dataset which contains about 600k identities to simulate real cases.
51
+
52
+ | Dataset | Classes | Backbone | Batch-size | Partial FC | FP16 | Samples/sec |
53
+ |-------------|---------|------------|------------|------------|------|-------------|
54
+ | WebFace600K | 618K | IResNet-50 | 512 | × | × | ~2023 |
55
+ | WebFace600K | 618K | IResNet-50 | 512 | × | √ | ~2392 |
56
+ | WebFace600K | 618K | IResNet-50 | 1024 | × | × | Fail |
57
+ | WebFace600K | 618K | IResNet-50 | 1024 | × | √ | Fail |
58
+ | WebFace600K | 618K | IResNet-50 | 1024 | √ | √ | ~4010 |
insightface/benchmarks/train/nvidia_rtx3090.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Training performance report on NVIDIA RTX3090
2
+
3
+ [GEFORCE RTX 3090](https://www.nvidia.com/en-us/geforce/graphics-cards/30-series/rtx-3090/)
4
+ The GeForce RTX™ 3090 is a big ferocious GPU (BFGPU) with TITAN class performance.
5
+
6
+ Besides, we can also use the GeForce RTX™ 3090 to train deep learning models thanks to its FP16 and TF32 support.
7
+
8
+
9
+
10
+ ## Test Server Spec
11
+
12
+ | Key | Value |
13
+ |--------------|---------------------------------------------------|
14
+ | CPU | 2 x Intel(R) Xeon(R) Platinum 8255C CPU @ 2.50GHz |
15
+ | Memory | 384GB |
16
+ | GPU | 8 x GeForce RTX™ 3090 |
17
+ | OS | Ubuntu 18.04.4 LTS |
18
+ | Installation | CUDA 11.1 |
19
+ | Installation | Python 3.7.3 |
20
+ | Installation | PyTorch 1.9.0 (pip) |
21
+
22
+
23
+ ## Experiments on arcface_torch
24
+
25
+ We report the training speed in the following table. Please also note that:
26
+
27
+ 1. The training dataset is SyntheticDataset.
28
+
29
+ 2. The embedding size is set to 512 in all experiments.
30
+
31
+
32
+ ### 1. 2 Million Identities
33
+
34
+ We use a large dataset that contains about 2 million identities to simulate real cases.
35
+
36
+ | Dataset | Classes | Backbone | Batch-size | FP16 | TF32 | Partial FC | Samples/sec |
37
+ |------------|------------|------------|------------|------|------|------------|-------------|
38
+ | WebFace40M | 2 Millions | IResNet-50 | 512 | × | × | × | ~1750 |
39
+ | WebFace40M | 2 Millions | IResNet-50 | 512 | × | √ | × | ~1810 |
40
+ | WebFace40M | 2 Millions | IResNet-50 | 512 | √ | √ | × | ~2056 |
41
+ | WebFace40M | 2 Millions | IResNet-50 | 512 | √ | √ | √ | ~2850 |
42
+ | WebFace40M | 2 Millions | IResNet-50 | 1024 | √ | √ | × | ~2810 |
43
+ | WebFace40M | 2 Millions | IResNet-50 | 1024 | √ | √ | √ | ~4220 |
44
+ | WebFace40M | 2 Millions | IResNet-50 | 2048 | √ | √ | √ | ~5330 |
45
+
46
+
47
+ ### 2. 600K Identities
48
+
49
+ We use a large dataset which contains about 600k identities to simulate real cases.
50
+
51
+ | Dataset | Classes | Backbone | Batch-size | FP16 | Samples/sec |
52
+ |-------------|---------|------------|------------|------|-------------|
53
+ | WebFace600K | 618K | IResNet-50 | 512 | × | ~2220 |
54
+ | WebFace600K | 618K | IResNet-50 | 512 | √ | ~2610 |
55
+ | WebFace600K | 618K | IResNet-50 | 1024 | × | ~2940 |
56
+ | WebFace600K | 618K | IResNet-50 | 1024 | √ | ~3790 |
57
+ | WebFace600K | 618K | IResNet-50 | 2048 | √ | ~4680 |
insightface/benchmarks/train/nvidia_v100.md ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Training performance report on NVIDIA® V100
2
+
3
+ [NVIDIA® V100](https://www.nvidia.com/en-us/data-center/v100/)
4
+ NVIDIA® V100 Tensor Core is the most advanced data center GPU ever built to accelerate AI, high performance computing (HPC), data science and graphics. It’s powered by NVIDIA Volta architecture, comes in 16 and 32GB configurations, and offers the performance of up to 32 CPUs in a single GPU.
5
+
6
+ Besides, we can also use NVIDIA® V100 to train deep learning models thanks to its FP16 and FP32 support.
7
+
8
+ ## Test Server Spec
9
+
10
+ | Key | Value |
11
+ |--------------|----------------------------------------------|
12
+ | CPU | 2 x Intel(R) Xeon(R) Gold 6133 CPU @ 2.50GHz |
13
+ | Memory | 384GB |
14
+ | GPU | 8 x Tesla V100-SXM2-32GB |
15
+ | OS | Ubuntu 16.04 LTS |
16
+ | Installation | CUDA 10.2 |
17
+ | Installation | Python 3.7.3 |
18
+ | Installation | PyTorch 1.9.0 (pip) |
19
+
20
+ ## Experiments on arcface_torch
21
+
22
+ We report the training speed in the following table. Please also note that:
23
+
24
+ 1. The training dataset is SyntheticDataset.
25
+
26
+ 2. The embedding size is set to 512 in all experiments.
27
+
28
+ ### 1. 2 Million Identities
29
+
30
+ We use a large dataset that contains about 2 million identities to simulate real cases.
31
+
32
+ | Dataset | Classes | Backbone | Batch-size | FP16 | Partial FC | Samples/sec |
33
+ |------------|------------|------------|------------|------|------------|-------------|
34
+ | WebFace40M | 2 Millions | IResNet-50 | 512 | × | × | ~1868 |
35
+ | WebFace40M | 2 Millions | IResNet-50 | 512 | x | √ | ~2712 |
36
+ | WebFace40M | 2 Millions | IResNet-50 | 512 | √ | × | ~2576 |
37
+ | WebFace40M | 2 Millions | IResNet-50 | 512 | √ | √ | ~4501 |
38
+ | WebFace40M | 2 Millions | IResNet-50 | 1024 | × | × | ~1960 |
39
+ | WebFace40M | 2 Millions | IResNet-50 | 1024 | x | √ | ~2922 |
40
+ | WebFace40M | 2 Millions | IResNet-50 | 1024 | √ | × | ~2810 |
41
+ | WebFace40M | 2 Millions | IResNet-50 | 1024 | √ | √ | ~5430 |
42
+ | WebFace40M | 2 Millions | IResNet-50 | 2048 | √ | √ | ~6095 |
43
+
44
+ ### 2. 600K Identities
45
+
46
+ We use a large dataset which contains about 600k identities to simulate real cases.
47
+
48
+ | Dataset | Classes | Backbone | Batch-size | FP16 | Samples/sec |
49
+ |-------------|---------|------------|------------|------|-------------|
50
+ | WebFace600K | 618K | IResNet-50 | 512 | × | ~2430 |
51
+ | WebFace600K | 618K | IResNet-50 | 512 | √ | ~3889 |
52
+ | WebFace600K | 618K | IResNet-50 | 1024 | × | ~2607 |
53
+ | WebFace600K | 618K | IResNet-50 | 1024 | √ | ~4322 |
54
+ | WebFace600K | 618K | IResNet-50 | 2048 | √ | ~4921 |
insightface/body/human_pose/ambiguity_aware/README.md ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Towards Alleviating the Modeling Ambiguity of Unsupervised Monocular 3D Human Pose Estimation
2
+
3
+ ## Introduction
4
+
5
+ **Ambiguity-Aware** studies the ambiguity problem in unsupervised 3D human pose estimation from its 2D counterpart; please refer to the [ICCV2021 paper](https://openaccess.thecvf.com/content/ICCV2021/papers/Yu_Towards_Alleviating_the_Modeling_Ambiguity_of_Unsupervised_Monocular_3D_Human_ICCV_2021_paper.pdf) for more details.
6
+
7
+
8
+ <div align="center">
9
+ <img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/all.gif" alt="videovis" width="800">
10
+ </div>
11
+
12
+
13
+ ## Installation
14
+ ```
15
+ conda create -n uvhpe python=3.6
16
+ conda activate uvhpe
17
+ pip install -r requirements.txt
18
+ # for output, tensorboard, visualization
19
+ mkdir log output vis models data
20
+ ```
21
+
22
+ ## Dataset And Pretrained Models
23
+ Download our preprocessed dataset into `data` and the pretrained models into `models` from the [project webpage](https://sites.google.com/view/ambiguity-aware-hpe).
24
+
25
+ This part will be updated soon.
26
+ ## Inference
27
+ We put some samples with preprocessed 2D keypoints in `scripts/demo_input`. Run inference with `sh demo.sh`; the output can be found in `scripts/demo_output`.
28
+
29
+ ## Evaluation
30
+ ### Evaluation on Human3.6M
31
+ ##### 2D ground-truth as inputs
32
+ * baseline `python main.py --cfg ../cfg/h36m_gt_adv.yaml --pretrain ../models/adv.pth.tar --gpu 0 --eval `
33
+ * scale `python main.py --cfg ../cfg/h36m_gt_scale.yaml --pretrain ../models/tmc_klbone.pth.tar --eval --gpu 0`
34
+
35
+ ##### 2D predictions as inputs
36
+ * baseline `python main.py --cfg ../cfg/pre_adv.yaml --pretrain ../models/pre_adv.pth.tar --gpu 0 --eval `
37
+ * scale `python main.py --cfg ../cfg/pre_tmc_klbone.yaml --pretrain ../models/pre_tmc_klbone.pth.tar --gpu 0 --eval `
38
+
39
+ **Note:** the baseline is our reproduced version of "Unsupervised 3D Pose Estimation with Geometric Self-Supervision".
40
+
41
+ ### Evaluation on LSP
42
+ Use the pretrained model from Human3.6M:
43
+
44
+ `python eval_lsp.py --cfg ../cfg/h36m_gt_scale.yaml --pretrain ../models/tmc_klbone.pth.tar`
45
+
46
+ ### Results
47
+
48
+ The expected **MPJPE** and **P-MPJPE** results on **Human36M** dataset are shown here:
49
+
50
+ | Input | Model | MPJPE | PMPJPE |
51
+ | :--------- | :------------ | :------------: | :------------: |
52
+ | GT | baseline | 105.0 | 46.0 |
53
+ | GT | best | 87.85 | 42.0 |
54
+ | Pre | baseline | 113.3 | 54.9 |
55
+ | Pre | best | 93.1 | 52.3 |
56
+
57
+
58
+ **Note:** the MPJPE from this evaluation differs slightly from the numbers reported in the paper, because the paper reports the best MPJPE observed during training.
59
+
60
+
61
+
62
+ ## Training
63
+ ### Human3.6M
64
+ * Using ground-truth 2D as inputs:
65
+
66
+ baseline `python main.py --cfg ../cfg/h36m_gt_adv.yaml --gpu 0 `
67
+
68
+ best `python main.py --cfg ../cfg/h36m_gt_scale.yaml --gpu 0`
69
+
70
+ * Using predicted 2D as inputs:
71
+
72
+ baseline `python main.py --cfg ../cfg/pre_adv.yaml --gpu 0 `
73
+
74
+ best `python main.py --cfg ../cfg/pre_tmc_klbone.yaml --gpu 0`
75
+
76
+ ## Visualization
77
+
78
+ ### Human3.6M
79
+ <div align="center">
80
+ <img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/S9_Discussion 1.gif" width="200"/><img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/S9_Phoning 1.gif" width="200"/><img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/S9_Photo.gif" width="200"/><img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/S9_WalkTogether 1.gif" width="200"/>
81
+ </div>
82
+
83
+ ### Surreal
84
+ <div align="center">
85
+ <img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/surreal1.gif" width="200"/><img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/surreal2.gif" width="200"/><img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/surreal3.gif" width="200"/><img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/surreal4.gif" width="200"/>
86
+ </div>
87
+
88
+ ### MPI-3DHP
89
+ <div align="center">
90
+ <img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/TS1.gif" width="200"/><img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/TS2.gif" width="200"/><img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/TS3.gif" width="200"/><img src="https://github.com/yuzhenbo/yuzhenbo.github.io/raw/main/assets/extra/ICCV2022/TS6.gif" width="200"/>
91
+ </div>
92
+
93
+
94
+ ### The code of our other ICCV2021 paper, Skeleton2Mesh, is coming soon!
insightface/body/human_pose/ambiguity_aware/cfg/h36m_gt_adv.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ BATCH_SIZE: 512
2
+ DATA:
3
+ NUM_FRAMES: 1
4
+ SCALE_MID_MEAN: 0.720643
5
+ SCALE_MID_STD: 0.058
6
+ USE_RANDOM_DIFF: true
7
+ NETWORK:
8
+ DIS_RES_BLOCKS: 2
9
+ DIS_TEMP_RES_BLOCKS: 2
10
+ DIS_USE_SPECTRAL_NORM: false
11
+ SCALER_INPUT_SIZE: 34
12
+ TRAIN:
13
+ BOUND_AZIM: 2.44346
14
+ BOUND_ELEV: 0.34906585
15
+ DIS_LR: 0.0002
16
+ LOSS_TYPE: ss_adv
17
+ LOSS_WEIGHTS:
18
+ - 1.0
19
+ - 1.0
20
+ - 1.0
21
+ - 1.0
22
+ MAINNET_CRITICS: 4
23
+ NUM_CRITICS: 3
24
+ NUM_CRITICS_TEMP: 3
25
+ POSE_LR: 0.0002
26
+ PRETRAIN_LIFTER: false
27
+ SCALE_LOSS_WEIGHTS:
28
+ - 0.001
29
+ - 1.0
30
+ SUBNET_CRITICS: 1
31
+ TEMP_LR: 0.0002
32
+ USE_CYCLE: false
33
+ USE_NEW_ROT: false
34
+ USE_NEW_TEMP: false
35
+ USE_SCALER: false
36
+ USE_GT: true
insightface/body/human_pose/ambiguity_aware/cfg/h36m_gt_scale.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ BATCH_SIZE: 512
2
+ DATA:
3
+ NUM_FRAMES: 1
4
+ SCALE_MID_MEAN: 0.720643
5
+ SCALE_MID_STD: 0.058
6
+ USE_RANDOM_DIFF: true
7
+ NETWORK:
8
+ DIS_RES_BLOCKS: 2
9
+ DIS_TEMP_RES_BLOCKS: 2
10
+ DIS_USE_SPECTRAL_NORM: false
11
+ SCALER_INPUT_SIZE: 34
12
+ TRAIN:
13
+ BOUND_AZIM: 2.44346
14
+ BOUND_ELEV: 0.34906585
15
+ DIS_LR: 0.0001
16
+ LOSS_TYPE: ss_adv
17
+ LOSS_WEIGHTS:
18
+ - 0.5
19
+ - 5.0
20
+ - 1.0
21
+ - 1.0
22
+ MAINNET_CRITICS: 4
23
+ NUM_CRITICS: 3
24
+ NUM_CRITICS_TEMP: 3
25
+ POSE_LR: 0.00015
26
+ PRETRAIN_LIFTER: false
27
+ SCALE_LOSS_WEIGHTS:
28
+ - 0.001
29
+ - 1.0
30
+ SUBNET_CRITICS: 1
31
+ TEMP_LR: 0.0001
32
+ SCHEDULER_STEP_SIZE: 5
33
+ USE_CYCLE: true
34
+ USE_NEW_ROT: false
35
+ USE_NEW_TEMP: true
36
+ USE_SCALER: true
37
+ USE_GT: true
38
+ FIX:
39
+ FIX_TRAJ: true
40
+ FIX_TRAJ_BY_ROT: false
insightface/body/human_pose/ambiguity_aware/cfg/pre_adv.yaml ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ BATCH_SIZE: 512
2
+ DATA:
3
+ NUM_FRAMES: 1
4
+ SCALE_MID_MEAN: 0.720643
5
+ SCALE_MID_STD: 0.058
6
+ USE_RANDOM_DIFF: true
7
+ NETWORK:
8
+ DIS_RES_BLOCKS: 2
9
+ DIS_TEMP_RES_BLOCKS: 2
10
+ DIS_USE_SPECTRAL_NORM: false
11
+ SCALER_INPUT_SIZE: 34
12
+ TRAIN:
13
+ BOUND_AZIM: 2.44346
14
+ BOUND_ELEV: 0.34906585
15
+ DIS_LR: 0.0001
16
+ LOSS_TYPE: ss_adv
17
+ LOSS_WEIGHTS:
18
+ - 0.5
19
+ - 5.0
20
+ - 1.0
21
+ - 1.0
22
+ MAINNET_CRITICS: 4
23
+ NUM_CRITICS: 3
24
+ NUM_CRITICS_TEMP: 3
25
+ POSE_LR: 0.0001
26
+ PRETRAIN_LIFTER: false
27
+ SCALE_LOSS_WEIGHTS:
28
+ - 0.001
29
+ - 1.0
30
+ SUBNET_CRITICS: 1
31
+ TEMP_LR: 0.0002
32
+ USE_CYCLE: false
33
+ USE_NEW_ROT: false
34
+ USE_NEW_TEMP: false
35
+ USE_SCALER: false
36
+ USE_GT: false
insightface/body/human_pose/ambiguity_aware/cfg/pre_tmc_klbone.yaml ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ BATCH_SIZE: 512
2
+ DATA:
3
+ EXP_TMC: true
4
+ EXP_TMC_DETERMINISTIC: true
5
+ EXP_TMC_INTERVAL: 3
6
+ NUM_FRAMES: 1
7
+ SCALE_MID_MEAN: 0.720643
8
+ SCALE_MID_STD: 0.058
9
+ USE_RANDOM_DIFF: true
10
+ NETWORK:
11
+ DIS_RES_BLOCKS: 2
12
+ DIS_TEMP_RES_BLOCKS: 2
13
+ DIS_USE_SPECTRAL_NORM: false
14
+ SCALER_INPUT_SIZE: 34
15
+ TRAIN:
16
+ BOUND_AZIM: 2.44346
17
+ BOUND_ELEV: 0.34906585
18
+ DIS_LR: 0.0001
19
+ LOSS_TYPE: ss_adv
20
+ LOSS_WEIGHTS:
21
+ - 0.5
22
+ - 5.0
23
+ - 1.0
24
+ - 1.0
25
+ MAINNET_CRITICS: 4
26
+ NUM_CRITICS: 3
27
+ NUM_CRITICS_TEMP: 3
28
+ POSE_LR: 0.0001
29
+ PRETRAIN_LIFTER: false
30
+ SCALE_LOSS_WEIGHTS:
31
+ - 0.01
32
+ - 1.0
33
+ SUBNET_CRITICS: 1
34
+ TEMP_LR: 0.0002
35
+ USE_CYCLE: true
36
+ USE_NEW_ROT: false
37
+ USE_NEW_TEMP: false
38
+ USE_SCALER: true
39
+ USE_GT: false
insightface/body/human_pose/ambiguity_aware/requirements.txt ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch==1.13.1
2
+ torchvision==0.5.0
3
+ tqdm
4
+ opencv-python
5
+ scikit-learn
6
+ matplotlib
7
+ h5py
8
+ pyyaml
9
+ seaborn
10
+ imageio
11
+ easydict
12
+ tensorboardX
insightface/body/human_pose/ambiguity_aware/scripts/_init_paths.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os.path as osp
2
+ import sys
3
+
4
+
5
+ def add_path(path):
6
+ if path not in sys.path:
7
+ sys.path.insert(0, path)
8
+
9
+
10
+ this_dir = osp.dirname(__file__)
11
+
12
+ lib_path = osp.join(this_dir, '..')
13
+ add_path(lib_path)
insightface/body/human_pose/ambiguity_aware/scripts/demo.sh ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ rm -rf demo_output
4
+
5
+ python inference.py --indir demo_input --outdir demo_output --cfg ../cfg/h36m_gt_scale.yaml --pretrain ../models/tmc_klbone.pth.tar
insightface/body/human_pose/ambiguity_aware/scripts/demo_input/0.jpg ADDED
insightface/body/human_pose/ambiguity_aware/scripts/demo_input/0.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15e90d8305cb0946a6f3c08c625dad617032102e520eb9c5d4c17d0af1609482
3
+ size 513
insightface/body/human_pose/ambiguity_aware/scripts/demo_input/1.jpg ADDED
insightface/body/human_pose/ambiguity_aware/scripts/demo_input/1.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6aa3178631c3dd2613b3def95a2c17e284bb262ee62cefdf647fe768a8c6efc6
3
+ size 513
insightface/body/human_pose/ambiguity_aware/scripts/demo_input/10.jpg ADDED
insightface/body/human_pose/ambiguity_aware/scripts/demo_input/10.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01f9a4cc1b06719e65b4af28a3ac62e27afaadacc1028c9ae8e8e96d5724e23e
3
+ size 513
insightface/body/human_pose/ambiguity_aware/scripts/demo_input/11.jpg ADDED
insightface/body/human_pose/ambiguity_aware/scripts/demo_input/11.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88c1b12e23512a66e8cb7802c06b447924ad5efae06098caf9de42ec996cd1ef
3
+ size 513