pose detection
- app.py +94 -9
- assets/.gitattributes +2 -0
- assets/face_detector.onnx +3 -0
- assets/face_landmarks.onnx +3 -0
- assets/model.txt +204 -0
- face_detection.py +279 -0
- mark_detection.py +56 -0
- pose_estimation.py +132 -0
- requirements.txt +6 -1
- utils.py +41 -0
app.py
CHANGED
@@ -1,14 +1,99 @@
  import cv2
  import streamlit as st
+ import tempfile
+ import time
+ import numpy as np
+ from face_detection import FaceDetector
+ from mark_detection import MarkDetector
+ from pose_estimation import PoseEstimator
+ from utils import refine

- st.title("Webcam Live Feed")
- run = st.checkbox('Run')
- FRAME_WINDOW = st.image([])
- camera = cv2.VideoCapture(0)
-
-
-
-
+ st.title("Pose-estimation")
+
+ file_type = st.selectbox("Choose the type of file you want to upload", ("Image", "Video"))
+ if file_type == "Image":
+     uploaded_file = st.file_uploader("Upload an image of your face", type=["jpg", "jpeg", "png"])
  else:
- st.
+     uploaded_file = st.file_uploader("Upload a video of your face", type=["mp4", "mov", "avi", "mkv"])
+
+ if uploaded_file is not None:
+     if file_type == "Video":
+         tfile = tempfile.NamedTemporaryFile(delete=False)
+         tfile.write(uploaded_file.read())
+         cap = cv2.VideoCapture(tfile.name)
+         print(f"Video source: {tfile.name}")
+
+         # Get the frame size.
+         frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+         frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+         # Face detection.
+         face_detector = FaceDetector("assets/face_detector.onnx")
+         # Landmark detection.
+         mark_detector = MarkDetector("assets/face_landmarks.onnx")
+         # Pose estimation.
+         pose_estimator = PoseEstimator(frame_width, frame_height)
+
+         tm = cv2.TickMeter()
+
+         while True:
+             # Read a frame.
+             frame_got, frame = cap.read()
+             if frame_got is False:
+                 break
+
+             # Step 1: Get faces from the current frame.
+             faces, _ = face_detector.detect(frame, 0.7)
+
+             # Any valid face found?
+             if len(faces) > 0:
+                 tm.start()
+
+                 # Step 2: Detect landmarks. Crop and feed the face area into the
+                 # mark detector. Note only the first face will be used for
+                 # demonstration.
+                 face = refine(faces, frame_width, frame_height, 0.15)[0]
+                 x1, y1, x2, y2 = face[:4].astype(int)
+                 patch = frame[y1:y2, x1:x2]
+
+                 # Run the mark detection.
+                 marks = mark_detector.detect([patch])[0].reshape([68, 2])
+
+                 # Convert the locations from the local face area to the global image.
+                 marks *= (x2 - x1)
+                 marks[:, 0] += x1
+                 marks[:, 1] += y1
+
+                 # Step 3: Try pose estimation with 68 points.
+                 pose = pose_estimator.solve(marks)
+
+                 tm.stop()
+
+                 # All done. The best way to show the result would be drawing the
+                 # pose on the frame in realtime.
+
+                 # Do you want to see the pose annotation?
+                 pose_estimator.visualize(frame, pose, color=(0, 255, 0))
+
+                 # Do you want to see the axes?
+                 # pose_estimator.draw_axes(frame, pose)
+
+                 # Do you want to see the marks?
+                 # mark_detector.visualize(frame, marks, color=(0, 255, 0))
+
+                 # Do you want to see the face bounding boxes?
+                 # face_detector.visualize(frame, faces)
+
+             # Draw the FPS on screen.
+             cv2.rectangle(frame, (0, 0), (90, 30), (0, 0, 0), cv2.FILLED)
+             cv2.putText(frame, f"FPS: {tm.getFPS():.0f}", (10, 20),
+                         cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
+
+             # Show a preview.
+             cv2.imshow("Preview", frame)
+             if cv2.waitKey(1) == 27:
+                 break
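Note on the preview loop: requirements.txt pins opencv-python-headless, which ships without the HighGUI backend, so the cv2.imshow / cv2.waitKey calls at the end of the loop cannot open a window inside a Space. Below is a minimal sketch of a Streamlit-native preview; it is an assumption about how the frames could be rendered, not part of this commit (st.empty and st.image are standard Streamlit calls, and frame is the BGR frame produced by the loop above).

import cv2
import streamlit as st

# Sketch: render each processed frame into the Streamlit page instead of a cv2 window.
preview = st.empty()  # placeholder that is overwritten once per frame

def show_frame(frame):
    """Display one BGR frame from the processing loop in the Streamlit page."""
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Streamlit expects RGB ordering
    preview.image(rgb)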
assets/.gitattributes
ADDED
@@ -0,0 +1,2 @@
+ face_detector.onnx filter=lfs diff=lfs merge=lfs -text
+ face_landmarks.onnx filter=lfs diff=lfs merge=lfs -text
assets/face_detector.onnx
ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:08bd3e3febd685ffb4fd7d9d16a101614cc7fc6ab08029d3cb6abe5fb12d3c64
+ size 3291589
assets/face_landmarks.onnx
ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e848578c7ac2474b35e0c4b9a1498ff4145c525552b3d845bdb1f66c8a9d85c2
+ size 29402017
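Both .onnx entries are Git LFS pointer files, which is why the diffs show only the spec version, object hash, and size; the actual model weights are stored in LFS under the .gitattributes rules added above.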
assets/model.txt
ADDED
@@ -0,0 +1,204 @@
+ -73.393523
+ -72.775014
+ -70.533638
+ -66.850058
+ -59.790187
+ -48.368973
+ -34.121101
+ -17.875411
+ 0.098749
+ 17.477031
+ 32.648966
+ 46.372358
+ 57.343480
+ 64.388482
+ 68.212038
+ 70.486405
+ 71.375822
+ -61.119406
+ -51.287588
+ -37.804800
+ -24.022754
+ -11.635713
+ 12.056636
+ 25.106256
+ 38.338588
+ 51.191007
+ 60.053851
+ 0.653940
+ 0.804809
+ 0.992204
+ 1.226783
+ -14.772472
+ -7.180239
+ 0.555920
+ 8.272499
+ 15.214351
+ -46.047290
+ -37.674688
+ -27.883856
+ -19.648268
+ -28.272965
+ -38.082418
+ 19.265868
+ 27.894191
+ 37.437529
+ 45.170805
+ 38.196454
+ 28.764989
+ -28.916267
+ -17.533194
+ -6.684590
+ 0.381001
+ 8.375443
+ 18.876618
+ 28.794412
+ 19.057574
+ 8.956375
+ 0.381549
+ -7.428895
+ -18.160634
+ -24.377490
+ -6.897633
+ 0.340663
+ 8.444722
+ 24.474473
+ 8.449166
+ 0.205322
+ -7.198266
+ -29.801432
+ -10.949766
+ 7.929818
+ 26.074280
+ 42.564390
+ 56.481080
+ 67.246992
+ 75.056892
+ 77.061286
+ 74.758448
+ 66.929021
+ 56.311389
+ 42.419126
+ 25.455880
+ 6.990805
+ -11.666193
+ -30.365191
+ -49.361602
+ -58.769795
+ -61.996155
+ -61.033399
+ -56.686759
+ -57.391033
+ -61.902186
+ -62.777713
+ -59.302347
+ -50.190255
+ -42.193790
+ -30.993721
+ -19.944596
+ -8.414541
+ 2.598255
+ 4.751589
+ 6.562900
+ 4.661005
+ 2.643046
+ -37.471411
+ -42.730510
+ -42.711517
+ -36.754742
+ -35.134493
+ -34.919043
+ -37.032306
+ -43.342445
+ -43.110822
+ -38.086515
+ -35.532024
+ -35.484289
+ 28.612716
+ 22.172187
+ 19.029051
+ 20.721118
+ 19.035460
+ 22.394109
+ 28.079924
+ 36.298248
+ 39.634575
+ 40.395647
+ 39.836405
+ 36.677899
+ 28.677771
+ 25.475976
+ 26.014269
+ 25.326198
+ 28.323008
+ 30.596216
+ 31.408738
+ 30.844876
+ 47.667532
+ 45.909403
+ 44.842580
+ 43.141114
+ 38.635298
+ 30.750622
+ 18.456453
+ 3.609035
+ -0.881698
+ 5.181201
+ 19.176563
+ 30.770570
+ 37.628629
+ 40.886309
+ 42.281449
+ 44.142567
+ 47.140426
+ 14.254422
+ 7.268147
+ 0.442051
+ -6.606501
+ -11.967398
+ -12.051204
+ -7.315098
+ -1.022953
+ 5.349435
+ 11.615746
+ -13.380835
+ -21.150853
+ -29.284036
+ -36.948060
+ -20.132003
+ -23.536684
+ -25.944448
+ -23.695741
+ -20.858157
+ 7.037989
+ 3.021217
+ 1.353629
+ -0.111088
+ -0.147273
+ 1.476612
+ -0.665746
+ 0.247660
+ 1.696435
+ 4.894163
+ 0.282961
+ -1.172675
+ -2.240310
+ -15.934335
+ -22.611355
+ -23.748437
+ -22.721995
+ -15.610679
+ -3.217393
+ -14.987997
+ -22.554245
+ -23.591626
+ -22.406106
+ -15.121907
+ -4.785684
+ -20.893742
+ -22.220479
+ -21.025520
+ -5.712776
+ -20.671489
+ -21.903670
+ -20.328022
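The 204 values above are the 68-point 3D face model consumed by pose_estimation.py: the file stores all 68 X coordinates first, then the 68 Y coordinates, then the 68 Z coordinates, which is why _get_full_model_points reshapes the flat array with reshape(3, -1).T and then flips the Z axis. A small sanity-check sketch, assuming the file is available as assets/model.txt:

import numpy as np

values = np.loadtxt("assets/model.txt", dtype=np.float32)  # 204 floats, one per line
points = values.reshape(3, -1).T                           # -> (68, 3): x, y, z per landmark
points[:, 2] *= -1                                         # same front-view flip as PoseEstimator
print(points.shape)                                        # (68, 3)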
face_detection.py
ADDED
@@ -0,0 +1,279 @@
+ import os
+ import cv2
+ import numpy as np
+ import onnxruntime
+
+
+ # Helper functions that decode the model's distance predictions into boxes and key points.
+ def distance2bbox(points, distance, max_shape=None):
+     x1 = points[:, 0] - distance[:, 0]
+     y1 = points[:, 1] - distance[:, 1]
+     x2 = points[:, 0] + distance[:, 2]
+     y2 = points[:, 1] + distance[:, 3]
+     if max_shape is not None:
+         x1 = x1.clamp(min=0, max=max_shape[1])
+         y1 = y1.clamp(min=0, max=max_shape[0])
+         x2 = x2.clamp(min=0, max=max_shape[1])
+         y2 = y2.clamp(min=0, max=max_shape[0])
+     return np.stack([x1, y1, x2, y2], axis=-1)
+
+
+ def distance2kps(points, distance, max_shape=None):
+     preds = []
+     for i in range(0, distance.shape[1], 2):
+         px = points[:, i % 2] + distance[:, i]
+         py = points[:, i % 2 + 1] + distance[:, i + 1]
+         if max_shape is not None:
+             px = px.clamp(min=0, max=max_shape[1])
+             py = py.clamp(min=0, max=max_shape[0])
+         preds.append(px)
+         preds.append(py)
+     return np.stack(preds, axis=-1)
+
+ # Face detector.
+ class FaceDetector:
+     def __init__(self, model_file):
+         assert os.path.exists(model_file), f"File not found: {model_file}"
+         self.center_cache = {}
+         self.nms_threshold = 0.4
+         self.session = onnxruntime.InferenceSession(
+             model_file, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
+
+         # Get model configurations from the model file.
+         # What is the input like?
+         input_cfg = self.session.get_inputs()[0]
+         input_name = input_cfg.name
+         input_shape = input_cfg.shape
+         self.input_size = tuple(input_shape[2:4][::-1])
+
+         # How about the outputs?
+         outputs = self.session.get_outputs()
+         output_names = []
+         for o in outputs:
+             output_names.append(o.name)
+         self.input_name = input_name
+         self.output_names = output_names
+
+         # And any key points?
+         self._with_kps = False
+         self._anchor_ratio = 1.0
+         self._num_anchors = 1
+
+         if len(outputs) == 6:
+             self._offset = 3
+             self._strides = [8, 16, 32]
+             self._num_anchors = 2
+         elif len(outputs) == 9:
+             self._offset = 3
+             self._strides = [8, 16, 32]
+             self._num_anchors = 2
+             self._with_kps = True
+         elif len(outputs) == 10:
+             self._offset = 5
+             self._strides = [8, 16, 32, 64, 128]
+             self._num_anchors = 1
+         elif len(outputs) == 15:
+             self._offset = 5
+             self._strides = [8, 16, 32, 64, 128]
+             self._num_anchors = 1
+             self._with_kps = True
+
+     def _preprocess(self, image):
+         inputs = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
+         inputs = inputs - np.array([127.5, 127.5, 127.5])
+         inputs = inputs / 128
+         inputs = np.expand_dims(inputs, axis=0)
+         inputs = np.transpose(inputs, [0, 3, 1, 2])
+
+         return inputs.astype(np.float32)
+
+     def forward(self, img, threshold):
+         scores_list = []
+         bboxes_list = []
+         kpss_list = []
+
+         inputs = self._preprocess(img)
+         predictions = self.session.run(
+             self.output_names, {self.input_name: inputs})
+
+         input_height = inputs.shape[2]
+         input_width = inputs.shape[3]
+         offset = self._offset
+
+         for idx, stride in enumerate(self._strides):
+             scores_pred = predictions[idx]
+             bbox_preds = predictions[idx + offset] * stride
+             if self._with_kps:
+                 kps_preds = predictions[idx + offset * 2] * stride
+
+             # Generate the anchors.
+             height = input_height // stride
+             width = input_width // stride
+             key = (height, width, stride)
+
+             if key in self.center_cache:
+                 anchor_centers = self.center_cache[key]
+             else:
+                 # solution-3:
+                 anchor_centers = np.stack(
+                     np.mgrid[:height, :width][::-1], axis=-1).astype(np.float32)
+                 anchor_centers = (anchor_centers * stride).reshape((-1, 2))
+
+                 if self._num_anchors > 1:
+                     anchor_centers = np.stack(
+                         [anchor_centers] * self._num_anchors, axis=1).reshape((-1, 2))
+
+                 if len(self.center_cache) < 100:
+                     self.center_cache[key] = anchor_centers
+
+             # solution-1, c style:
+             # anchor_centers = np.zeros( (height, width, 2), dtype=np.float32 )
+             # for i in range(height):
+             #     anchor_centers[i, :, 1] = i
+             # for i in range(width):
+             #     anchor_centers[:, i, 0] = i
+
+             # solution-2:
+             # ax = np.arange(width, dtype=np.float32)
+             # ay = np.arange(height, dtype=np.float32)
+             # xv, yv = np.meshgrid(np.arange(width), np.arange(height))
+             # anchor_centers = np.stack([xv, yv], axis=-1).astype(np.float32)
+
+             # Filter the results by scores and threshold.
+             pos_inds = np.where(scores_pred >= threshold)[0]
+             bboxes = distance2bbox(anchor_centers, bbox_preds)
+             pos_scores = scores_pred[pos_inds]
+             pos_bboxes = bboxes[pos_inds]
+             scores_list.append(pos_scores)
+             bboxes_list.append(pos_bboxes)
+
+             if self._with_kps:
+                 kpss = distance2kps(anchor_centers, kps_preds)
+                 kpss = kpss.reshape((kpss.shape[0], -1, 2))
+                 pos_kpss = kpss[pos_inds]
+                 kpss_list.append(pos_kpss)
+
+         return scores_list, bboxes_list, kpss_list
+
+     def _nms(self, detections):
+         """Non-maximum suppression."""
+         x1 = detections[:, 0]
+         y1 = detections[:, 1]
+         x2 = detections[:, 2]
+         y2 = detections[:, 3]
+         scores = detections[:, 4]
+
+         areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+         order = scores.argsort()[::-1]
+
+         keep = []
+         while order.size > 0:
+             i = order[0]
+             keep.append(i)
+
+             _x1 = np.maximum(x1[i], x1[order[1:]])
+             _y1 = np.maximum(y1[i], y1[order[1:]])
+             _x2 = np.minimum(x2[i], x2[order[1:]])
+             _y2 = np.minimum(y2[i], y2[order[1:]])
+
+             w = np.maximum(0.0, _x2 - _x1 + 1)
+             h = np.maximum(0.0, _y2 - _y1 + 1)
+             inter = w * h
+             ovr = inter / (areas[i] + areas[order[1:]] - inter)
+
+             inds = np.where(ovr <= self.nms_threshold)[0]
+             order = order[inds + 1]
+
+         return keep
+
+     def detect(self, img, threshold=0.5, input_size=None, max_num=0, metric='default'):
+         input_size = self.input_size if input_size is None else input_size
+
+         # Rescale the image?
+         img_height, img_width, _ = img.shape
+         ratio_img = float(img_height) / img_width
+
+         input_width, input_height = input_size
+         ratio_model = float(input_height) / input_width
+
+         if ratio_img > ratio_model:
+             new_height = input_height
+             new_width = int(new_height / ratio_img)
+         else:
+             new_width = input_width
+             new_height = int(new_width * ratio_img)
+
+         det_scale = float(new_height) / img_height
+         resized_img = cv2.resize(img, (new_width, new_height))
+
+         det_img = np.zeros((input_size[1], input_size[0], 3), dtype=np.uint8)
+         det_img[:new_height, :new_width, :] = resized_img
+
+         scores_list, bboxes_list, kpss_list = self.forward(det_img, threshold)
+         scores = np.vstack(scores_list)
+         scores_ravel = scores.ravel()
+         order = scores_ravel.argsort()[::-1]
+
+         bboxes = np.vstack(bboxes_list) / det_scale
+
+         if self._with_kps:
+             kpss = np.vstack(kpss_list) / det_scale
+         pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
+         pre_det = pre_det[order, :]
+
+         keep = self._nms(pre_det)
+
+         det = pre_det[keep, :]
+
+         if self._with_kps:
+             kpss = kpss[order, :, :]
+             kpss = kpss[keep, :, :]
+         else:
+             kpss = None
+
+         if max_num > 0 and det.shape[0] > max_num:
+             area = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
+             img_center = img.shape[0] // 2, img.shape[1] // 2
+             offsets = np.vstack([
+                 (det[:, 0] + det[:, 2]) / 2 - img_center[1],
+                 (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
+
+             offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
+
+             if metric == 'max':
+                 values = area
+             else:
+                 # some extra weight on the centering
+                 values = area - offset_dist_squared * 2.0
+
+             # Keep the top max_num detections.
+             bindex = np.argsort(values)[::-1]
+             bindex = bindex[0:max_num]
+             det = det[bindex, :]
+
+             if kpss is not None:
+                 kpss = kpss[bindex, :]
+
+         return det, kpss
+
+     def visualize(self, image, results, box_color=(0, 255, 0), text_color=(0, 0, 0)):
+         """Visualize the detection results.
+
+         Args:
+             image (np.ndarray): image to draw marks on.
+             results (np.ndarray): face detection results.
+             box_color (tuple, optional): color of the face box. Defaults to (0, 255, 0).
+             text_color (tuple, optional): color of the label text. Defaults to (0, 0, 0).
+         """
+         for det in results:
+             bbox = det[0:4].astype(np.int32)
+             conf = det[-1]
+             cv2.rectangle(image, (bbox[0], bbox[1]),
+                           (bbox[2], bbox[3]), box_color)
+             label = f"face: {conf:.2f}"
+             label_size, base_line = cv2.getTextSize(
+                 label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+             cv2.rectangle(image, (bbox[0], bbox[1] - label_size[1]),
+                           (bbox[2], bbox[1] + base_line), box_color, cv2.FILLED)
+             cv2.putText(image, label, (bbox[0], bbox[1]),
+                         cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color)
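A minimal usage sketch for the detector defined above; the image path is hypothetical, and the 0.7 threshold matches the one used in app.py:

import cv2
from face_detection import FaceDetector

detector = FaceDetector("assets/face_detector.onnx")
image = cv2.imread("sample_face.jpg")                 # any BGR image (hypothetical path)
faces, keypoints = detector.detect(image, threshold=0.7)
print(f"{len(faces)} face(s) found")                  # each row of faces: x1, y1, x2, y2, score
detector.visualize(image, faces)                      # draw boxes and confidence labels in place
cv2.imwrite("sample_face_annotated.jpg", image)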
mark_detection.py
ADDED
@@ -0,0 +1,56 @@
+ import os
+
+ import cv2
+ import numpy as np
+ import onnxruntime as ort
+
+
+ class MarkDetector:
+     """Facial landmark detector by Convolutional Neural Network"""
+
+     def __init__(self, model_file):
+         """Initialize a mark detector.
+
+         Args:
+             model_file (str): ONNX model path.
+         """
+         assert os.path.exists(model_file), f"File not found: {model_file}"
+         self._input_size = 128
+         self.model = ort.InferenceSession(
+             model_file, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
+
+     def _preprocess(self, bgrs):
+         """Preprocess the inputs to meet the model's needs.
+
+         Args:
+             bgrs (np.ndarray): a list of input images in BGR format.
+
+         Returns:
+             list: a list of resized images in RGB format.
+         """
+         rgbs = []
+         for img in bgrs:
+             img = cv2.resize(img, (self._input_size, self._input_size))
+             img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+             rgbs.append(img)
+
+         return rgbs
+
+     def detect(self, images):
+         """Detect facial marks from a face image.
+
+         Args:
+             images: a list of face images.
+
+         Returns:
+             marks: the facial marks as a numpy array of shape [Batch, 68*2].
+         """
+         inputs = self._preprocess(images)
+         marks = self.model.run(["dense_1"], {"image_input": inputs})
+         return np.array(marks)
+
+     def visualize(self, image, marks, color=(255, 255, 255)):
+         """Draw mark points on image"""
+         for mark in marks:
+             cv2.circle(image, (int(mark[0]), int(
+                 mark[1])), 1, color, -1, cv2.LINE_AA)
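The landmark model returns 68 points normalized to the face crop, so callers have to scale and shift them back into full-image coordinates. A sketch mirroring the steps app.py performs; the patch and the x1/y1/x2/y2 box are assumed to come from FaceDetector plus refine(), which makes the crop square:

from mark_detection import MarkDetector

mark_detector = MarkDetector("assets/face_landmarks.onnx")

def marks_in_image(patch, x1, y1, x2, y2):
    """Return the 68 (x, y) landmarks of a square face crop in full-image coordinates."""
    marks = mark_detector.detect([patch])[0].reshape([68, 2])  # normalized to [0, 1]
    marks *= (x2 - x1)    # scale to the crop size
    marks[:, 0] += x1     # shift back into the original frame
    marks[:, 1] += y1
    return marks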
pose_estimation.py
ADDED
@@ -0,0 +1,132 @@
+ """Estimate head pose according to the facial landmarks"""
+ import cv2
+ import numpy as np
+
+
+ class PoseEstimator:
+     """Estimate head pose according to the facial landmarks"""
+
+     def __init__(self, image_width, image_height):
+         """Init a pose estimator.
+
+         Args:
+             image_width (int): input image width
+             image_height (int): input image height
+         """
+         self.size = (image_height, image_width)
+         self.model_points_68 = self._get_full_model_points()
+
+         # Camera internals
+         self.focal_length = self.size[1]
+         self.camera_center = (self.size[1] / 2, self.size[0] / 2)
+         self.camera_matrix = np.array(
+             [[self.focal_length, 0, self.camera_center[0]],
+              [0, self.focal_length, self.camera_center[1]],
+              [0, 0, 1]], dtype="double")
+
+         # Assuming no lens distortion
+         self.dist_coeefs = np.zeros((4, 1))
+
+         # Rotation vector and translation vector
+         self.r_vec = np.array([[0.01891013], [0.08560084], [-3.14392813]])
+         self.t_vec = np.array(
+             [[-14.97821226], [-10.62040383], [-2053.03596872]])
+
+     def _get_full_model_points(self, filename='assets/model.txt'):
+         """Get all 68 3D model points from file"""
+         raw_value = []
+         with open(filename) as file:
+             for line in file:
+                 raw_value.append(line)
+         model_points = np.array(raw_value, dtype=np.float32)
+         model_points = np.reshape(model_points, (3, -1)).T
+
+         # Transform the model into a front view.
+         model_points[:, 2] *= -1
+
+         return model_points
+
+     def solve(self, points):
+         """Solve pose with all the 68 image points
+         Args:
+             points (np.ndarray): points on image.
+
+         Returns:
+             Tuple: (rotation_vector, translation_vector) as pose.
+         """
+
+         if self.r_vec is None:
+             (_, rotation_vector, translation_vector) = cv2.solvePnP(
+                 self.model_points_68, points, self.camera_matrix, self.dist_coeefs)
+             self.r_vec = rotation_vector
+             self.t_vec = translation_vector
+
+         (_, rotation_vector, translation_vector) = cv2.solvePnP(
+             self.model_points_68,
+             points,
+             self.camera_matrix,
+             self.dist_coeefs,
+             rvec=self.r_vec,
+             tvec=self.t_vec,
+             useExtrinsicGuess=True)
+
+         return (rotation_vector, translation_vector)
+
+     def visualize(self, image, pose, color=(255, 255, 255), line_width=2):
+         """Draw a 3D box as annotation of pose"""
+         rotation_vector, translation_vector = pose
+         point_3d = []
+         rear_size = 75
+         rear_depth = 0
+         point_3d.append((-rear_size, -rear_size, rear_depth))
+         point_3d.append((-rear_size, rear_size, rear_depth))
+         point_3d.append((rear_size, rear_size, rear_depth))
+         point_3d.append((rear_size, -rear_size, rear_depth))
+         point_3d.append((-rear_size, -rear_size, rear_depth))
+
+         front_size = 100
+         front_depth = 100
+         point_3d.append((-front_size, -front_size, front_depth))
+         point_3d.append((-front_size, front_size, front_depth))
+         point_3d.append((front_size, front_size, front_depth))
+         point_3d.append((front_size, -front_size, front_depth))
+         point_3d.append((-front_size, -front_size, front_depth))
+         point_3d = np.array(point_3d, dtype=np.float32).reshape(-1, 3)
+
+         # Map to 2d image points
+         (point_2d, _) = cv2.projectPoints(point_3d,
+                                           rotation_vector,
+                                           translation_vector,
+                                           self.camera_matrix,
+                                           self.dist_coeefs)
+         point_2d = np.int32(point_2d.reshape(-1, 2))
+
+         # Draw all the lines
+         cv2.polylines(image, [point_2d], True, color, line_width, cv2.LINE_AA)
+         cv2.line(image, tuple(point_2d[1]), tuple(
+             point_2d[6]), color, line_width, cv2.LINE_AA)
+         cv2.line(image, tuple(point_2d[2]), tuple(
+             point_2d[7]), color, line_width, cv2.LINE_AA)
+         cv2.line(image, tuple(point_2d[3]), tuple(
+             point_2d[8]), color, line_width, cv2.LINE_AA)
+
+     def draw_axes(self, img, pose):
+         R, t = pose
+         img = cv2.drawFrameAxes(img, self.camera_matrix,
+                                 self.dist_coeefs, R, t, 30)
+
+     def show_3d_model(self):
+         from matplotlib import pyplot
+         from mpl_toolkits.mplot3d import Axes3D
+         fig = pyplot.figure()
+         ax = Axes3D(fig)
+
+         x = self.model_points_68[:, 0]
+         y = self.model_points_68[:, 1]
+         z = self.model_points_68[:, 2]
+
+         ax.scatter(x, y, z)
+         ax.axis('square')
+         pyplot.xlabel('x')
+         pyplot.ylabel('y')
+         pyplot.show()
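PoseEstimator.solve returns an OpenCV (rotation_vector, translation_vector) pair rather than named angles. If pitch/yaw/roll values are wanted (for example, to print them in the Streamlit UI), one common decomposition is sketched below; this is an add-on, not something this module does itself:

import cv2

def euler_angles(rotation_vector):
    """Convert a Rodrigues rotation vector into (pitch, yaw, roll) in degrees."""
    rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
    angles, *_ = cv2.RQDecomp3x3(rotation_matrix)  # Euler angles are the first return value
    return angles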
requirements.txt
CHANGED
@@ -1 +1,6 @@
- opencv-python-headless
+ opencv-python-headless
+ numpy
+ tempfile
+ time
+ onnxruntime
+ os
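Of the added entries, tempfile, time, and os are Python standard-library modules rather than PyPI packages, so pip cannot install them; the third-party packages this Space actually needs are likely just opencv-python-headless, numpy, and onnxruntime (Streamlit itself is provided by the Space runtime).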
utils.py
ADDED
@@ -0,0 +1,41 @@
+ """A module provides a bunch of helper functions."""
+ import numpy as np
+
+
+ def refine(boxes, max_width, max_height, shift=0.1):
+     """Refine the face boxes to suit the face landmark detection's needs.
+
+     Args:
+         boxes: [[x1, y1, x2, y2], ...]
+         max_width: Value larger than this will be clipped.
+         max_height: Value larger than this will be clipped.
+         shift (float, optional): How much to shift the face box down. Defaults to 0.1.
+
+     Returns:
+         Refined results.
+     """
+     refined = boxes.copy()
+     width = refined[:, 2] - refined[:, 0]
+     height = refined[:, 3] - refined[:, 1]
+
+     # Move the boxes in Y direction
+     shift = height * shift
+     refined[:, 1] += shift
+     refined[:, 3] += shift
+     center_x = (refined[:, 0] + refined[:, 2]) / 2
+     center_y = (refined[:, 1] + refined[:, 3]) / 2
+
+     # Make the boxes squares
+     square_sizes = np.maximum(width, height)
+     refined[:, 0] = center_x - square_sizes / 2
+     refined[:, 1] = center_y - square_sizes / 2
+     refined[:, 2] = center_x + square_sizes / 2
+     refined[:, 3] = center_y + square_sizes / 2
+
+     # Clip the boxes for safety
+     refined[:, 0] = np.clip(refined[:, 0], 0, max_width)
+     refined[:, 1] = np.clip(refined[:, 1], 0, max_height)
+     refined[:, 2] = np.clip(refined[:, 2], 0, max_width)
+     refined[:, 3] = np.clip(refined[:, 3], 0, max_height)
+
+     return refined
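A worked example of refine() on a single box, showing the shift-down, square, and clip steps (the numbers are hypothetical):

import numpy as np
from utils import refine

boxes = np.array([[100.0, 120.0, 220.0, 200.0]])    # one 120x80 face box
square = refine(boxes, max_width=640, max_height=480, shift=0.1)
# height is 80, so the box is shifted down by 8 px, then grown to a
# 120x120 square around its new center and clipped to the frame:
print(square)                                        # [[100. 108. 220. 228.]]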