Afnan214 committed
Commit 45d9f6f · unverified · 1 Parent(s): 088c590

pose detection
app.py CHANGED
@@ -1,14 +1,99 @@
 import cv2
 import streamlit as st
+import tempfile
+import time
+import numpy as np
+from face_detection import FaceDetector
+from mark_detection import MarkDetector
+from pose_estimation import PoseEstimator
+from utils import refine
 
-st.title("Webcam Live Feed")
-run = st.checkbox('Run')
-FRAME_WINDOW = st.image([])
-camera = cv2.VideoCapture(0)
-
-while run:
-    _, frame = camera.read()
-    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-    FRAME_WINDOW.image(frame)
+
+st.title("Pose-estimation")
+
+file_type = st.selectbox("Choose the type of file you want to upload", ("Image", "Video"))
+if file_type == "Image":
+    uploaded_file = st.file_uploader("Upload an image of your face", type=["jpg", "jpeg", "png"])
 else:
-    st.write('Stopped')
+    uploaded_file = st.file_uploader("Upload a video of your face", type=["mp4", "mov", "avi", "mkv"])
+
+if uploaded_file is not None:
+    if file_type == "Video":
+        tfile = tempfile.NamedTemporaryFile(delete=False)
+        tfile.write(uploaded_file.read())
+        cap = cv2.VideoCapture(tfile.name)
+        print(f"Video source: {tfile.name}")
+
+        # Get the frame size.
+        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+        # Face detection.
+        face_detector = FaceDetector("assets/face_detector.onnx")
+        # Landmark detection.
+        mark_detector = MarkDetector("assets/face_landmarks.onnx")
+        # Pose estimation.
+        pose_estimator = PoseEstimator(frame_width, frame_height)
+
+        tm = cv2.TickMeter()
+
+        while True:
+
+            # Read a frame.
+            frame_got, frame = cap.read()
+            if frame_got is False:
+                break
+
+            # Step 1: Get faces from the current frame.
+            faces, _ = face_detector.detect(frame, 0.7)
+
+            # Any valid face found?
+            if len(faces) > 0:
+                tm.start()
+
+                # Step 2: Detect landmarks. Crop and feed the face area into the
+                # mark detector. Note only the first face will be used for
+                # demonstration.
+                face = refine(faces, frame_width, frame_height, 0.15)[0]
+                x1, y1, x2, y2 = face[:4].astype(int)
+                patch = frame[y1:y2, x1:x2]
+
+                # Run the mark detection.
+                marks = mark_detector.detect([patch])[0].reshape([68, 2])
+
+                # Convert the locations from the local face area to the global image.
+                marks *= (x2 - x1)
+                marks[:, 0] += x1
+                marks[:, 1] += y1
+
+                # Step 3: Try pose estimation with 68 points.
+                pose = pose_estimator.solve(marks)
+
+                tm.stop()
+
+                # All done. The best way to show the result would be drawing the
+                # pose on the frame in realtime.
+
+                # Do you want to see the pose annotation?
+                pose_estimator.visualize(frame, pose, color=(0, 255, 0))
+
+                # Do you want to see the axes?
+                # pose_estimator.draw_axes(frame, pose)
+
+                # Do you want to see the marks?
+                # mark_detector.visualize(frame, marks, color=(0, 255, 0))
+
+                # Do you want to see the face bounding boxes?
+                # face_detector.visualize(frame, faces)
+
+            # Draw the FPS on screen.
+            cv2.rectangle(frame, (0, 0), (90, 30), (0, 0, 0), cv2.FILLED)
+            cv2.putText(frame, f"FPS: {tm.getFPS():.0f}", (10, 20),
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
+
+            # Show preview.
+            cv2.imshow("Preview", frame)
+            if cv2.waitKey(1) == 27:
+                break
+
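The "Image" branch of the new app.py is left empty in this commit. Below is a minimal sketch, illustrative only and not part of the committed code, of how that branch could reuse the same detector, landmark model, and pose estimator; it assumes it is placed under `if uploaded_file is not None:` as the counterpart of `if file_type == "Video":`, and it displays the result with `st.image` (Streamlit) rather than `cv2.imshow`, which has no display target on a hosted Space.

    else:  # file_type == "Image" (hypothetical branch, not in the commit)
        data = np.frombuffer(uploaded_file.read(), dtype=np.uint8)
        frame = cv2.imdecode(data, cv2.IMREAD_COLOR)       # decode the upload into a BGR image
        h, w = frame.shape[:2]
        face_detector = FaceDetector("assets/face_detector.onnx")
        mark_detector = MarkDetector("assets/face_landmarks.onnx")
        pose_estimator = PoseEstimator(w, h)
        faces, _ = face_detector.detect(frame, 0.7)
        if len(faces) > 0:
            x1, y1, x2, y2 = refine(faces, w, h, 0.15)[0][:4].astype(int)
            marks = mark_detector.detect([frame[y1:y2, x1:x2]])[0].reshape([68, 2])
            marks *= (x2 - x1)                             # map marks back to full-image coordinates
            marks[:, 0] += x1
            marks[:, 1] += y1
            pose_estimator.visualize(frame, pose_estimator.solve(marks), color=(0, 255, 0))
        st.image(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))   # Streamlit expects RGB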
assets/.gitattributes ADDED
@@ -0,0 +1,2 @@
+face_detector.onnx filter=lfs diff=lfs merge=lfs -text
+face_landmarks.onnx filter=lfs diff=lfs merge=lfs -text
assets/face_detector.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08bd3e3febd685ffb4fd7d9d16a101614cc7fc6ab08029d3cb6abe5fb12d3c64
+size 3291589
assets/face_landmarks.onnx ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e848578c7ac2474b35e0c4b9a1498ff4145c525552b3d845bdb1f66c8a9d85c2
+size 29402017
assets/model.txt ADDED
@@ -0,0 +1,204 @@
+-73.393523
+-72.775014
+-70.533638
+-66.850058
+-59.790187
+-48.368973
+-34.121101
+-17.875411
+0.098749
+17.477031
+32.648966
+46.372358
+57.343480
+64.388482
+68.212038
+70.486405
+71.375822
+-61.119406
+-51.287588
+-37.804800
+-24.022754
+-11.635713
+12.056636
+25.106256
+38.338588
+51.191007
+60.053851
+0.653940
+0.804809
+0.992204
+1.226783
+-14.772472
+-7.180239
+0.555920
+8.272499
+15.214351
+-46.047290
+-37.674688
+-27.883856
+-19.648268
+-28.272965
+-38.082418
+19.265868
+27.894191
+37.437529
+45.170805
+38.196454
+28.764989
+-28.916267
+-17.533194
+-6.684590
+0.381001
+8.375443
+18.876618
+28.794412
+19.057574
+8.956375
+0.381549
+-7.428895
+-18.160634
+-24.377490
+-6.897633
+0.340663
+8.444722
+24.474473
+8.449166
+0.205322
+-7.198266
+-29.801432
+-10.949766
+7.929818
+26.074280
+42.564390
+56.481080
+67.246992
+75.056892
+77.061286
+74.758448
+66.929021
+56.311389
+42.419126
+25.455880
+6.990805
+-11.666193
+-30.365191
+-49.361602
+-58.769795
+-61.996155
+-61.033399
+-56.686759
+-57.391033
+-61.902186
+-62.777713
+-59.302347
+-50.190255
+-42.193790
+-30.993721
+-19.944596
+-8.414541
+2.598255
+4.751589
+6.562900
+4.661005
+2.643046
+-37.471411
+-42.730510
+-42.711517
+-36.754742
+-35.134493
+-34.919043
+-37.032306
+-43.342445
+-43.110822
+-38.086515
+-35.532024
+-35.484289
+28.612716
+22.172187
+19.029051
+20.721118
+19.035460
+22.394109
+28.079924
+36.298248
+39.634575
+40.395647
+39.836405
+36.677899
+28.677771
+25.475976
+26.014269
+25.326198
+28.323008
+30.596216
+31.408738
+30.844876
+47.667532
+45.909403
+44.842580
+43.141114
+38.635298
+30.750622
+18.456453
+3.609035
+-0.881698
+5.181201
+19.176563
+30.770570
+37.628629
+40.886309
+42.281449
+44.142567
+47.140426
+14.254422
+7.268147
+0.442051
+-6.606501
+-11.967398
+-12.051204
+-7.315098
+-1.022953
+5.349435
+11.615746
+-13.380835
+-21.150853
+-29.284036
+-36.948060
+-20.132003
+-23.536684
+-25.944448
+-23.695741
+-20.858157
+7.037989
+3.021217
+1.353629
+-0.111088
+-0.147273
+1.476612
+-0.665746
+0.247660
+1.696435
+4.894163
+0.282961
+-1.172675
+-2.240310
+-15.934335
+-22.611355
+-23.748437
+-22.721995
+-15.610679
+-3.217393
+-14.987997
+-22.554245
+-23.591626
+-22.406106
+-15.121907
+-4.785684
+-20.893742
+-22.220479
+-21.025520
+-5.712776
+-20.671489
+-21.903670
+-20.328022
face_detection.py ADDED
@@ -0,0 +1,279 @@
+import os
+import cv2
+import numpy as np
+import onnxruntime
+
+
+# Helper functions that decode distance predictions into boxes and key points.
+def distance2bbox(points, distance, max_shape=None):
+    x1 = points[:, 0] - distance[:, 0]
+    y1 = points[:, 1] - distance[:, 1]
+    x2 = points[:, 0] + distance[:, 2]
+    y2 = points[:, 1] + distance[:, 3]
+    if max_shape is not None:
+        x1 = np.clip(x1, 0, max_shape[1])
+        y1 = np.clip(y1, 0, max_shape[0])
+        x2 = np.clip(x2, 0, max_shape[1])
+        y2 = np.clip(y2, 0, max_shape[0])
+    return np.stack([x1, y1, x2, y2], axis=-1)
+
+
+def distance2kps(points, distance, max_shape=None):
+    preds = []
+    for i in range(0, distance.shape[1], 2):
+        px = points[:, i % 2] + distance[:, i]
+        py = points[:, i % 2 + 1] + distance[:, i + 1]
+        if max_shape is not None:
+            px = np.clip(px, 0, max_shape[1])
+            py = np.clip(py, 0, max_shape[0])
+        preds.append(px)
+        preds.append(py)
+    return np.stack(preds, axis=-1)
+
+
+# Face detector.
+class FaceDetector:
+    def __init__(self, model_file):
+        assert os.path.exists(model_file), f"File not found: {model_file}"
+        self.center_cache = {}
+        self.nms_threshold = 0.4
+        self.session = onnxruntime.InferenceSession(
+            model_file, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
+
+        # Get model configurations from the model file.
+        # What is the input like?
+        input_cfg = self.session.get_inputs()[0]
+        input_name = input_cfg.name
+        input_shape = input_cfg.shape
+        self.input_size = tuple(input_shape[2:4][::-1])
+
+        # How about the outputs?
+        outputs = self.session.get_outputs()
+        output_names = []
+        for o in outputs:
+            output_names.append(o.name)
+        self.input_name = input_name
+        self.output_names = output_names
+
+        # And any key points?
+        self._with_kps = False
+        self._anchor_ratio = 1.0
+        self._num_anchors = 1
+
+        if len(outputs) == 6:
+            self._offset = 3
+            self._strides = [8, 16, 32]
+            self._num_anchors = 2
+        elif len(outputs) == 9:
+            self._offset = 3
+            self._strides = [8, 16, 32]
+            self._num_anchors = 2
+            self._with_kps = True
+        elif len(outputs) == 10:
+            self._offset = 5
+            self._strides = [8, 16, 32, 64, 128]
+            self._num_anchors = 1
+        elif len(outputs) == 15:
+            self._offset = 5
+            self._strides = [8, 16, 32, 64, 128]
+            self._num_anchors = 1
+            self._with_kps = True
+
+    def _preprocess(self, image):
+        inputs = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
+        inputs = inputs - np.array([127.5, 127.5, 127.5])
+        inputs = inputs / 128
+        inputs = np.expand_dims(inputs, axis=0)
+        inputs = np.transpose(inputs, [0, 3, 1, 2])
+
+        return inputs.astype(np.float32)
+
+    def forward(self, img, threshold):
+        scores_list = []
+        bboxes_list = []
+        kpss_list = []
+
+        inputs = self._preprocess(img)
+        predictions = self.session.run(
+            self.output_names, {self.input_name: inputs})
+
+        input_height = inputs.shape[2]
+        input_width = inputs.shape[3]
+        offset = self._offset
+
+        for idx, stride in enumerate(self._strides):
+            scores_pred = predictions[idx]
+            bbox_preds = predictions[idx + offset] * stride
+            if self._with_kps:
+                kps_preds = predictions[idx + offset * 2] * stride
+
+            # Generate the anchors.
+            height = input_height // stride
+            width = input_width // stride
+            key = (height, width, stride)
+
+            if key in self.center_cache:
+                anchor_centers = self.center_cache[key]
+            else:
+                # solution-3:
+                anchor_centers = np.stack(
+                    np.mgrid[:height, :width][::-1], axis=-1).astype(np.float32)
+                anchor_centers = (anchor_centers * stride).reshape((-1, 2))
+
+                if self._num_anchors > 1:
+                    anchor_centers = np.stack(
+                        [anchor_centers] * self._num_anchors, axis=1).reshape((-1, 2))
+
+                if len(self.center_cache) < 100:
+                    self.center_cache[key] = anchor_centers
+
+            # solution-1, c style:
+            # anchor_centers = np.zeros((height, width, 2), dtype=np.float32)
+            # for i in range(height):
+            #     anchor_centers[i, :, 1] = i
+            # for i in range(width):
+            #     anchor_centers[:, i, 0] = i
+
+            # solution-2:
+            # ax = np.arange(width, dtype=np.float32)
+            # ay = np.arange(height, dtype=np.float32)
+            # xv, yv = np.meshgrid(np.arange(width), np.arange(height))
+            # anchor_centers = np.stack([xv, yv], axis=-1).astype(np.float32)
+
+            # Filter the results by scores and threshold.
+            pos_inds = np.where(scores_pred >= threshold)[0]
+            bboxes = distance2bbox(anchor_centers, bbox_preds)
+            pos_scores = scores_pred[pos_inds]
+            pos_bboxes = bboxes[pos_inds]
+            scores_list.append(pos_scores)
+            bboxes_list.append(pos_bboxes)
+
+            if self._with_kps:
+                kpss = distance2kps(anchor_centers, kps_preds)
+                kpss = kpss.reshape((kpss.shape[0], -1, 2))
+                pos_kpss = kpss[pos_inds]
+                kpss_list.append(pos_kpss)
+
+        return scores_list, bboxes_list, kpss_list
+
+    def _nms(self, detections):
+        """Non-maximum suppression."""
+        x1 = detections[:, 0]
+        y1 = detections[:, 1]
+        x2 = detections[:, 2]
+        y2 = detections[:, 3]
+        scores = detections[:, 4]
+
+        areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+        order = scores.argsort()[::-1]
+
+        keep = []
+        while order.size > 0:
+            i = order[0]
+            keep.append(i)
+
+            _x1 = np.maximum(x1[i], x1[order[1:]])
+            _y1 = np.maximum(y1[i], y1[order[1:]])
+            _x2 = np.minimum(x2[i], x2[order[1:]])
+            _y2 = np.minimum(y2[i], y2[order[1:]])
+
+            w = np.maximum(0.0, _x2 - _x1 + 1)
+            h = np.maximum(0.0, _y2 - _y1 + 1)
+            inter = w * h
+            ovr = inter / (areas[i] + areas[order[1:]] - inter)
+
+            inds = np.where(ovr <= self.nms_threshold)[0]
+            order = order[inds + 1]
+
+        return keep
+
+    def detect(self, img, threshold=0.5, input_size=None, max_num=0, metric='default'):
+        input_size = self.input_size if input_size is None else input_size
+
+        # Rescale the image?
+        img_height, img_width, _ = img.shape
+        ratio_img = float(img_height) / img_width
+
+        input_width, input_height = input_size
+        ratio_model = float(input_height) / input_width
+
+        if ratio_img > ratio_model:
+            new_height = input_height
+            new_width = int(new_height / ratio_img)
+        else:
+            new_width = input_width
+            new_height = int(new_width * ratio_img)
+
+        det_scale = float(new_height) / img_height
+        resized_img = cv2.resize(img, (new_width, new_height))
+
+        det_img = np.zeros((input_size[1], input_size[0], 3), dtype=np.uint8)
+        det_img[:new_height, :new_width, :] = resized_img
+
+        scores_list, bboxes_list, kpss_list = self.forward(det_img, threshold)
+        scores = np.vstack(scores_list)
+        scores_ravel = scores.ravel()
+        order = scores_ravel.argsort()[::-1]
+
+        bboxes = np.vstack(bboxes_list) / det_scale
+
+        if self._with_kps:
+            kpss = np.vstack(kpss_list) / det_scale
+        pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
+        pre_det = pre_det[order, :]
+
+        keep = self._nms(pre_det)
+
+        det = pre_det[keep, :]
+
+        if self._with_kps:
+            kpss = kpss[order, :, :]
+            kpss = kpss[keep, :, :]
+        else:
+            kpss = None
+
+        if max_num > 0 and det.shape[0] > max_num:
+            area = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
+            img_center = img.shape[0] // 2, img.shape[1] // 2
+            offsets = np.vstack([
+                (det[:, 0] + det[:, 2]) / 2 - img_center[1],
+                (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
+
+            offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
+
+            if metric == 'max':
+                values = area
+            else:
+                # Some extra weight on the centering.
+                values = area - offset_dist_squared * 2.0
+
+            bindex = np.argsort(values)[::-1]
+            bindex = bindex[0:max_num]
+            det = det[bindex, :]
+
+            if kpss is not None:
+                kpss = kpss[bindex, :]
+
+        return det, kpss
+
+    def visualize(self, image, results, box_color=(0, 255, 0), text_color=(0, 0, 0)):
+        """Visualize the detection results.
+
+        Args:
+            image (np.ndarray): image to draw marks on.
+            results (np.ndarray): face detection results.
+            box_color (tuple, optional): color of the face box. Defaults to (0, 255, 0).
+            text_color (tuple, optional): color of the label text. Defaults to (0, 0, 0).
+        """
+        for det in results:
+            bbox = det[0:4].astype(np.int32)
+            conf = det[-1]
+            cv2.rectangle(image, (bbox[0], bbox[1]),
+                          (bbox[2], bbox[3]), box_color)
+            label = f"face: {conf:.2f}"
+            label_size, base_line = cv2.getTextSize(
+                label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+            cv2.rectangle(image, (bbox[0], bbox[1] - label_size[1]),
+                          (bbox[2], bbox[1] + base_line), box_color, cv2.FILLED)
+            cv2.putText(image, label, (bbox[0], bbox[1]),
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color)
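For a quick standalone check of the detector added above, a minimal sketch could look like the following; the test image path is hypothetical and not part of this commit.

    import cv2
    from face_detection import FaceDetector

    detector = FaceDetector("assets/face_detector.onnx")
    image = cv2.imread("sample_face.jpg")       # hypothetical test image; BGR, as the detector expects
    boxes, keypoints = detector.detect(image, threshold=0.7)
    detector.visualize(image, boxes)            # draws boxes and confidence labels in place
    cv2.imwrite("sample_face_boxes.jpg", image)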
mark_detection.py ADDED
@@ -0,0 +1,56 @@
+import os
+
+import cv2
+import numpy as np
+import onnxruntime as ort
+
+
+class MarkDetector:
+    """Facial landmark detector by Convolutional Neural Network"""
+
+    def __init__(self, model_file):
+        """Initialize a mark detector.
+
+        Args:
+            model_file (str): ONNX model path.
+        """
+        assert os.path.exists(model_file), f"File not found: {model_file}"
+        self._input_size = 128
+        self.model = ort.InferenceSession(
+            model_file, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
+
+    def _preprocess(self, bgrs):
+        """Preprocess the inputs to meet the model's needs.
+
+        Args:
+            bgrs: a list of input images in BGR format.
+
+        Returns:
+            a list of resized images in RGB format.
+        """
+        rgbs = []
+        for img in bgrs:
+            img = cv2.resize(img, (self._input_size, self._input_size))
+            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+            rgbs.append(img)
+
+        return rgbs
+
+    def detect(self, images):
+        """Detect facial marks from a face image.
+
+        Args:
+            images: a list of face images.
+
+        Returns:
+            marks: the facial marks as a numpy array of shape [Batch, 68*2].
+        """
+        inputs = self._preprocess(images)
+        marks = self.model.run(["dense_1"], {"image_input": inputs})
+        return np.array(marks)
+
+    def visualize(self, image, marks, color=(255, 255, 255)):
+        """Draw mark points on image"""
+        for mark in marks:
+            cv2.circle(image, (int(mark[0]), int(
+                mark[1])), 1, color, -1, cv2.LINE_AA)
pose_estimation.py ADDED
@@ -0,0 +1,132 @@
+"""Estimate head pose according to the facial landmarks"""
+import cv2
+import numpy as np
+
+
+class PoseEstimator:
+    """Estimate head pose according to the facial landmarks"""
+
+    def __init__(self, image_width, image_height):
+        """Init a pose estimator.
+
+        Args:
+            image_width (int): input image width
+            image_height (int): input image height
+        """
+        self.size = (image_height, image_width)
+        self.model_points_68 = self._get_full_model_points()
+
+        # Camera internals
+        self.focal_length = self.size[1]
+        self.camera_center = (self.size[1] / 2, self.size[0] / 2)
+        self.camera_matrix = np.array(
+            [[self.focal_length, 0, self.camera_center[0]],
+             [0, self.focal_length, self.camera_center[1]],
+             [0, 0, 1]], dtype="double")
+
+        # Assuming no lens distortion
+        self.dist_coeefs = np.zeros((4, 1))
+
+        # Rotation vector and translation vector
+        self.r_vec = np.array([[0.01891013], [0.08560084], [-3.14392813]])
+        self.t_vec = np.array(
+            [[-14.97821226], [-10.62040383], [-2053.03596872]])
+
+    def _get_full_model_points(self, filename='assets/model.txt'):
+        """Get all 68 3D model points from file"""
+        raw_value = []
+        with open(filename) as file:
+            for line in file:
+                raw_value.append(line)
+        model_points = np.array(raw_value, dtype=np.float32)
+        model_points = np.reshape(model_points, (3, -1)).T
+
+        # Transform the model into a front view.
+        model_points[:, 2] *= -1
+
+        return model_points
+
+    def solve(self, points):
+        """Solve pose with all the 68 image points
+
+        Args:
+            points (np.ndarray): points on image.
+
+        Returns:
+            Tuple: (rotation_vector, translation_vector) as pose.
+        """
+        if self.r_vec is None:
+            (_, rotation_vector, translation_vector) = cv2.solvePnP(
+                self.model_points_68, points, self.camera_matrix, self.dist_coeefs)
+            self.r_vec = rotation_vector
+            self.t_vec = translation_vector
+
+        (_, rotation_vector, translation_vector) = cv2.solvePnP(
+            self.model_points_68,
+            points,
+            self.camera_matrix,
+            self.dist_coeefs,
+            rvec=self.r_vec,
+            tvec=self.t_vec,
+            useExtrinsicGuess=True)
+
+        return (rotation_vector, translation_vector)
+
+    def visualize(self, image, pose, color=(255, 255, 255), line_width=2):
+        """Draw a 3D box as annotation of pose"""
+        rotation_vector, translation_vector = pose
+        point_3d = []
+        rear_size = 75
+        rear_depth = 0
+        point_3d.append((-rear_size, -rear_size, rear_depth))
+        point_3d.append((-rear_size, rear_size, rear_depth))
+        point_3d.append((rear_size, rear_size, rear_depth))
+        point_3d.append((rear_size, -rear_size, rear_depth))
+        point_3d.append((-rear_size, -rear_size, rear_depth))
+
+        front_size = 100
+        front_depth = 100
+        point_3d.append((-front_size, -front_size, front_depth))
+        point_3d.append((-front_size, front_size, front_depth))
+        point_3d.append((front_size, front_size, front_depth))
+        point_3d.append((front_size, -front_size, front_depth))
+        point_3d.append((-front_size, -front_size, front_depth))
+        point_3d = np.array(point_3d, dtype=np.float32).reshape(-1, 3)
+
+        # Map to 2d image points
+        (point_2d, _) = cv2.projectPoints(point_3d,
+                                          rotation_vector,
+                                          translation_vector,
+                                          self.camera_matrix,
+                                          self.dist_coeefs)
+        point_2d = np.int32(point_2d.reshape(-1, 2))
+
+        # Draw all the lines
+        cv2.polylines(image, [point_2d], True, color, line_width, cv2.LINE_AA)
+        cv2.line(image, tuple(point_2d[1]), tuple(
+            point_2d[6]), color, line_width, cv2.LINE_AA)
+        cv2.line(image, tuple(point_2d[2]), tuple(
+            point_2d[7]), color, line_width, cv2.LINE_AA)
+        cv2.line(image, tuple(point_2d[3]), tuple(
+            point_2d[8]), color, line_width, cv2.LINE_AA)
+
+    def draw_axes(self, img, pose):
+        R, t = pose
+        img = cv2.drawFrameAxes(img, self.camera_matrix,
+                                self.dist_coeefs, R, t, 30)
+
+    def show_3d_model(self):
+        from matplotlib import pyplot
+        from mpl_toolkits.mplot3d import Axes3D
+        fig = pyplot.figure()
+        ax = Axes3D(fig)
+
+        x = self.model_points_68[:, 0]
+        y = self.model_points_68[:, 1]
+        z = self.model_points_68[:, 2]
+
+        ax.scatter(x, y, z)
+        ax.axis('square')
+        pyplot.xlabel('x')
+        pyplot.ylabel('y')
+        pyplot.show()
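PoseEstimator.solve returns an OpenCV rotation vector and translation vector rather than readable angles. If pitch/yaw/roll values are wanted on top of the committed code, one possible sketch using standard OpenCV calls (not part of this commit; angle conventions depend on the decomposition) is:

    import cv2

    def euler_angles_from_pose(pose):
        """Convert (rotation_vector, translation_vector) to approximate pitch/yaw/roll in degrees."""
        rotation_vector, _ = pose
        rotation_matrix, _ = cv2.Rodrigues(rotation_vector)   # rotation vector -> 3x3 rotation matrix
        angles, *_ = cv2.RQDecomp3x3(rotation_matrix)         # RQ decomposition yields Euler angles in degrees
        pitch, yaw, roll = angles
        return pitch, yaw, roll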
requirements.txt CHANGED
@@ -1 +1,3 @@
-opencv-python-headless
+opencv-python-headless
+numpy
+onnxruntime
utils.py ADDED
@@ -0,0 +1,41 @@
+"""This module provides a bunch of helper functions."""
+import numpy as np
+
+
+def refine(boxes, max_width, max_height, shift=0.1):
+    """Refine the face boxes to suit the face landmark detection's needs.
+
+    Args:
+        boxes: [[x1, y1, x2, y2], ...]
+        max_width: values larger than this will be clipped.
+        max_height: values larger than this will be clipped.
+        shift (float, optional): How much to shift the face box down. Defaults to 0.1.
+
+    Returns:
+        Refined results.
+    """
+    refined = boxes.copy()
+    width = refined[:, 2] - refined[:, 0]
+    height = refined[:, 3] - refined[:, 1]
+
+    # Move the boxes in Y direction
+    shift = height * shift
+    refined[:, 1] += shift
+    refined[:, 3] += shift
+    center_x = (refined[:, 0] + refined[:, 2]) / 2
+    center_y = (refined[:, 1] + refined[:, 3]) / 2
+
+    # Make the boxes squares
+    square_sizes = np.maximum(width, height)
+    refined[:, 0] = center_x - square_sizes / 2
+    refined[:, 1] = center_y - square_sizes / 2
+    refined[:, 2] = center_x + square_sizes / 2
+    refined[:, 3] = center_y + square_sizes / 2
+
+    # Clip the boxes for safety
+    refined[:, 0] = np.clip(refined[:, 0], 0, max_width)
+    refined[:, 1] = np.clip(refined[:, 1], 0, max_height)
+    refined[:, 2] = np.clip(refined[:, 2], 0, max_width)
+    refined[:, 3] = np.clip(refined[:, 3], 0, max_height)
+
+    return refined