pose detection
- app.py +94 -9
- assets/.gitattributes +2 -0
- assets/face_detector.onnx +3 -0
- assets/face_landmarks.onnx +3 -0
- assets/model.txt +204 -0
- face_detection.py +279 -0
- mark_detection.py +56 -0
- pose_estimation.py +132 -0
- requirements.txt +6 -1
- utils.py +41 -0
app.py
CHANGED
@@ -1,14 +1,99 @@
  import cv2
  import streamlit as st
+ import tempfile
+ import time
+ import numpy as np
+ from face_detection import FaceDetector
+ from mark_detection import MarkDetector
+ from pose_estimation import PoseEstimator
+ from utils import refine

- st.title("Webcam Live Feed")
- run = st.checkbox('Run')
- FRAME_WINDOW = st.image([])
- camera = cv2.VideoCapture(0)
-
-
-
-
+ st.title("Pose-estimation")
+
+ file_type = st.selectbox("Choose the type of file you want to upload", ("Image", "Video"))
+ if file_type == "Image":
+     uploaded_file = st.file_uploader("Upload an image of your face", type=["jpg", "jpeg", "png"])
  else:
- st.
+     uploaded_file = st.file_uploader("Upload a video of your face", type=["mp4", "mov", "avi", "mkv"])
+
+ if uploaded_file is not None:
+     if file_type == "Video":
+         tfile = tempfile.NamedTemporaryFile(delete=False)
+         tfile.write(uploaded_file.read())
+         cap = cv2.VideoCapture(tfile.name)
+         print(f"Video source: {tfile.name}")
+
+         # Get the frame size.
+         frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+         frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+         # Face detection.
+         face_detector = FaceDetector("assets/face_detector.onnx")
+         # Landmark detection.
+         mark_detector = MarkDetector("assets/face_landmarks.onnx")
+         # Pose estimation.
+         pose_estimator = PoseEstimator(frame_width, frame_height)
+
+         tm = cv2.TickMeter()
+
+         while True:
+             # Read a frame.
+             frame_got, frame = cap.read()
+             if frame_got is False:
+                 break
+
+             # Step 1: Get faces from the current frame.
+             faces, _ = face_detector.detect(frame, 0.7)
+
+             # Any valid face found?
+             if len(faces) > 0:
+                 tm.start()
+
+                 # Step 2: Detect landmarks. Crop and feed the face area into the
+                 # mark detector. Note only the first face will be used for
+                 # demonstration.
+                 face = refine(faces, frame_width, frame_height, 0.15)[0]
+                 x1, y1, x2, y2 = face[:4].astype(int)
+                 patch = frame[y1:y2, x1:x2]
+
+                 # Run the mark detection.
+                 marks = mark_detector.detect([patch])[0].reshape([68, 2])
+
+                 # Convert the locations from the local face area to the global image.
+                 marks *= (x2 - x1)
+                 marks[:, 0] += x1
+                 marks[:, 1] += y1
+
+                 # Step 3: Try pose estimation with 68 points.
+                 pose = pose_estimator.solve(marks)
+
+                 tm.stop()
+
+                 # All done. The best way to show the result would be drawing the
+                 # pose on the frame in realtime.
+
+                 # Do you want to see the pose annotation?
+                 pose_estimator.visualize(frame, pose, color=(0, 255, 0))
+
+                 # Do you want to see the axes?
+                 # pose_estimator.draw_axes(frame, pose)
+
+                 # Do you want to see the marks?
+                 # mark_detector.visualize(frame, marks, color=(0, 255, 0))
+
+                 # Do you want to see the face bounding boxes?
+                 # face_detector.visualize(frame, faces)
+
+             # Draw the FPS on screen.
+             cv2.rectangle(frame, (0, 0), (90, 30), (0, 0, 0), cv2.FILLED)
+             cv2.putText(frame, f"FPS: {tm.getFPS():.0f}", (10, 20),
+                         cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255))
+
+             # Show a preview.
+             cv2.imshow("Preview", frame)
+             if cv2.waitKey(1) == 27:
+                 break
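Note on the preview loop: requirements.txt pins opencv-python-headless, which ships without the HighGUI backend, so the cv2.imshow / cv2.waitKey calls at the end of the loop cannot open a window inside a Space. Below is a minimal sketch of a Streamlit-native preview; it is an assumption about how the frames could be rendered, not part of this commit (st.empty and st.image are standard Streamlit calls, and frame is the BGR frame produced by the loop above).

import cv2
import streamlit as st

# Sketch: render each processed frame into the Streamlit page instead of a cv2 window.
preview = st.empty()  # placeholder that is overwritten once per frame

def show_frame(frame):
    """Display one BGR frame from the processing loop in the Streamlit page."""
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Streamlit expects RGB ordering
    preview.image(rgb)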
assets/.gitattributes
ADDED
@@ -0,0 +1,2 @@
+ face_detector.onnx filter=lfs diff=lfs merge=lfs -text
+ face_landmarks.onnx filter=lfs diff=lfs merge=lfs -text
assets/face_detector.onnx
ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:08bd3e3febd685ffb4fd7d9d16a101614cc7fc6ab08029d3cb6abe5fb12d3c64
+ size 3291589
assets/face_landmarks.onnx
ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e848578c7ac2474b35e0c4b9a1498ff4145c525552b3d845bdb1f66c8a9d85c2
+ size 29402017
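Both .onnx entries are Git LFS pointer files, which is why the diffs show only the spec version, object hash, and size; the actual model weights are stored in LFS under the .gitattributes rules added above.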
assets/model.txt
ADDED
@@ -0,0 +1,204 @@
+ -73.393523
+ -72.775014
+ -70.533638
+ -66.850058
+ -59.790187
+ -48.368973
+ -34.121101
+ -17.875411
+ 0.098749
+ 17.477031
+ 32.648966
+ 46.372358
+ 57.343480
+ 64.388482
+ 68.212038
+ 70.486405
+ 71.375822
+ -61.119406
+ -51.287588
+ -37.804800
+ -24.022754
+ -11.635713
+ 12.056636
+ 25.106256
+ 38.338588
+ 51.191007
+ 60.053851
+ 0.653940
+ 0.804809
+ 0.992204
+ 1.226783
+ -14.772472
+ -7.180239
+ 0.555920
+ 8.272499
+ 15.214351
+ -46.047290
+ -37.674688
+ -27.883856
+ -19.648268
+ -28.272965
+ -38.082418
+ 19.265868
+ 27.894191
+ 37.437529
+ 45.170805
+ 38.196454
+ 28.764989
+ -28.916267
+ -17.533194
+ -6.684590
+ 0.381001
+ 8.375443
+ 18.876618
+ 28.794412
+ 19.057574
+ 8.956375
+ 0.381549
+ -7.428895
+ -18.160634
+ -24.377490
+ -6.897633
+ 0.340663
+ 8.444722
+ 24.474473
+ 8.449166
+ 0.205322
+ -7.198266
+ -29.801432
+ -10.949766
+ 7.929818
+ 26.074280
+ 42.564390
+ 56.481080
+ 67.246992
+ 75.056892
+ 77.061286
+ 74.758448
+ 66.929021
+ 56.311389
+ 42.419126
+ 25.455880
+ 6.990805
+ -11.666193
+ -30.365191
+ -49.361602
+ -58.769795
+ -61.996155
+ -61.033399
+ -56.686759
+ -57.391033
+ -61.902186
+ -62.777713
+ -59.302347
+ -50.190255
+ -42.193790
+ -30.993721
+ -19.944596
+ -8.414541
+ 2.598255
+ 4.751589
+ 6.562900
+ 4.661005
+ 2.643046
+ -37.471411
+ -42.730510
+ -42.711517
+ -36.754742
+ -35.134493
+ -34.919043
+ -37.032306
+ -43.342445
+ -43.110822
+ -38.086515
+ -35.532024
+ -35.484289
+ 28.612716
+ 22.172187
+ 19.029051
+ 20.721118
+ 19.035460
+ 22.394109
+ 28.079924
+ 36.298248
+ 39.634575
+ 40.395647
+ 39.836405
+ 36.677899
+ 28.677771
+ 25.475976
+ 26.014269
+ 25.326198
+ 28.323008
+ 30.596216
+ 31.408738
+ 30.844876
+ 47.667532
+ 45.909403
+ 44.842580
+ 43.141114
+ 38.635298
+ 30.750622
+ 18.456453
+ 3.609035
+ -0.881698
+ 5.181201
+ 19.176563
+ 30.770570
+ 37.628629
+ 40.886309
+ 42.281449
+ 44.142567
+ 47.140426
+ 14.254422
+ 7.268147
+ 0.442051
+ -6.606501
+ -11.967398
+ -12.051204
+ -7.315098
+ -1.022953
+ 5.349435
+ 11.615746
+ -13.380835
+ -21.150853
+ -29.284036
+ -36.948060
+ -20.132003
+ -23.536684
+ -25.944448
+ -23.695741
+ -20.858157
+ 7.037989
+ 3.021217
+ 1.353629
+ -0.111088
+ -0.147273
+ 1.476612
+ -0.665746
+ 0.247660
+ 1.696435
+ 4.894163
+ 0.282961
+ -1.172675
+ -2.240310
+ -15.934335
+ -22.611355
+ -23.748437
+ -22.721995
+ -15.610679
+ -3.217393
+ -14.987997
+ -22.554245
+ -23.591626
+ -22.406106
+ -15.121907
+ -4.785684
+ -20.893742
+ -22.220479
+ -21.025520
+ -5.712776
+ -20.671489
+ -21.903670
+ -20.328022
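The 204 values above are the 68-point 3D face model consumed by pose_estimation.py: the file stores all 68 X coordinates first, then the 68 Y coordinates, then the 68 Z coordinates, which is why _get_full_model_points reshapes the flat array with reshape(3, -1).T and then flips the Z axis. A small sanity-check sketch, assuming the file is available as assets/model.txt:

import numpy as np

values = np.loadtxt("assets/model.txt", dtype=np.float32)  # 204 floats, one per line
points = values.reshape(3, -1).T                           # -> (68, 3): x, y, z per landmark
points[:, 2] *= -1                                         # same front-view flip as PoseEstimator
print(points.shape)                                        # (68, 3)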
face_detection.py
ADDED
@@ -0,0 +1,279 @@
+ import os
+ import cv2
+ import numpy as np
+ import onnxruntime
+
+
+ # Helper functions that decode the model's distance predictions into boxes and key points.
+ def distance2bbox(points, distance, max_shape=None):
+     x1 = points[:, 0] - distance[:, 0]
+     y1 = points[:, 1] - distance[:, 1]
+     x2 = points[:, 0] + distance[:, 2]
+     y2 = points[:, 1] + distance[:, 3]
+     if max_shape is not None:
+         x1 = x1.clamp(min=0, max=max_shape[1])
+         y1 = y1.clamp(min=0, max=max_shape[0])
+         x2 = x2.clamp(min=0, max=max_shape[1])
+         y2 = y2.clamp(min=0, max=max_shape[0])
+     return np.stack([x1, y1, x2, y2], axis=-1)
+
+
+ def distance2kps(points, distance, max_shape=None):
+     preds = []
+     for i in range(0, distance.shape[1], 2):
+         px = points[:, i % 2] + distance[:, i]
+         py = points[:, i % 2 + 1] + distance[:, i + 1]
+         if max_shape is not None:
+             px = px.clamp(min=0, max=max_shape[1])
+             py = py.clamp(min=0, max=max_shape[0])
+         preds.append(px)
+         preds.append(py)
+     return np.stack(preds, axis=-1)
+
+ # Face detector.
+ class FaceDetector:
+     def __init__(self, model_file):
+         assert os.path.exists(model_file), f"File not found: {model_file}"
+         self.center_cache = {}
+         self.nms_threshold = 0.4
+         self.session = onnxruntime.InferenceSession(
+             model_file, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
+
+         # Get model configurations from the model file.
+         # What is the input like?
+         input_cfg = self.session.get_inputs()[0]
+         input_name = input_cfg.name
+         input_shape = input_cfg.shape
+         self.input_size = tuple(input_shape[2:4][::-1])
+
+         # How about the outputs?
+         outputs = self.session.get_outputs()
+         output_names = []
+         for o in outputs:
+             output_names.append(o.name)
+         self.input_name = input_name
+         self.output_names = output_names
+
+         # And any key points?
+         self._with_kps = False
+         self._anchor_ratio = 1.0
+         self._num_anchors = 1
+
+         if len(outputs) == 6:
+             self._offset = 3
+             self._strides = [8, 16, 32]
+             self._num_anchors = 2
+         elif len(outputs) == 9:
+             self._offset = 3
+             self._strides = [8, 16, 32]
+             self._num_anchors = 2
+             self._with_kps = True
+         elif len(outputs) == 10:
+             self._offset = 5
+             self._strides = [8, 16, 32, 64, 128]
+             self._num_anchors = 1
+         elif len(outputs) == 15:
+             self._offset = 5
+             self._strides = [8, 16, 32, 64, 128]
+             self._num_anchors = 1
+             self._with_kps = True
+
+     def _preprocess(self, image):
+         inputs = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
+         inputs = inputs - np.array([127.5, 127.5, 127.5])
+         inputs = inputs / 128
+         inputs = np.expand_dims(inputs, axis=0)
+         inputs = np.transpose(inputs, [0, 3, 1, 2])
+
+         return inputs.astype(np.float32)
+
+     def forward(self, img, threshold):
+         scores_list = []
+         bboxes_list = []
+         kpss_list = []
+
+         inputs = self._preprocess(img)
+         predictions = self.session.run(
+             self.output_names, {self.input_name: inputs})
+
+         input_height = inputs.shape[2]
+         input_width = inputs.shape[3]
+         offset = self._offset
+
+         for idx, stride in enumerate(self._strides):
+             scores_pred = predictions[idx]
+             bbox_preds = predictions[idx + offset] * stride
+             if self._with_kps:
+                 kps_preds = predictions[idx + offset * 2] * stride
+
+             # Generate the anchors.
+             height = input_height // stride
+             width = input_width // stride
+             key = (height, width, stride)
+
+             if key in self.center_cache:
+                 anchor_centers = self.center_cache[key]
+             else:
+                 # solution-3:
+                 anchor_centers = np.stack(
+                     np.mgrid[:height, :width][::-1], axis=-1).astype(np.float32)
+                 anchor_centers = (anchor_centers * stride).reshape((-1, 2))
+
+                 if self._num_anchors > 1:
+                     anchor_centers = np.stack(
+                         [anchor_centers] * self._num_anchors, axis=1).reshape((-1, 2))
+
+                 if len(self.center_cache) < 100:
+                     self.center_cache[key] = anchor_centers
+
+             # solution-1, c style:
+             # anchor_centers = np.zeros( (height, width, 2), dtype=np.float32 )
+             # for i in range(height):
+             #     anchor_centers[i, :, 1] = i
+             # for i in range(width):
+             #     anchor_centers[:, i, 0] = i
+
+             # solution-2:
+             # ax = np.arange(width, dtype=np.float32)
+             # ay = np.arange(height, dtype=np.float32)
+             # xv, yv = np.meshgrid(np.arange(width), np.arange(height))
+             # anchor_centers = np.stack([xv, yv], axis=-1).astype(np.float32)
+
+             # Filter the results by scores and threshold.
+             pos_inds = np.where(scores_pred >= threshold)[0]
+             bboxes = distance2bbox(anchor_centers, bbox_preds)
+             pos_scores = scores_pred[pos_inds]
+             pos_bboxes = bboxes[pos_inds]
+             scores_list.append(pos_scores)
+             bboxes_list.append(pos_bboxes)
+
+             if self._with_kps:
+                 kpss = distance2kps(anchor_centers, kps_preds)
+                 kpss = kpss.reshape((kpss.shape[0], -1, 2))
+                 pos_kpss = kpss[pos_inds]
+                 kpss_list.append(pos_kpss)
+
+         return scores_list, bboxes_list, kpss_list
+
+     def _nms(self, detections):
+         """Non-maximum suppression."""
+         x1 = detections[:, 0]
+         y1 = detections[:, 1]
+         x2 = detections[:, 2]
+         y2 = detections[:, 3]
+         scores = detections[:, 4]
+
+         areas = (x2 - x1 + 1) * (y2 - y1 + 1)
+         order = scores.argsort()[::-1]
+
+         keep = []
+         while order.size > 0:
+             i = order[0]
+             keep.append(i)
+
+             _x1 = np.maximum(x1[i], x1[order[1:]])
+             _y1 = np.maximum(y1[i], y1[order[1:]])
+             _x2 = np.minimum(x2[i], x2[order[1:]])
+             _y2 = np.minimum(y2[i], y2[order[1:]])
+
+             w = np.maximum(0.0, _x2 - _x1 + 1)
+             h = np.maximum(0.0, _y2 - _y1 + 1)
+             inter = w * h
+             ovr = inter / (areas[i] + areas[order[1:]] - inter)
+
+             inds = np.where(ovr <= self.nms_threshold)[0]
+             order = order[inds + 1]
+
+         return keep
+
+     def detect(self, img, threshold=0.5, input_size=None, max_num=0, metric='default'):
+         input_size = self.input_size if input_size is None else input_size
+
+         # Rescale the image?
+         img_height, img_width, _ = img.shape
+         ratio_img = float(img_height) / img_width
+
+         input_width, input_height = input_size
+         ratio_model = float(input_height) / input_width
+
+         if ratio_img > ratio_model:
+             new_height = input_height
+             new_width = int(new_height / ratio_img)
+         else:
+             new_width = input_width
+             new_height = int(new_width * ratio_img)
+
+         det_scale = float(new_height) / img_height
+         resized_img = cv2.resize(img, (new_width, new_height))
+
+         det_img = np.zeros((input_size[1], input_size[0], 3), dtype=np.uint8)
+         det_img[:new_height, :new_width, :] = resized_img
+
+         scores_list, bboxes_list, kpss_list = self.forward(det_img, threshold)
+         scores = np.vstack(scores_list)
+         scores_ravel = scores.ravel()
+         order = scores_ravel.argsort()[::-1]
+
+         bboxes = np.vstack(bboxes_list) / det_scale
+
+         if self._with_kps:
+             kpss = np.vstack(kpss_list) / det_scale
+         pre_det = np.hstack((bboxes, scores)).astype(np.float32, copy=False)
+         pre_det = pre_det[order, :]
+
+         keep = self._nms(pre_det)
+
+         det = pre_det[keep, :]
+
+         if self._with_kps:
+             kpss = kpss[order, :, :]
+             kpss = kpss[keep, :, :]
+         else:
+             kpss = None
+
+         if max_num > 0 and det.shape[0] > max_num:
+             area = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1])
+             img_center = img.shape[0] // 2, img.shape[1] // 2
+             offsets = np.vstack([
+                 (det[:, 0] + det[:, 2]) / 2 - img_center[1],
+                 (det[:, 1] + det[:, 3]) / 2 - img_center[0]])
+
+             offset_dist_squared = np.sum(np.power(offsets, 2.0), 0)
+
+             if metric == 'max':
+                 values = area
+             else:
+                 # some extra weight on the centering
+                 values = area - offset_dist_squared * 2.0
+
+             # Keep the top max_num detections.
+             bindex = np.argsort(values)[::-1]
+             bindex = bindex[0:max_num]
+             det = det[bindex, :]
+
+             if kpss is not None:
+                 kpss = kpss[bindex, :]
+
+         return det, kpss
+
+     def visualize(self, image, results, box_color=(0, 255, 0), text_color=(0, 0, 0)):
+         """Visualize the detection results.
+
+         Args:
+             image (np.ndarray): image to draw marks on.
+             results (np.ndarray): face detection results.
+             box_color (tuple, optional): color of the face box. Defaults to (0, 255, 0).
+             text_color (tuple, optional): color of the label text. Defaults to (0, 0, 0).
+         """
+         for det in results:
+             bbox = det[0:4].astype(np.int32)
+             conf = det[-1]
+             cv2.rectangle(image, (bbox[0], bbox[1]),
+                           (bbox[2], bbox[3]), box_color)
+             label = f"face: {conf:.2f}"
+             label_size, base_line = cv2.getTextSize(
+                 label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+             cv2.rectangle(image, (bbox[0], bbox[1] - label_size[1]),
+                           (bbox[2], bbox[1] + base_line), box_color, cv2.FILLED)
+             cv2.putText(image, label, (bbox[0], bbox[1]),
+                         cv2.FONT_HERSHEY_SIMPLEX, 0.5, text_color)
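A minimal usage sketch for the detector defined above; the image path is hypothetical, and the 0.7 threshold matches the one used in app.py:

import cv2
from face_detection import FaceDetector

detector = FaceDetector("assets/face_detector.onnx")
image = cv2.imread("sample_face.jpg")                 # any BGR image (hypothetical path)
faces, keypoints = detector.detect(image, threshold=0.7)
print(f"{len(faces)} face(s) found")                  # each row of faces: x1, y1, x2, y2, score
detector.visualize(image, faces)                      # draw boxes and confidence labels in place
cv2.imwrite("sample_face_annotated.jpg", image)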
mark_detection.py
ADDED
@@ -0,0 +1,56 @@
+ import os
+
+ import cv2
+ import numpy as np
+ import onnxruntime as ort
+
+
+ class MarkDetector:
+     """Facial landmark detector by Convolutional Neural Network"""
+
+     def __init__(self, model_file):
+         """Initialize a mark detector.
+
+         Args:
+             model_file (str): ONNX model path.
+         """
+         assert os.path.exists(model_file), f"File not found: {model_file}"
+         self._input_size = 128
+         self.model = ort.InferenceSession(
+             model_file, providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
+
+     def _preprocess(self, bgrs):
+         """Preprocess the inputs to meet the model's needs.
+
+         Args:
+             bgrs (np.ndarray): a list of input images in BGR format.
+
+         Returns:
+             list: a list of resized images in RGB format.
+         """
+         rgbs = []
+         for img in bgrs:
+             img = cv2.resize(img, (self._input_size, self._input_size))
+             img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+             rgbs.append(img)
+
+         return rgbs
+
+     def detect(self, images):
+         """Detect facial marks from a face image.
+
+         Args:
+             images: a list of face images.
+
+         Returns:
+             marks: the facial marks as a numpy array of shape [Batch, 68*2].
+         """
+         inputs = self._preprocess(images)
+         marks = self.model.run(["dense_1"], {"image_input": inputs})
+         return np.array(marks)
+
+     def visualize(self, image, marks, color=(255, 255, 255)):
+         """Draw mark points on image"""
+         for mark in marks:
+             cv2.circle(image, (int(mark[0]), int(
+                 mark[1])), 1, color, -1, cv2.LINE_AA)
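The landmark model returns 68 points normalized to the face crop, so callers have to scale and shift them back into full-image coordinates. A sketch mirroring the steps app.py performs; the patch and the x1/y1/x2/y2 box are assumed to come from FaceDetector plus refine(), which makes the crop square:

from mark_detection import MarkDetector

mark_detector = MarkDetector("assets/face_landmarks.onnx")

def marks_in_image(patch, x1, y1, x2, y2):
    """Return the 68 (x, y) landmarks of a square face crop in full-image coordinates."""
    marks = mark_detector.detect([patch])[0].reshape([68, 2])  # normalized to [0, 1]
    marks *= (x2 - x1)    # scale to the crop size
    marks[:, 0] += x1     # shift back into the original frame
    marks[:, 1] += y1
    return marks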
pose_estimation.py
ADDED
@@ -0,0 +1,132 @@
+ """Estimate head pose according to the facial landmarks"""
+ import cv2
+ import numpy as np
+
+
+ class PoseEstimator:
+     """Estimate head pose according to the facial landmarks"""
+
+     def __init__(self, image_width, image_height):
+         """Init a pose estimator.
+
+         Args:
+             image_width (int): input image width
+             image_height (int): input image height
+         """
+         self.size = (image_height, image_width)
+         self.model_points_68 = self._get_full_model_points()
+
+         # Camera internals
+         self.focal_length = self.size[1]
+         self.camera_center = (self.size[1] / 2, self.size[0] / 2)
+         self.camera_matrix = np.array(
+             [[self.focal_length, 0, self.camera_center[0]],
+              [0, self.focal_length, self.camera_center[1]],
+              [0, 0, 1]], dtype="double")
+
+         # Assuming no lens distortion
+         self.dist_coeefs = np.zeros((4, 1))
+
+         # Rotation vector and translation vector
+         self.r_vec = np.array([[0.01891013], [0.08560084], [-3.14392813]])
+         self.t_vec = np.array(
+             [[-14.97821226], [-10.62040383], [-2053.03596872]])
+
+     def _get_full_model_points(self, filename='assets/model.txt'):
+         """Get all 68 3D model points from file"""
+         raw_value = []
+         with open(filename) as file:
+             for line in file:
+                 raw_value.append(line)
+         model_points = np.array(raw_value, dtype=np.float32)
+         model_points = np.reshape(model_points, (3, -1)).T
+
+         # Transform the model into a front view.
+         model_points[:, 2] *= -1
+
+         return model_points
+
+     def solve(self, points):
+         """Solve pose with all the 68 image points
+         Args:
+             points (np.ndarray): points on image.
+
+         Returns:
+             Tuple: (rotation_vector, translation_vector) as pose.
+         """
+
+         if self.r_vec is None:
+             (_, rotation_vector, translation_vector) = cv2.solvePnP(
+                 self.model_points_68, points, self.camera_matrix, self.dist_coeefs)
+             self.r_vec = rotation_vector
+             self.t_vec = translation_vector
+
+         (_, rotation_vector, translation_vector) = cv2.solvePnP(
+             self.model_points_68,
+             points,
+             self.camera_matrix,
+             self.dist_coeefs,
+             rvec=self.r_vec,
+             tvec=self.t_vec,
+             useExtrinsicGuess=True)
+
+         return (rotation_vector, translation_vector)
+
+     def visualize(self, image, pose, color=(255, 255, 255), line_width=2):
+         """Draw a 3D box as annotation of pose"""
+         rotation_vector, translation_vector = pose
+         point_3d = []
+         rear_size = 75
+         rear_depth = 0
+         point_3d.append((-rear_size, -rear_size, rear_depth))
+         point_3d.append((-rear_size, rear_size, rear_depth))
+         point_3d.append((rear_size, rear_size, rear_depth))
+         point_3d.append((rear_size, -rear_size, rear_depth))
+         point_3d.append((-rear_size, -rear_size, rear_depth))
+
+         front_size = 100
+         front_depth = 100
+         point_3d.append((-front_size, -front_size, front_depth))
+         point_3d.append((-front_size, front_size, front_depth))
+         point_3d.append((front_size, front_size, front_depth))
+         point_3d.append((front_size, -front_size, front_depth))
+         point_3d.append((-front_size, -front_size, front_depth))
+         point_3d = np.array(point_3d, dtype=np.float32).reshape(-1, 3)
+
+         # Map to 2d image points
+         (point_2d, _) = cv2.projectPoints(point_3d,
+                                           rotation_vector,
+                                           translation_vector,
+                                           self.camera_matrix,
+                                           self.dist_coeefs)
+         point_2d = np.int32(point_2d.reshape(-1, 2))
+
+         # Draw all the lines
+         cv2.polylines(image, [point_2d], True, color, line_width, cv2.LINE_AA)
+         cv2.line(image, tuple(point_2d[1]), tuple(
+             point_2d[6]), color, line_width, cv2.LINE_AA)
+         cv2.line(image, tuple(point_2d[2]), tuple(
+             point_2d[7]), color, line_width, cv2.LINE_AA)
+         cv2.line(image, tuple(point_2d[3]), tuple(
+             point_2d[8]), color, line_width, cv2.LINE_AA)
+
+     def draw_axes(self, img, pose):
+         R, t = pose
+         img = cv2.drawFrameAxes(img, self.camera_matrix,
+                                 self.dist_coeefs, R, t, 30)
+
+     def show_3d_model(self):
+         from matplotlib import pyplot
+         from mpl_toolkits.mplot3d import Axes3D
+         fig = pyplot.figure()
+         ax = Axes3D(fig)
+
+         x = self.model_points_68[:, 0]
+         y = self.model_points_68[:, 1]
+         z = self.model_points_68[:, 2]
+
+         ax.scatter(x, y, z)
+         ax.axis('square')
+         pyplot.xlabel('x')
+         pyplot.ylabel('y')
+         pyplot.show()
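PoseEstimator.solve returns an OpenCV (rotation_vector, translation_vector) pair rather than named angles. If pitch/yaw/roll values are wanted (for example, to print them in the Streamlit UI), one common decomposition is sketched below; this is an add-on, not something this module does itself:

import cv2

def euler_angles(rotation_vector):
    """Convert a Rodrigues rotation vector into (pitch, yaw, roll) in degrees."""
    rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
    angles, *_ = cv2.RQDecomp3x3(rotation_matrix)  # Euler angles are the first return value
    return angles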
requirements.txt
CHANGED
@@ -1 +1,6 @@
- opencv-python-headless
+ opencv-python-headless
+ numpy
+ tempfile
+ time
+ onnxruntime
+ os
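Of the added entries, tempfile, time, and os are Python standard-library modules rather than PyPI packages, so pip cannot install them; the third-party packages this Space actually needs are likely just opencv-python-headless, numpy, and onnxruntime (Streamlit itself is provided by the Space runtime).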
utils.py
ADDED
@@ -0,0 +1,41 @@
+ """A module provides a bunch of helper functions."""
+ import numpy as np
+
+
+ def refine(boxes, max_width, max_height, shift=0.1):
+     """Refine the face boxes to suit the face landmark detection's needs.
+
+     Args:
+         boxes: [[x1, y1, x2, y2], ...]
+         max_width: Value larger than this will be clipped.
+         max_height: Value larger than this will be clipped.
+         shift (float, optional): How much to shift the face box down. Defaults to 0.1.
+
+     Returns:
+         Refined results.
+     """
+     refined = boxes.copy()
+     width = refined[:, 2] - refined[:, 0]
+     height = refined[:, 3] - refined[:, 1]
+
+     # Move the boxes in Y direction
+     shift = height * shift
+     refined[:, 1] += shift
+     refined[:, 3] += shift
+     center_x = (refined[:, 0] + refined[:, 2]) / 2
+     center_y = (refined[:, 1] + refined[:, 3]) / 2
+
+     # Make the boxes squares
+     square_sizes = np.maximum(width, height)
+     refined[:, 0] = center_x - square_sizes / 2
+     refined[:, 1] = center_y - square_sizes / 2
+     refined[:, 2] = center_x + square_sizes / 2
+     refined[:, 3] = center_y + square_sizes / 2
+
+     # Clip the boxes for safety
+     refined[:, 0] = np.clip(refined[:, 0], 0, max_width)
+     refined[:, 1] = np.clip(refined[:, 1], 0, max_height)
+     refined[:, 2] = np.clip(refined[:, 2], 0, max_width)
+     refined[:, 3] = np.clip(refined[:, 3], 0, max_height)
+
+     return refined
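A worked example of refine() on a single box, showing the shift-down, square, and clip steps (the numbers are hypothetical):

import numpy as np
from utils import refine

boxes = np.array([[100.0, 120.0, 220.0, 200.0]])    # one 120x80 face box
square = refine(boxes, max_width=640, max_height=480, shift=0.1)
# height is 80, so the box is shifted down by 8 px, then grown to a
# 120x120 square around its new center and clipped to the frame:
print(square)                                        # [[100. 108. 220. 228.]]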