mediapipe-head-pose-estimation / cv2_pose_estimate.py
Akjava's picture
broken opencv-version
661ec13
raw
history blame
10.2 kB
import cv2
import numpy as np
from numpy.typing import NDArray
import sys
from mp_utils import get_pixel_cordinate_list,extract_landmark
def estimate_head_pose(im: NDArray, model_points: NDArray, image_points,camera_matrix: NDArray, dist_coeffs: NDArray,flags = cv2.SOLVEPNP_ITERATIVE,rotation_vector=None,translation_vector=None) -> tuple[NDArray, NDArray]:
"""
Estimates the head pose from an image.
Args:
image_path: Path to the image file.
model_points: 3D model points.
camera_matrix: Camera intrinsic matrix.
dist_coeffs: Lens distortion coefficients.
Returns:
rotation_vector: Estimated rotation vector.
translation_vector: Estimated translation vector.
"""
size = im.shape
'''
image_points: NDArray = np.array([
(359, 391), # Nose tip
(399, 561), # Chin
(337, 297), # Left eye left corner
(513, 301), # Right eye right corne
(345, 465), # Left Mouth corner
(453, 469) # Right mouth corner
], dtype="double")
'''
model_points = model_points +500
(success, rotation_vector, translation_vector) = cv2.solvePnP(
model_points, image_points, camera_matrix, dist_coeffs,flags=flags,
)
print(model_points)
print(image_points)
print(camera_matrix)
if not success:
raise RuntimeError("solvePnP failed.")
return rotation_vector, translation_vector
import cv2
import numpy as np
from numpy.typing import NDArray
def draw_head_pose(image: NDArray, image_points: NDArray, rotation_vector: NDArray, translation_vector: NDArray, camera_matrix: NDArray, dist_coeffs: NDArray,color_max=255,offset_x=0,offset_y=0) -> NDArray:
"""
Draws the head pose (XYZ axes) on the image.
Args:
image: Input image.
image_points: 2D image points.
rotation_vector: Estimated rotation vector.
translation_vector: Estimated translation vector.
camera_matrix: Camera intrinsic matrix.
dist_coeffs: Lens distortion coefficients.
Returns:
Image with head pose drawn.
"""
# Define the 3D points for the XYZ axes
axis_length = 500.0 # Length of the axes
axis_points_3D: NDArray = np.array([
[0, 0, 0], # Origin
[axis_length, 0, 0], # X axis
[0, axis_length, 0], # Y axis
[0, 0, axis_length] # Z axis
], dtype='float32')
# Project the 3D points to the 2D image plane
(axis_points_2D, _) = cv2.projectPoints(
axis_points_3D, rotation_vector, translation_vector, camera_matrix, dist_coeffs
)
axis_points_2D = axis_points_2D.astype(int)
# Draw the axes on the image
origin = tuple(axis_points_2D[0].ravel())
cv2.line(image, origin, tuple(axis_points_2D[1].ravel()), (0, 0, color_max), 3) # X axis (Red)
cv2.line(image, origin, tuple(axis_points_2D[2].ravel()), (0, color_max, 0), 3) # Y axis (Green)
cv2.line(image, origin, tuple(axis_points_2D[3].ravel()), (color_max, 0, 0), 3) # Z axis (Blue)
for p in image_points:
cv2.circle(image, (int(p[0]+offset_x), int(p[1]+offset_y)), 3, (0, 0, 255), -1)
return image
def main():
# 3D model points.
'''
model_points: NDArray = np.array([
(0.0, 0.0, 0.0), # Nose tip
(0.0, 300.0, -65.0), # Chin
(-225.0, -170.0, -135.0), # Left eye left corner
(225.0, -170.0, -135.0), # Right eye right corne
(-150.0, -150.0, -125.0), # Left Mouth corner
(150.0, -150.0, -125.0) # Right mouth corner
])
'''
model_points: NDArray = np.array([
(0.0, 0.0, 0.0), # Nose tip
(0.0, -344.0, -40.0), # Chin
#(0.0, -160.0, -50.0),#center of eye
(-110.0, 215.0, -60.0), #inner Left eye left corner
(110.0, 215.0, -60.0), #inner Right eye right corne
(-300.0, 250.0, -90.0), # Left eye left corner
(300.0, 250.0, -90.0), # Right eye right corne
(-185.0, -180.0, -70.0), # Left Mouth corner
(185.0, -180.0, -70.0) # Right mouth corner
])
"""
model_points: NDArray = np.array([
(0.0, 0.0, 0.0), # Nose tip
(0.0, -450.0, 0.0), # Chin
(-110.0, 175.0, -20.0), #inner Left eye left corner
(110.0, 175.0, -20.0), #inner Right eye right corne
(-300.0, 200.0, -40.0), # Left eye left corner
(300.0, 200.0, -40.0), # Right eye right corne
(-176.0, -200.0, -20.0), # Left Mouth corner
(175.0, -200.0, -20.0) # Right mouth corner
])
"""
square_model_points: NDArray = np.array([
(-100.0, -100.0, 0), # Left eye left corner
(100.0, -100.0, 0), # Right eye right corne
(-100.0, 100.0, 0), # Left Mouth corner
(100.0, 100.0, 0) # Right mouth corner
])
# Example image and camera parameters (replace with actual values)
image_path = sys.argv[1]
mp_image,face_landmarker_result = extract_landmark(image_path)
im = mp_image.numpy_view()
h,w = im.shape[:2]
cordinates = get_pixel_cordinate_list(face_landmarker_result.face_landmarks,[4,199,#6,#center of eye
33,263,133,362,61,291],w,h)
print(cordinates)
image_points: NDArray = np.array(cordinates, dtype="double")
import math
def calculate_distance(xy, xy2):
return math.sqrt((xy2[0] - xy[0])**2 + (xy2[1] - xy[1])**2)
if im is None:
raise FileNotFoundError(f"Could not open or find the image file: {image_path}")
size = im.shape
focal_length: float = calculate_distance(cordinates[0],cordinates[1])
focal_length = focal_length*1.5
print("focal length",focal_length)
center: tuple[float, float] = (size[1] / 2, size[0] / 2)
center = cordinates[0]
camera_matrix: NDArray = np.array([
[focal_length, 0, center[0]],
[0, focal_length, center[1]],
[0, 0, 1]
], dtype="double")
dist_coeffs: NDArray = np.zeros((4, 1)) # Assuming no lens distortion
# 2D image points. If you change the image, you need to change vector
'''
image_points: NDArray = np.array([
(321, 571), # Nose tip
(423, 852), # Chin
(201, 406), # Left eye left corner
(529, 363), # Right eye right corne
(336, 705), # Left Mouth corner
(483, 693) # Right mouth corner
], dtype="double")
'''
"""
image_points: NDArray = np.array([
#(663, 325), # Nose tip
(655,388),
(705, 555), # Chin
(549, 296), # inner Left eye left corner
(651, 291), # inner Right eye right corne
(453, 303), # Left eye left corner
(718, 294), # Right eye right corne
(591, 474), # Left Mouth corner
(715, 472) # Right mouth corner
], dtype="double")
"""
square_image_points: NDArray = np.array([
(549, 296), # Nose tip
(651, 291), # Chin
(573, 386), # Left eye left corner
(691, 370), # Right eye right corne
], dtype="double")
flags_list = [
cv2.SOLVEPNP_EPNP#cv2.SOLVEPNP_ITERATIVE#,cv2.SOLVEPNP_SQPNP,cv2.SOLVEPNP_EPNP
]
im_with_pose = im.copy()
for flags in flags_list:
rotation_vector, translation_vector = estimate_head_pose(image_path, model_points,image_points, camera_matrix, dist_coeffs,flags)
#print(f"Rotation Vector:\n {rotation_vector}")
#print(f"Translation Vector:\n {translation_vector}")
#initial
#im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs)
from scipy.spatial.transform import Rotation as R
def print_euler(rotation_vector):
order = "yxz"
rotation_matrix, _ = cv2.Rodrigues(rotation_vector)
r = R.from_matrix(rotation_matrix)
euler_angles = r.as_euler(order, degrees=True)
print(f"Euler Angles {order} (degrees): {euler_angles}")
print_euler(rotation_vector)
criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 1000, 1e-8) # 反復終了条件
rotation_vector, translation_vector = cv2.solvePnPRefineLM(model_points, image_points, camera_matrix, dist_coeffs, rotation_vector, translation_vector, criteria=criteria)
im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs,128)
print_euler(rotation_vector)
#rotation_vector[0]=0
#rotation_vector[1]=0
#rotation_vector[2]=0
#(success, rotation_vector, translation_vector) = cv2.solvePnP(
# model_points, image_points, camera_matrix, dist_coeffs,rotation_vector ,translation_vector,flags=cv2.SOLVEPNP_ITERATIVE)
im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs)
#print_euler(rotation_vector)
(rotation_matrix, jacobian) = cv2.Rodrigues(rotation_vector)
mat = np.hstack((rotation_matrix, translation_vector))
#yaw,pitch,rollの取り出し
(_, _, _, _, _, _, eulerAngles) = cv2.decomposeProjectionMatrix(mat)
print(eulerAngles)
#rvec, tvec = cv2.solvePnPRefineVVS(model_points, image_points, camera_matrix, dist_coeffs, rotation_vector, translation_vector, criteria=criteria)
#im_with_pose = draw_head_pose(im_with_pose, image_points, rvec, tvec, camera_matrix, dist_coeffs)
#square
#rvec, tvec = estimate_head_pose(image_path, square_model_points,square_image_points, camera_matrix, dist_coeffs,cv2.SOLVEPNP_IPPE_SQUARE)
#not so good
#im_with_pose = draw_head_pose(im_with_pose, square_image_points, rvec, tvec, camera_matrix, dist_coeffs)
#print(rotation_matrix)
# 回転行列をオイラー角に変換
#euler_angles = cv2.decomposeProjectionMatrix(rotation_matrix)[-1]
# オイラー角の表示 (x, y, z)
# Display image
cv2.imshow("Output", cv2.cvtColor(im_with_pose, cv2.COLOR_BGR2RGB))
cv2.waitKey(0)
cv2.destroyAllWindows()
cv2.imwrite("result.jpg",cv2.cvtColor(im_with_pose, cv2.COLOR_BGR2RGB))
if __name__ == "__main__":
main()