Spaces:

Akjava
/

mediapipe-head-pose-estimation

Running

App Files Files Community

mediapipe-head-pose-estimation / cv2_pose_estimate.py

Akjava

broken opencv-version

661ec13 2 months ago

raw

history blame

10.2 kB

	import cv2
	import numpy as np
	from numpy.typing import NDArray
	import sys
	from mp_utils import get_pixel_cordinate_list,extract_landmark
	def estimate_head_pose(im: NDArray, model_points: NDArray, image_points,camera_matrix: NDArray, dist_coeffs: NDArray,flags = cv2.SOLVEPNP_ITERATIVE,rotation_vector=None,translation_vector=None) -> tuple[NDArray, NDArray]:
	"""
	Estimates the head pose from an image.

	Args:
	image_path: Path to the image file.
	model_points: 3D model points.
	camera_matrix: Camera intrinsic matrix.
	dist_coeffs: Lens distortion coefficients.

	Returns:
	rotation_vector: Estimated rotation vector.
	translation_vector: Estimated translation vector.
	"""
	size = im.shape

	'''
	image_points: NDArray = np.array([
	(359, 391), # Nose tip
	(399, 561), # Chin
	(337, 297), # Left eye left corner
	(513, 301), # Right eye right corne
	(345, 465), # Left Mouth corner
	(453, 469) # Right mouth corner
	], dtype="double")
	'''

	model_points = model_points +500
	(success, rotation_vector, translation_vector) = cv2.solvePnP(
	model_points, image_points, camera_matrix, dist_coeffs,flags=flags,
	)
	print(model_points)
	print(image_points)
	print(camera_matrix)

	if not success:
	raise RuntimeError("solvePnP failed.")

	return rotation_vector, translation_vector

	import cv2
	import numpy as np
	from numpy.typing import NDArray

	def draw_head_pose(image: NDArray, image_points: NDArray, rotation_vector: NDArray, translation_vector: NDArray, camera_matrix: NDArray, dist_coeffs: NDArray,color_max=255,offset_x=0,offset_y=0) -> NDArray:
	"""
	Draws the head pose (XYZ axes) on the image.

	Args:
	image: Input image.
	image_points: 2D image points.
	rotation_vector: Estimated rotation vector.
	translation_vector: Estimated translation vector.
	camera_matrix: Camera intrinsic matrix.
	dist_coeffs: Lens distortion coefficients.

	Returns:
	Image with head pose drawn.
	"""

	# Define the 3D points for the XYZ axes
	axis_length = 500.0 # Length of the axes
	axis_points_3D: NDArray = np.array([
	[0, 0, 0], # Origin
	[axis_length, 0, 0], # X axis
	[0, axis_length, 0], # Y axis
	[0, 0, axis_length] # Z axis
	], dtype='float32')

	# Project the 3D points to the 2D image plane
	(axis_points_2D, _) = cv2.projectPoints(
	axis_points_3D, rotation_vector, translation_vector, camera_matrix, dist_coeffs
	)
	axis_points_2D = axis_points_2D.astype(int)

	# Draw the axes on the image
	origin = tuple(axis_points_2D[0].ravel())
	cv2.line(image, origin, tuple(axis_points_2D[1].ravel()), (0, 0, color_max), 3) # X axis (Red)
	cv2.line(image, origin, tuple(axis_points_2D[2].ravel()), (0, color_max, 0), 3) # Y axis (Green)
	cv2.line(image, origin, tuple(axis_points_2D[3].ravel()), (color_max, 0, 0), 3) # Z axis (Blue)

	for p in image_points:
	cv2.circle(image, (int(p[0]+offset_x), int(p[1]+offset_y)), 3, (0, 0, 255), -1)

	return image

	def main():
	# 3D model points.
	'''
	model_points: NDArray = np.array([
	(0.0, 0.0, 0.0), # Nose tip
	(0.0, 300.0, -65.0), # Chin
	(-225.0, -170.0, -135.0), # Left eye left corner
	(225.0, -170.0, -135.0), # Right eye right corne
	(-150.0, -150.0, -125.0), # Left Mouth corner
	(150.0, -150.0, -125.0) # Right mouth corner
	])
	'''

	model_points: NDArray = np.array([
	(0.0, 0.0, 0.0), # Nose tip
	(0.0, -344.0, -40.0), # Chin
	#(0.0, -160.0, -50.0),#center of eye
	(-110.0, 215.0, -60.0), #inner Left eye left corner
	(110.0, 215.0, -60.0), #inner Right eye right corne
	(-300.0, 250.0, -90.0), # Left eye left corner
	(300.0, 250.0, -90.0), # Right eye right corne
	(-185.0, -180.0, -70.0), # Left Mouth corner
	(185.0, -180.0, -70.0) # Right mouth corner
	])



	"""

	model_points: NDArray = np.array([
	(0.0, 0.0, 0.0), # Nose tip
	(0.0, -450.0, 0.0), # Chin
	(-110.0, 175.0, -20.0), #inner Left eye left corner
	(110.0, 175.0, -20.0), #inner Right eye right corne
	(-300.0, 200.0, -40.0), # Left eye left corner
	(300.0, 200.0, -40.0), # Right eye right corne
	(-176.0, -200.0, -20.0), # Left Mouth corner
	(175.0, -200.0, -20.0) # Right mouth corner
	])
	"""

	square_model_points: NDArray = np.array([
	(-100.0, -100.0, 0), # Left eye left corner
	(100.0, -100.0, 0), # Right eye right corne
	(-100.0, 100.0, 0), # Left Mouth corner
	(100.0, 100.0, 0) # Right mouth corner
	])


	# Example image and camera parameters (replace with actual values)
	image_path = sys.argv[1]
	mp_image,face_landmarker_result = extract_landmark(image_path)
	im = mp_image.numpy_view()
	h,w = im.shape[:2]
	cordinates = get_pixel_cordinate_list(face_landmarker_result.face_landmarks,[4,199,#6,#center of eye
	33,263,133,362,61,291],w,h)
	print(cordinates)
	image_points: NDArray = np.array(cordinates, dtype="double")



	import math
	def calculate_distance(xy, xy2):
	return math.sqrt((xy2[0] - xy[0])2 + (xy2[1] - xy[1])2)

	if im is None:
	raise FileNotFoundError(f"Could not open or find the image file: {image_path}")
	size = im.shape
	focal_length: float = calculate_distance(cordinates[0],cordinates[1])
	focal_length = focal_length*1.5
	print("focal length",focal_length)
	center: tuple[float, float] = (size[1] / 2, size[0] / 2)
	center = cordinates[0]
	camera_matrix: NDArray = np.array([
	[focal_length, 0, center[0]],
	[0, focal_length, center[1]],
	[0, 0, 1]
	], dtype="double")
	dist_coeffs: NDArray = np.zeros((4, 1)) # Assuming no lens distortion

	# 2D image points. If you change the image, you need to change vector
	'''
	image_points: NDArray = np.array([
	(321, 571), # Nose tip
	(423, 852), # Chin
	(201, 406), # Left eye left corner
	(529, 363), # Right eye right corne
	(336, 705), # Left Mouth corner
	(483, 693) # Right mouth corner
	], dtype="double")
	'''
	"""
	image_points: NDArray = np.array([
	#(663, 325), # Nose tip
	(655,388),
	(705, 555), # Chin
	(549, 296), # inner Left eye left corner
	(651, 291), # inner Right eye right corne
	(453, 303), # Left eye left corner
	(718, 294), # Right eye right corne
	(591, 474), # Left Mouth corner
	(715, 472) # Right mouth corner
	], dtype="double")
	"""


	square_image_points: NDArray = np.array([
	(549, 296), # Nose tip
	(651, 291), # Chin
	(573, 386), # Left eye left corner
	(691, 370), # Right eye right corne
	], dtype="double")

	flags_list = [
	cv2.SOLVEPNP_EPNP#cv2.SOLVEPNP_ITERATIVE#,cv2.SOLVEPNP_SQPNP,cv2.SOLVEPNP_EPNP
	]
	im_with_pose = im.copy()
	for flags in flags_list:
	rotation_vector, translation_vector = estimate_head_pose(image_path, model_points,image_points, camera_matrix, dist_coeffs,flags)
	#print(f"Rotation Vector:\n {rotation_vector}")
	#print(f"Translation Vector:\n {translation_vector}")
	#initial
	#im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs)

	from scipy.spatial.transform import Rotation as R
	def print_euler(rotation_vector):
	order = "yxz"
	rotation_matrix, _ = cv2.Rodrigues(rotation_vector)

	r = R.from_matrix(rotation_matrix)
	euler_angles = r.as_euler(order, degrees=True)
	print(f"Euler Angles {order} (degrees): {euler_angles}")

	print_euler(rotation_vector)
	criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 1000, 1e-8) # 反復終了条件

	rotation_vector, translation_vector = cv2.solvePnPRefineLM(model_points, image_points, camera_matrix, dist_coeffs, rotation_vector, translation_vector, criteria=criteria)
	im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs,128)
	print_euler(rotation_vector)

	#rotation_vector[0]=0
	#rotation_vector[1]=0
	#rotation_vector[2]=0

	#(success, rotation_vector, translation_vector) = cv2.solvePnP(
	# model_points, image_points, camera_matrix, dist_coeffs,rotation_vector ,translation_vector,flags=cv2.SOLVEPNP_ITERATIVE)

	im_with_pose = draw_head_pose(im_with_pose, image_points, rotation_vector, translation_vector, camera_matrix, dist_coeffs)

	#print_euler(rotation_vector)

	(rotation_matrix, jacobian) = cv2.Rodrigues(rotation_vector)
	mat = np.hstack((rotation_matrix, translation_vector))

	#yaw,pitch,rollの取り出し
	(_, _, _, _, _, _, eulerAngles) = cv2.decomposeProjectionMatrix(mat)
	print(eulerAngles)
	#rvec, tvec = cv2.solvePnPRefineVVS(model_points, image_points, camera_matrix, dist_coeffs, rotation_vector, translation_vector, criteria=criteria)
	#im_with_pose = draw_head_pose(im_with_pose, image_points, rvec, tvec, camera_matrix, dist_coeffs)


	#square
	#rvec, tvec = estimate_head_pose(image_path, square_model_points,square_image_points, camera_matrix, dist_coeffs,cv2.SOLVEPNP_IPPE_SQUARE)
	#not so good
	#im_with_pose = draw_head_pose(im_with_pose, square_image_points, rvec, tvec, camera_matrix, dist_coeffs)



	#print(rotation_matrix)
	# 回転行列をオイラー角に変換
	#euler_angles = cv2.decomposeProjectionMatrix(rotation_matrix)[-1]

	# オイラー角の表示 (x, y, z)

	# Display image
	cv2.imshow("Output", cv2.cvtColor(im_with_pose, cv2.COLOR_BGR2RGB))
	cv2.waitKey(0)
	cv2.destroyAllWindows()
	cv2.imwrite("result.jpg",cv2.cvtColor(im_with_pose, cv2.COLOR_BGR2RGB))

	if __name__ == "__main__":
	main()