from typing import List
import torch
import torch.nn.functional as F
from smplx.utils import find_joint_kin_chain
from detrsmpl.core.conventions.keypoints_mapping import (
get_keypoint_idx,
get_keypoint_idxs_by_part,
)
from detrsmpl.utils.geometry import weak_perspective_projection
class SMPLXHandMergeFunc():
"""This function use predictions from hand model to update the hand params
(right_hand_pose, left_hand_pose, wrist_pose) in predictions from body
model."""
def __init__(self, body_model, convention='smplx'):
self.body_model = body_model
self.convention = convention
self.left_hand_idxs = get_keypoint_idxs_by_part(
'left_hand', self.convention)
self.left_wrist_idx = get_keypoint_idx('left_wrist', self.convention)
self.left_hand_idxs.append(self.left_wrist_idx)
self.left_wrist_kin_chain = find_joint_kin_chain(
self.left_wrist_idx, self.body_model.parents)
self.right_hand_idxs = get_keypoint_idxs_by_part(
'right_hand', self.convention)
self.right_wrist_idx = get_keypoint_idx('right_wrist', self.convention)
self.right_hand_idxs.append(self.right_wrist_idx)
self.right_wrist_kin_chain = find_joint_kin_chain(
self.right_wrist_idx, self.body_model.parents)
def __call__(self, body_predictions, hand_predictions):
"""Function
Args:
body_predictions (dict): The prediction from body model.
hand_predictions (dict): The prediction from hand model.
Returns:
dict: Merged prediction.
"""
pred_param = body_predictions['pred_param']
global_orient = pred_param['global_orient']
body_pose = pred_param['body_pose']
pred_cam = body_predictions['pred_cam']
batch_size = pred_cam.shape[0]
device = pred_cam.device
hands_from_body_idxs = torch.arange(0,
2 * batch_size,
dtype=torch.long,
device=device)
right_hand_from_body_idxs = hands_from_body_idxs[:batch_size]
left_hand_from_body_idxs = hands_from_body_idxs[batch_size:]
parent_rots = []
right_wrist_parent_rot = find_joint_global_rotation(
self.right_wrist_kin_chain[1:], global_orient, body_pose)
left_wrist_parent_rot = find_joint_global_rotation(
self.left_wrist_kin_chain[1:], global_orient, body_pose)
left_to_right_wrist_parent_rot = flip_rotmat(left_wrist_parent_rot)
parent_rots += [right_wrist_parent_rot, left_to_right_wrist_parent_rot]
parent_rots = torch.cat(parent_rots, dim=0)
wrist_pose_from_hand = hand_predictions['pred_param']['global_orient']
# Undo the rotation of the parent joints to make the wrist rotation
# relative again
wrist_pose_from_hand = torch.matmul(
parent_rots.reshape(-1, 3, 3).transpose(1, 2),
wrist_pose_from_hand.reshape(-1, 3, 3))
right_hand_wrist = wrist_pose_from_hand[right_hand_from_body_idxs]
left_hand_wrist = flip_rotmat(
wrist_pose_from_hand[left_hand_from_body_idxs])
right_hand_pose = hand_predictions['pred_param']['right_hand_pose'][
right_hand_from_body_idxs]
left_hand_pose = flip_rotmat(
hand_predictions['pred_param']['right_hand_pose']
[left_hand_from_body_idxs])
body_predictions['pred_param']['right_hand_pose'] = right_hand_pose
body_predictions['pred_param']['left_hand_pose'] = left_hand_pose
body_predictions['pred_param']['body_pose'][:, self.right_wrist_idx -
1] = right_hand_wrist
body_predictions['pred_param']['body_pose'][:, self.left_wrist_idx -
1] = left_hand_wrist
return body_predictions
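# Usage sketch (illustrative only; `body_model`, `body_preds` and `hand_preds`
# are assumed to come from the surrounding expressive body-recovery pipeline):
#
#     merge_hands = SMPLXHandMergeFunc(body_model, convention='smplx')
#     # `hand_preds['pred_param']` must be stacked along the batch dimension as
#     # [right-hand crops, flipped left-hand crops], matching the order
#     # produced by SMPLXHandCropFunc below.
#     body_preds = merge_hands(body_preds, hand_preds)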
class SMPLXFaceMergeFunc():
"""This function use predictions from face model to update the face params
(jaw_pose, expression) in predictions from body model."""
def __init__(self,
body_model,
convention='smplx',
num_expression_coeffs=10):
self.body_model = body_model
self.convention = convention
self.num_expression_coeffs = num_expression_coeffs
def __call__(self, body_predictions, face_predictions):
"""Function
Args:
body_predictions (dict): The prediction from body model.
face_predictions (dict): The prediction from face model.
Returns:
dict: Merged prediction.
"""
body_predictions['pred_param']['jaw_pose'] = face_predictions[
'pred_param']['jaw_pose']
body_predictions['pred_param']['expression'] = face_predictions[
'pred_param']['expression'][:, :self.num_expression_coeffs]
return body_predictions
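# Usage sketch (illustrative only): `jaw_pose` is replaced by the face-model
# prediction and `expression` by its first `num_expression_coeffs`
# coefficients; everything else in `body_preds` is left untouched.
#
#     merge_face = SMPLXFaceMergeFunc(body_model, num_expression_coeffs=10)
#     body_preds = merge_face(body_preds, face_preds)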
def points_to_bbox(points, bbox_scale_factor: float = 1.0):
"""Get scaled bounding box from keypoints 2D."""
min_coords, _ = torch.min(points, dim=1)
xmin, ymin = min_coords[:, 0], min_coords[:, 1]
max_coords, _ = torch.max(points, dim=1)
xmax, ymax = max_coords[:, 0], max_coords[:, 1]
center = torch.stack([xmax + xmin, ymax + ymin], dim=-1) * 0.5
width = (xmax - xmin)
height = (ymax - ymin)
# Convert the bounding box to a square box
size = torch.max(width, height) * bbox_scale_factor
return center, size
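# Shape example (illustrative): for `points` of shape (B, N, 2) in pixel
# coordinates, `center` is (B, 2) and `size` is (B,), the side length of a
# square box, i.e. max(width, height) * bbox_scale_factor:
#
#     pts = torch.tensor([[[10., 20.], [50., 80.]]])        # B=1, N=2
#     center, size = points_to_bbox(pts, bbox_scale_factor=1.2)
#     # center -> tensor([[30., 50.]]), size -> tensor([72.])  (60 * 1.2)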
def get_crop_info(points,
img_metas,
scale_factor: float = 1.0,
crop_size: int = 256):
"""Get the transformation of points on the cropped image to the points on
the original image."""
device = points.device
dtype = points.dtype
batch_size = points.shape[0]
# Get the image to crop transformations and bounding box sizes
crop_transforms = []
img_bbox_sizes = []
for img_meta in img_metas:
crop_transforms.append(img_meta['crop_transform'])
img_bbox_sizes.append(img_meta['scale'].max())
img_bbox_sizes = torch.tensor(img_bbox_sizes, dtype=dtype, device=device)
crop_transforms = torch.tensor(crop_transforms, dtype=dtype, device=device)
crop_transforms = torch.cat([
crop_transforms,
torch.tensor([0.0, 0.0, 1.0], dtype=dtype, device=device).expand(
[batch_size, 1, 3])
],
dim=1)
inv_crop_transforms = torch.inverse(crop_transforms)
# center on the cropped body image
center_body_crop, bbox_size = points_to_bbox(
points, bbox_scale_factor=scale_factor)
orig_bbox_size = bbox_size / crop_size * img_bbox_sizes
# Compute the center of the crop in the original image
center = (torch.einsum(
'bij,bj->bi', [inv_crop_transforms[:, :2, :2], center_body_crop]) +
inv_crop_transforms[:, :2, 2])
return {
'center': center.reshape(-1, 2),
'orig_bbox_size': orig_bbox_size,
# 'bbox_size': bbox_size.reshape(-1),
'inv_crop_transforms': inv_crop_transforms,
# 'center_body_crop': 2 * center_body_crop / (crop_size-1) - 1,
}
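# Illustrative note (based on how this function is used below): every entry of
# `img_metas` is expected to provide a 2x3 affine 'crop_transform' (original
# image -> body crop) and a 'scale' whose max() is treated as the body
# bounding-box size in original-image pixels.  The returned 'center' and
# 'orig_bbox_size' therefore live in original-image coordinates and can be fed
# directly to CropSampler below (`joints_in_crop` is a placeholder name):
#
#     info = get_crop_info(joints_in_crop, img_metas,
#                          scale_factor=2.0, crop_size=256)
#     center, size = info['center'], info['orig_bbox_size']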
def concat_images(images: List[torch.Tensor]):
"""Concat images of different size."""
sizes = [img.shape[1:] for img in images]
H, W = [max(s) for s in zip(*sizes)]
batch_size = len(images)
batched_shape = (batch_size, images[0].shape[0], H, W)
batched = torch.zeros(batched_shape,
device=images[0].device,
dtype=images[0].dtype)
for ii, img in enumerate(images):
shape = img.shape
batched[ii, :shape[0], :shape[1], :shape[2]] = img
return batched
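# Example (illustrative): images of different spatial sizes are zero-padded to
# a common (C, H_max, W_max) grid:
#
#     imgs = [torch.rand(3, 100, 120), torch.rand(3, 90, 150)]
#     batch = concat_images(imgs)        # shape (2, 3, 100, 150)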
def flip_rotmat(pose_rotmat):
"""Flip function.
Flip rotmat.
"""
rot_mats = pose_rotmat.reshape(-1, 9).clone()
rot_mats[:, [1, 2, 3, 6]] *= -1
return rot_mats.view_as(pose_rotmat)
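# Sanity-check sketch (illustrative): negating entries 1, 2, 3 and 6 of the
# flattened 3x3 matrix is equivalent to conjugating with F = diag(-1, 1, 1),
# i.e. flip_rotmat(R) == F @ R @ F, a mirror across the x = 0 plane:
#
#     F = torch.diag(torch.tensor([-1., 1., 1.]))
#     S = torch.tensor([[0., -0.3, 0.2], [0.3, 0., -0.1], [-0.2, 0.1, 0.]])
#     R = torch.matrix_exp(S)            # exp of a skew-symmetric matrix is a rotation
#     assert torch.allclose(flip_rotmat(R[None]), (F @ R @ F)[None], atol=1e-6)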
def find_joint_global_rotation(kin_chain, root_pose, body_pose):
"""Computes the absolute rotation of a joint from the kinematic chain."""
# Create a single vector with all the poses
parents_pose = torch.cat([root_pose, body_pose], dim=1)[:, kin_chain]
output_pose = parents_pose[:, 0]
for idx in range(1, parents_pose.shape[1]):
output_pose = torch.bmm(parents_pose[:, idx], output_pose)
return output_pose
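# Illustrative note: `kin_chain` is expected to list joint indices from the
# joint itself up to the root (index 0), so the loop accumulates
#     R_global = R_root @ ... @ R_parent @ R_joint,
# the absolute (world) rotation of the joint.  `root_pose` and `body_pose` are
# rotation matrices of shape (B, 1, 3, 3) and (B, J, 3, 3) respectively.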
class CropSampler():
"""This function crops the HD images using bilinear interpolation."""
def __init__(self, crop_size: int = 256) -> None:
"""Uses bilinear sampling to extract square crops.
This module expects a high resolution image as input and a bounding
box, described by its center and size. It then proceeds to extract
a sub-image using the provided information through bilinear
interpolation.
Parameters
----------
crop_size: int
The desired size for the crop.
"""
super(CropSampler, self).__init__()
self.crop_size = crop_size
x = torch.arange(0, crop_size, dtype=torch.float32) / (crop_size - 1)
grid_y, grid_x = torch.meshgrid(x, x)
points = torch.stack([grid_y.flatten(), grid_x.flatten()], dim=1)
self.grid = points.unsqueeze(dim=0)
def _sample_padded(self, full_imgs, sampling_grid):
""""""
# Get the sub-images using bilinear interpolation
return F.grid_sample(full_imgs, sampling_grid, align_corners=True)
def __call__(self, full_imgs, center, bbox_size):
"""Crops the HD images using the provided bounding boxes.
Parameters
----------
full_imgs: torch.Tensor
A BxCxHxW tensor with the (padded) full-resolution images
center: torch.Tensor
A Bx2 tensor that contains the coordinates of the center of
the bounding box that will be cropped from the original
image
bbox_size: torch.Tensor
A size B tensor that contains the size of the crop
Returns
-------
cropped_images: torch.Tensor
The images cropped from the high-resolution input
sampling_grid: torch.Tensor
The grid used to sample the crops
"""
batch_size, _, H, W = full_imgs.shape
self.grid = self.grid.to(device=full_imgs.device)
transforms = torch.eye(3,
dtype=full_imgs.dtype,
device=full_imgs.device).reshape(
1, 3, 3).expand(batch_size, -1,
-1).contiguous()
hd_to_crop = torch.eye(3,
dtype=full_imgs.dtype,
device=full_imgs.device).reshape(
1, 3, 3).expand(batch_size, -1,
-1).contiguous()
# Create the transformation that maps crop pixels to image coordinates,
# i.e. pixel (0, 0) from the crop_size x crop_size grid gets mapped to
# the top left of the bounding box, pixel
# (crop_size - 1, crop_size - 1) to the bottom right corner of the
# bounding box
transforms[:, 0, 0] = bbox_size # / (self.crop_size - 1)
transforms[:, 1, 1] = bbox_size # / (self.crop_size - 1)
transforms[:, 0, 2] = center[:, 0] - bbox_size * 0.5
transforms[:, 1, 2] = center[:, 1] - bbox_size * 0.5
hd_to_crop[:, 0, 0] = 2 * (self.crop_size - 1) / bbox_size
hd_to_crop[:, 1, 1] = 2 * (self.crop_size - 1) / bbox_size
hd_to_crop[:, 0,
2] = -(center[:, 0] - bbox_size * 0.5) * hd_to_crop[:, 0,
0] - 1
hd_to_crop[:, 1,
2] = -(center[:, 1] - bbox_size * 0.5) * hd_to_crop[:, 1,
1] - 1
size_bbox_sizer = torch.eye(3,
dtype=full_imgs.dtype,
device=full_imgs.device).reshape(
1, 3, 3).expand(batch_size, -1,
-1).contiguous()
# Normalize the coordinates to [-1, 1] for the grid_sample function
size_bbox_sizer[:, 0, 0] = 2.0 / (W - 1)
size_bbox_sizer[:, 1, 1] = 2.0 / (H - 1)
size_bbox_sizer[:, :2, 2] = -1
# full_transform = transforms
full_transform = torch.bmm(size_bbox_sizer, transforms)
batch_grid = self.grid.expand(batch_size, -1, -1)
# Convert the grid to image coordinates using the transformations above
sampling_grid = (
torch.bmm(full_transform[:, :2, :2], batch_grid.transpose(1, 2)) +
full_transform[:, :2, [2]]).transpose(1, 2)
sampling_grid = sampling_grid.reshape(-1, self.crop_size,
self.crop_size,
2).transpose(1, 2)
out_images = self._sample_padded(full_imgs, sampling_grid)
return {
'images': out_images,
'sampling_grid': sampling_grid.reshape(batch_size, -1, 2),
'transform': transforms,
'hd_to_crop': hd_to_crop,
}
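# Usage sketch (illustrative; the image size and bounding boxes are made up):
#
#     cropper = CropSampler(crop_size=256)
#     full_imgs = torch.rand(2, 3, 1080, 1920)              # padded HD batch
#     center = torch.tensor([[960., 540.], [500., 400.]])   # bbox centres (px)
#     size = torch.tensor([512., 384.])                     # square bbox sides
#     out = cropper(full_imgs, center, size)
#     crops = out['images']                                 # (2, 3, 256, 256)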
class SMPLXHandCropFunc():
"""This function crop hand image from the original image.
Use the output keypoints predicted by the body model to locate the hand
position.
"""
def __init__(self,
model_head,
body_model,
convention='smplx',
img_res=256,
scale_factor=2.0,
crop_size=224,
condition_hand_wrist_pose=True,
condition_hand_shape=False,
condition_hand_finger_pose=True):
self.model_head = model_head
self.body_model = body_model
self.img_res = img_res
self.convention = convention
self.left_hand_idxs = get_keypoint_idxs_by_part(
'left_hand', self.convention)
left_wrist_idx = get_keypoint_idx('left_wrist', self.convention)
self.left_hand_idxs.append(left_wrist_idx)
self.left_wrist_kin_chain = find_joint_kin_chain(
left_wrist_idx, self.body_model.parents)
self.right_hand_idxs = get_keypoint_idxs_by_part(
'right_hand', self.convention)
right_wrist_idx = get_keypoint_idx('right_wrist', self.convention)
self.right_hand_idxs.append(right_wrist_idx)
self.right_wrist_kin_chain = find_joint_kin_chain(
right_wrist_idx, self.body_model.parents)
self.scale_factor = scale_factor
self.hand_cropper = CropSampler(crop_size)
self.condition_hand_wrist_pose = condition_hand_wrist_pose
self.condition_hand_shape = condition_hand_shape
self.condition_hand_finger_pose = condition_hand_finger_pose
def build_hand_mean(self, global_orient, body_pose, betas, left_hand_pose,
raw_right_hand_pose, batch_size):
"""Builds the initial point for the iterative regressor of the hand."""
hand_mean = []
# if self.condition_hand_on_body:
# Convert the absolute pose to the latent representation
if self.condition_hand_wrist_pose:
# Compute the absolute pose of the right wrist
right_wrist_pose_abs = find_joint_global_rotation(
self.right_wrist_kin_chain, global_orient, body_pose)
right_wrist_pose = right_wrist_pose_abs[:, :3, :2].contiguous(
).reshape(batch_size, -1)
# Compute the absolute rotation for the left wrist
left_wrist_pose_abs = find_joint_global_rotation(
self.left_wrist_kin_chain, global_orient, body_pose)
# Flip the left wrist to the right
left_to_right_wrist_pose = flip_rotmat(left_wrist_pose_abs)
# Convert to the latent representation
left_to_right_wrist_pose = left_to_right_wrist_pose[:, :3, :
2].contiguous(
).reshape(
batch_size,
-1)
else:
right_wrist_pose = self.model_head.get_mean('global_orient',
batch_size=batch_size)
left_to_right_wrist_pose = self.model_head.get_mean(
'global_orient', batch_size=batch_size)
# Convert the pose of the left hand to the right hand and project
# it to the encoder space
left_to_right_hand_pose = flip_rotmat(
left_hand_pose)[:, :, :3, :2].contiguous().reshape(batch_size, -1)
right_hand_pose = raw_right_hand_pose.reshape(batch_size, -1)
camera_mean = self.model_head.get_mean('camera', batch_size=batch_size)
shape_condition = (betas if self.condition_hand_shape else
self.model_head.get_mean('shape',
batch_size=batch_size))
right_finger_pose_condition = (
right_hand_pose if self.condition_hand_finger_pose else
self.model_head.get_mean('right_hand_pose', batch_size=batch_size))
right_hand_mean = torch.cat([
right_wrist_pose, right_finger_pose_condition, shape_condition,
camera_mean
],
dim=1)
left_finger_pose_condition = (
left_to_right_hand_pose if self.condition_hand_finger_pose else
self.model_head.get_mean('right_hand_pose', batch_size=batch_size))
# Should be Bx31
left_hand_mean = torch.cat([
left_to_right_wrist_pose, left_finger_pose_condition,
shape_condition, camera_mean
],
dim=1)
hand_mean += [right_hand_mean, left_hand_mean]
hand_mean = torch.cat(hand_mean, dim=0)
return hand_mean
def __call__(self, body_predictions, img_metas):
"""Function
Args:
body_predictions (dict): The prediction from body model.
img_metas (dict): Information of the input images.
Returns:
all_hand_imgs (torch.tensor): Cropped hand images.
hand_mean (torch.tensor): Mean value of hand params.
crop_info (dict): Hand crop transforms.
"""
pred_param = body_predictions['pred_param']
pred_cam = body_predictions['pred_cam']
pred_raw = body_predictions['pred_raw']
pred_output = self.body_model(**pred_param)
pred_keypoints3d = pred_output['joints']
pred_keypoints2d = weak_perspective_projection(
pred_keypoints3d,
scale=pred_cam[:, 0],
translation=pred_cam[:, 1:3])
# concat ori_img
full_images = []
for img_meta in img_metas:
full_images.append(img_meta['ori_img'].to(device=pred_cam.device))
full_imgs = concat_images(full_images)
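# The projected keypoints are treated as normalized [-1, 1] coordinates of the
# (img_res x img_res) body crop; the (x * 0.5 + 0.5) * (img_res - 1) mapping
# below converts them to pixel coordinates before locating the hands.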
# left hand
left_hand_joints = (pred_keypoints2d[:, self.left_hand_idxs] * 0.5 +
0.5) * (self.img_res - 1)
left_hand_points_to_crop = get_crop_info(left_hand_joints, img_metas,
self.scale_factor,
self.img_res)
left_hand_center = left_hand_points_to_crop['center']
left_hand_orig_bbox_size = left_hand_points_to_crop['orig_bbox_size']
left_hand_inv_crop_transforms = left_hand_points_to_crop[
'inv_crop_transforms']
left_hand_cropper_out = self.hand_cropper(full_imgs, left_hand_center,
left_hand_orig_bbox_size)
left_hand_crops = left_hand_cropper_out['images']
# left_hand_points = left_hand_cropper_out['sampling_grid']
left_hand_crop_transform = left_hand_cropper_out['transform']
# right hand
right_hand_joints = (pred_keypoints2d[:, self.right_hand_idxs] * 0.5 +
0.5) * (self.img_res - 1)
right_hand_points_to_crop = get_crop_info(right_hand_joints, img_metas,
self.scale_factor,
self.img_res)
right_hand_center = right_hand_points_to_crop['center']
right_hand_orig_bbox_size = right_hand_points_to_crop['orig_bbox_size']
# right_hand_inv_crop_transforms = right_hand_points_to_crop[
# 'inv_crop_transforms']
right_hand_cropper_out = self.hand_cropper(full_imgs,
right_hand_center,
right_hand_orig_bbox_size)
right_hand_crops = right_hand_cropper_out['images']
# right_hand_points = right_hand_cropper_out['sampling_grid']
right_hand_crop_transform = right_hand_cropper_out['transform']
# concat
all_hand_imgs = []
all_hand_imgs.append(right_hand_crops)
all_hand_imgs.append(torch.flip(left_hand_crops, dims=(-1, )))
# [right_hand , left hand]
all_hand_imgs = torch.cat(all_hand_imgs, dim=0)
hand_mean = self.build_hand_mean(pred_param['global_orient'],
pred_param['body_pose'],
pred_param['betas'],
pred_param['left_hand_pose'],
pred_raw['raw_right_hand_pose'],
batch_size=full_imgs.shape[0])
crop_info = dict(
hand_inv_crop_transforms=left_hand_inv_crop_transforms,
left_hand_crop_transform=left_hand_crop_transform,
right_hand_crop_transform=right_hand_crop_transform)
return all_hand_imgs, hand_mean, crop_info
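# Usage sketch (illustrative; `hand_head` is the hand model's head passed as
# `model_head`, and the `hand_model(...)` call is a hypothetical stand-in for
# however the hand sub-network is invoked in the full pipeline):
#
#     crop_hands = SMPLXHandCropFunc(hand_head, body_model, img_res=256)
#     hand_imgs, hand_mean, hand_crop_info = crop_hands(body_preds, img_metas)
#     hand_preds = hand_model(hand_imgs, cond=hand_mean)    # hypothetical
#     body_preds = merge_hands(body_preds, hand_preds)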
class SMPLXFaceCropFunc():
"""This function crop face image from the original image.
Use the output keypoints predicted by the facce model to locate the face
position.
"""
def __init__(self,
model_head,
body_model,
convention='smplx',
img_res=256,
scale_factor=2.0,
crop_size=256,
num_betas=10,
num_expression_coeffs=10,
condition_face_neck_pose=False,
condition_face_jaw_pose=True,
condition_face_shape=False,
condition_face_expression=True):
self.model_head = model_head
self.body_model = body_model
self.img_res = img_res
self.convention = convention
self.num_betas = num_betas
self.num_expression_coeffs = num_expression_coeffs
self.face_idx = get_keypoint_idxs_by_part('head', self.convention)
neck_idx = get_keypoint_idx('neck', self.convention)
self.neck_kin_chain = find_joint_kin_chain(neck_idx,
self.body_model.parents)
self.condition_face_neck_pose = condition_face_neck_pose
self.condition_face_jaw_pose = condition_face_jaw_pose
self.condition_face_shape = condition_face_shape
self.condition_face_expression = condition_face_expression
self.scale_factor = scale_factor
self.face_cropper = CropSampler(crop_size)
def build_face_mean(self, global_orient, body_pose, betas, raw_jaw_pose,
expression, batch_size):
"""Builds the initial point for the iterative regressor of the face."""
face_mean = []
# Compute the absolute pose of the neck
neck_pose_abs = find_joint_global_rotation(self.neck_kin_chain,
global_orient, body_pose)
# Encode the absolute neck pose in the 6-D rotation representation
neck_pose = neck_pose_abs[:, :3, :2].contiguous().reshape(
batch_size, -1)
camera_mean = self.model_head.get_mean('camera', batch_size=batch_size)
neck_pose_condition = (neck_pose if self.condition_face_neck_pose else
self.model_head.get_mean('global_orient',
batch_size=batch_size))
jaw_pose_condition = (raw_jaw_pose.reshape(batch_size, -1)
if self.condition_face_jaw_pose else
self.model_head.get_mean('jaw_pose',
batch_size=batch_size))
face_num_betas = self.model_head.get_num_betas()
shape_padding_size = face_num_betas - self.num_betas
betas_condition = (
F.pad(betas.reshape(batch_size, -1),
(0, shape_padding_size)) if self.condition_face_shape else
self.model_head.get_mean('shape', batch_size=batch_size))
face_num_expression_coeffs = self.model_head.get_num_expression_coeffs(
)
expr_padding_size = face_num_expression_coeffs \
- self.num_expression_coeffs
expression_condition = (
F.pad(expression.reshape(batch_size, -1),
(0, expr_padding_size)) if self.condition_face_expression
else self.model_head.get_mean('expression', batch_size=batch_size))
# Should be Bx(Head pose params)
face_mean.append(
torch.cat([
neck_pose_condition,
jaw_pose_condition,
betas_condition,
expression_condition,
camera_mean.reshape(batch_size, -1),
],
dim=1))
face_mean = torch.cat(face_mean, dim=0)
return face_mean
def __call__(self, body_predictions, img_metas):
"""Function
Args:
body_predictions (dict): The prediction from body model.
img_metas (dict): Information of the input images.
Returns:
all_face_imgs (torch.tensor): Cropped face images.
face_mean (torch.tensor): Mean value of face params.
crop_info (dict): Face crop transforms.
"""
pred_param = body_predictions['pred_param']
pred_cam = body_predictions['pred_cam']
pred_raw = body_predictions['pred_raw']
pred_output = self.body_model(**pred_param)
pred_keypoints3d = pred_output['joints']
pred_keypoints2d = weak_perspective_projection(
pred_keypoints3d,
scale=pred_cam[:, 0],
translation=pred_cam[:, 1:3])
# concat ori_img
full_images = []
for img_meta in img_metas:
full_images.append(img_meta['ori_img'].to(device=pred_cam.device))
full_imgs = concat_images(full_images)
face_joints = (pred_keypoints2d[:, self.face_idx] * 0.5 +
0.5) * (self.img_res - 1)
face_points_to_crop = get_crop_info(face_joints, img_metas,
self.scale_factor, self.img_res)
face_center = face_points_to_crop['center']
face_orig_bbox_size = face_points_to_crop['orig_bbox_size']
face_inv_crop_transforms = face_points_to_crop['inv_crop_transforms']
face_cropper_out = self.face_cropper(full_imgs, face_center,
face_orig_bbox_size)
face_crops = face_cropper_out['images']
# face_points = face_cropper_out['sampling_grid']
face_crop_transform = face_cropper_out['transform']
all_face_imgs = [face_crops]
all_face_imgs = torch.cat(all_face_imgs, dim=0)
face_mean = self.build_face_mean(pred_param['global_orient'],
pred_param['body_pose'],
pred_param['betas'],
pred_raw['raw_jaw_pose'],
pred_param['expression'],
batch_size=full_imgs.shape[0])
crop_info = dict(face_inv_crop_transforms=face_inv_crop_transforms,
face_crop_transform=face_crop_transform)
return all_face_imgs, face_mean, crop_info
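# Usage sketch (illustrative): the face branch mirrors the hand branch above,
# with `face_model(...)` again a hypothetical stand-in for the face
# sub-network:
#
#     crop_face = SMPLXFaceCropFunc(face_head, body_model, img_res=256)
#     face_imgs, face_mean, face_crop_info = crop_face(body_preds, img_metas)
#     face_preds = face_model(face_imgs, cond=face_mean)    # hypothetical
#     body_preds = merge_face(body_preds, face_preds)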