# AiOS/datasets/humandata.py
import os
import os.path as osp
import numpy as np
import torch
import cv2
import json
import copy
from pycocotools.coco import COCO
from config.config import cfg
from util.human_models import smpl_x
from util.preprocessing import (
    load_img, process_bbox, augmentation_instance_sample,
    process_human_model_output_batch_simplify, process_db_coord_batch_no_valid)
from util.transforms import world2cam, cam2pixel, rigid_align
from detrsmpl.utils.geometry import batch_rodrigues, project_points_new, weak_perspective_projection, perspective_projection
import tqdm
import time
import random
from detrsmpl.utils.demo_utils import box2cs, xywh2xyxy, xyxy2xywh
import torch.distributed as dist
KPS2D_KEYS = [
    'keypoints2d_ori', 'keypoints2d_smplx', 'keypoints2d_smpl',
    'keypoints2d_original', 'keypoints2d_gta', 'keypoints2d'
]
KPS3D_KEYS = [
    'keypoints3d_cam', 'keypoints3d', 'keypoints3d_smplx', 'keypoints3d_smpl',
    'keypoints3d_original', 'keypoints3d_gta'
]
# 'keypoints3d_cam' (root-aligned) has the highest priority, followed by the
# legacy key 'keypoints3d'; when 'keypoints3d_smplx' is present, prefer it
# over 'keypoints3d_original'.
from util.formatting import DefaultFormatBundle
from detrsmpl.data.datasets.pipelines.transforms import Normalize
class Cache:
    """A custom cache implementation for the OSX pipeline."""
def __init__(self, load_path=None):
if load_path is not None:
self.load(load_path)
def load(self, load_path):
self.load_path = load_path
self.cache = np.load(load_path, allow_pickle=True)
self.data_len = self.cache['data_len']
self.data_strategy = self.cache['data_strategy']
assert self.data_len == len(self.cache) - 2 # data_len, data_strategy
self.cache = None
@classmethod
def save(cls, save_path, data_list, data_strategy):
assert save_path is not None, 'save_path is None'
data_len = len(data_list)
cache = {}
for i, data in enumerate(data_list):
cache[str(i)] = data
assert len(cache) == data_len
# update meta
cache.update({'data_len': data_len, 'data_strategy': data_strategy})
np.savez_compressed(save_path, **cache)
print(f'Cache saved to {save_path}.')
# def shuffle(self):
# random.shuffle(self.mapping)
def __len__(self):
return self.data_len
def __getitem__(self, idx):
if self.cache is None:
self.cache = np.load(self.load_path, allow_pickle=True)
cache_data = self.cache[str(idx)]
data = cache_data.item()
return data
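# A minimal usage sketch of the Cache round-trip (hypothetical file name;
# entries are assumed to be picklable dicts):
#
#     Cache.save('annots_cache.npz', datalist, data_strategy='balance')
#     cache = Cache('annots_cache.npz')
#     sample = cache[0]  # re-opens the .npz lazily on first access
#
# Each sample is stored under its string index via np.savez_compressed, so
# __getitem__ can fetch single entries without holding everything in memory.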
class HumanDataset(torch.utils.data.Dataset):
# same mapping for 144->137 and 190->137
SMPLX_137_MAPPING = [
0, 1, 2, 4, 5, 7, 8, 12, 16, 17, 18, 19, 20, 21, 60, 61, 62, 63, 64,
65, 59, 58, 57, 56, 55, 37, 38, 39, 66, 25, 26, 27, 67, 28, 29, 30, 68,
34, 35, 36, 69, 31, 32, 33, 70, 52, 53, 54, 71, 40, 41, 42, 72, 43, 44,
45, 73, 49, 50, 51, 74, 46, 47, 48, 75, 22, 15, 56, 57, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97,
98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
140, 141, 142, 143
]
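    # Indices into the source (144- or 190-joint) keypoint convention, listed
    # in the order of the 137-joint SMPL-X convention used here; applied as
    # keypoints[:, SMPLX_137_MAPPING, :] when loading annotations.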
def __init__(self, transform, data_split):
self.transform = transform
self.data_split = data_split
# dataset information, to be filled by child class
self.img_dir = None
self.annot_path = None
self.annot_path_cache = None
self.use_cache = False
self.img_shape = None # (h, w)
self.cam_param = None # {'focal_length': (fx, fy), 'princpt': (cx, cy)}
self.use_betas_neutral = False
self.body_only = False
self.joint_set = {
'joint_num': smpl_x.joint_num,
'joints_name': smpl_x.joints_name,
'flip_pairs': smpl_x.flip_pairs
}
self.joint_set['root_joint_idx'] = self.joint_set['joints_name'].index(
'Pelvis')
self.format = DefaultFormatBundle()
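        # Standard ImageNet mean/std scaled to the [0, 255] range.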
self.normalize = Normalize(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375])
self.keypoints2d = None
# self.rank = dist.get_rank()
self.lhand_mean = smpl_x.layer['neutral'].left_hand_mean.reshape(15, 3).cpu().numpy()
self.rhand_mean = smpl_x.layer['neutral'].right_hand_mean.reshape(15, 3).cpu().numpy()
# self.log_file_path = f'indices_node{rank}.txt'
def load_cache(self, annot_path_cache):
datalist = Cache(annot_path_cache)
# assert datalist.data_strategy == getattr(cfg, 'data_strategy', None), \
# f'Cache data strategy {datalist.data_strategy} does not match current data strategy ' \
# f'{getattr(cfg, "data_strategy", None)}'
return datalist
def save_cache(self, annot_path_cache, datalist):
print(
f'[{self.__class__.__name__}] Caching datalist to {self.annot_path_cache}...'
)
Cache.save(annot_path_cache,
datalist,
data_strategy=getattr(cfg, 'data_strategy', None))
def load_data(self, train_sample_interval=1,
hand_bbox_ratio=1, body_bbox_ratio=1):
content = np.load(self.annot_path, allow_pickle=True)
try:
frame_range = content['frame_range']
except KeyError:
self.num_data = len(content['image_path'])
frame_range = \
np.array([[i, i + 1] for i in range(self.num_data)])
num_examples = len(frame_range)
if 'meta' in content:
meta = content['meta'].item()
print('meta keys:', meta.keys())
else:
meta = None
            print('No meta info provided! '
                  'Please specify image height and width manually.')
print(
f'Start loading humandata {self.annot_path} into memory...\nDataset includes: {content.files}'
)
tic = time.time()
image_path = content['image_path']
if meta is not None and 'height' in meta and len(meta['height'])>0:
height = np.array(meta['height'])
width = np.array(meta['width'])
image_shape = np.stack([height, width], axis=-1)
else:
image_shape = None
if meta is not None and 'gender' in meta and len(meta['gender']) != 0:
gender = np.array(meta['gender'])
else:
gender = None
bbox_xywh = content['bbox_xywh']
if 'smplx' in content:
smplx = content['smplx'].item()
as_smplx = 'smplx'
elif 'smpl' in content:
smplx = content['smpl'].item()
as_smplx = 'smpl'
elif 'smplh' in content:
smplx = content['smplh'].item()
as_smplx = 'smplh'
# TODO: temp solution, should be more general. But SHAPY is very special
elif self.__class__.__name__ == 'SHAPY':
smplx = {}
else:
            raise KeyError('No SMPL/SMPL-X parameters available, please check keys:\n'
                           f'{content.files}')
        print('SMPL-X param keys:', smplx.keys())
if 'lhand_bbox_xywh' in content and 'rhand_bbox_xywh' in content:
lhand_bbox_xywh = content['lhand_bbox_xywh']
rhand_bbox_xywh = content['rhand_bbox_xywh']
else:
lhand_bbox_xywh = np.zeros_like(bbox_xywh)
rhand_bbox_xywh = np.zeros_like(bbox_xywh)
if 'face_bbox_xywh' in content:
face_bbox_xywh = content['face_bbox_xywh']
else:
face_bbox_xywh = np.zeros_like(bbox_xywh)
if meta is not None and 'smplx_valid' in meta:
smplx_valid = meta['smplx_valid']
else:
smplx_valid = np.ones(len(bbox_xywh))
decompressed = False
if content['__keypoints_compressed__']:
decompressed_kps = self.decompress_keypoints(content)
decompressed = True
keypoints3d = None
valid_kps3d = False
keypoints3d_mask = None
valid_kps3d_mask = False
# processing keypoints
for kps3d_key in KPS3D_KEYS:
if kps3d_key in content:
keypoints3d = decompressed_kps[kps3d_key][:, self.SMPLX_137_MAPPING, :] if decompressed \
else content[kps3d_key][:, self.SMPLX_137_MAPPING, :]
valid_kps3d = True
if keypoints3d.shape[-1] == 4:
valid_kps3d_mask = True
break
if self.keypoints2d is not None:
keypoints2d = decompressed_kps[self.keypoints2d][:, self.SMPLX_137_MAPPING, :] if decompressed \
else content[self.keypoints2d][:, self.SMPLX_137_MAPPING, :]
else:
for kps2d_key in KPS2D_KEYS:
if kps2d_key in content:
keypoints2d = decompressed_kps[kps2d_key][:, self.SMPLX_137_MAPPING, :] if decompressed \
else content[kps2d_key][:, self.SMPLX_137_MAPPING, :]
break
if keypoints2d.shape[-1] == 3:
valid_kps3d_mask = True
print('Done. Time: {:.2f}s'.format(time.time() - tic))
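        # At this point keypoints2d is (N, 137, 2 or 3) and keypoints3d, if
        # present, is (N, 137, 3 or 4); the optional last channel holds a
        # per-joint confidence/validity flag.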
datalist = []
# num_examples
# processing each image, filter according to bbox valid
for i in tqdm.tqdm(range(int(num_examples))):
if self.data_split == 'train' and i % train_sample_interval != 0:
continue
frame_start, frame_end = frame_range[i]
img_path = osp.join(self.img_dir, image_path[frame_start])
# im_shape = cv2.imread(img_path).shape[:2]
img_shape = image_shape[
frame_start] if image_shape is not None else self.img_shape
bbox_list = bbox_xywh[frame_start:frame_end, :4]
valid_idx = []
body_bbox_list = []
# if hasattr(cfg, 'bbox_ratio'):
# bbox_ratio = cfg.bbox_ratio * 0.833 # preprocess body bbox is giving 1.2 box padding
# else:
# bbox_ratio = 1.25
# if self.__class__.__name__ == 'SPEC':
# bbox_ratio = 1.25
for bbox_i, bbox in enumerate(bbox_list):
bbox = process_bbox(bbox,
img_width=img_shape[1],
img_height=img_shape[0],
ratio=body_bbox_ratio)
if bbox is None:
continue
else:
valid_idx.append(frame_start + bbox_i)
bbox[2:] += bbox[:2]
body_bbox_list.append(bbox)
if len(valid_idx) == 0:
continue
valid_num = len(valid_idx)
# hand/face bbox
lhand_bbox_list = []
rhand_bbox_list = []
face_bbox_list = []
smplx_valid_list = []
for bbox_i in valid_idx:
smplx_valid_list.append(smplx_valid[bbox_i])
lhand_bbox = lhand_bbox_xywh[bbox_i]
rhand_bbox = rhand_bbox_xywh[bbox_i]
face_bbox = face_bbox_xywh[bbox_i]
if lhand_bbox[-1] > 0: # conf > 0
lhand_bbox = lhand_bbox[:4]
# if hasattr(cfg, 'bbox_ratio'):
lhand_bbox = process_bbox(lhand_bbox,
img_width=img_shape[1],
img_height=img_shape[0],
ratio=hand_bbox_ratio)
if lhand_bbox is not None:
lhand_bbox[2:] += lhand_bbox[:2] # xywh -> xyxy
else:
lhand_bbox = None
if rhand_bbox[-1] > 0:
rhand_bbox = rhand_bbox[:4]
# if hasattr(cfg, 'bbox_ratio'):
rhand_bbox = process_bbox(rhand_bbox,
img_width=img_shape[1],
img_height=img_shape[0],
ratio=hand_bbox_ratio)
if rhand_bbox is not None:
rhand_bbox[2:] += rhand_bbox[:2] # xywh -> xyxy
else:
rhand_bbox = None
if face_bbox[-1] > 0:
face_bbox = face_bbox[:4]
# if hasattr(cfg, 'bbox_ratio'):
face_bbox = process_bbox(face_bbox,
img_width=img_shape[1],
img_height=img_shape[0],
ratio=hand_bbox_ratio)
if face_bbox is not None:
face_bbox[2:] += face_bbox[:2] # xywh -> xyxy
else:
face_bbox = None
lhand_bbox_list.append(lhand_bbox)
rhand_bbox_list.append(rhand_bbox)
face_bbox_list.append(face_bbox)
joint_img = keypoints2d[valid_idx]
if valid_kps3d:
joint_cam = keypoints3d[valid_idx]
else:
joint_cam = None
            for eye_key in ('leye_pose_0', 'leye_pose_1', 'leye_pose',
                            'reye_pose_0', 'reye_pose_1', 'reye_pose'):
                smplx.pop(eye_key, None)
smplx_param = {k: v[valid_idx] for k, v in smplx.items()}
gender_ = gender[valid_idx] \
if gender is not None else np.array(['neutral']*(valid_num))
lhand_bbox_valid = lhand_bbox_xywh[valid_idx,4]
rhand_bbox_valid = rhand_bbox_xywh[valid_idx,4]
face_bbox_valid = face_bbox_xywh[valid_idx,4]
# TODO: set invalid if None?
smplx_param['root_pose'] = smplx_param.pop('global_orient', None)
smplx_param['shape'] = smplx_param.pop('betas', None)
smplx_param['trans'] = smplx_param.pop('transl', np.zeros([len(valid_idx),3]))
smplx_param['lhand_pose'] = smplx_param.pop('left_hand_pose', None)
smplx_param['rhand_pose'] = smplx_param.pop(
'right_hand_pose', None)
smplx_param['expr'] = smplx_param.pop('expression', None)
# TODO do not fix betas, give up shape supervision
if 'betas_neutral' in smplx_param and self.data_split == 'train':
smplx_param['shape'] = smplx_param.pop('betas_neutral')
# smplx_param['shape'] = np.zeros(10, dtype=np.float32)
# # TODO fix shape of poses
            if self.__class__.__name__ == 'Talkshow':
                smplx_param['body_pose'] = smplx_param['body_pose'].reshape(
                    -1, 21, 3)
                smplx_param['lhand_pose'] = smplx_param['lhand_pose'].reshape(
                    -1, 15, 3)
                # fix copy-paste bug: reshape rhand_pose, not lhand_pose
                smplx_param['rhand_pose'] = smplx_param['rhand_pose'].reshape(
                    -1, 15, 3)
                smplx_param['expr'] = smplx_param['expr'][:, :10]
if self.__class__.__name__ == 'BEDLAM':
smplx_param['shape'] = smplx_param['shape'][:, :10]
# smplx_param['expr'] = None
if self.__class__.__name__ == 'GTA':
smplx_param['shape'] = np.zeros(
[valid_num, 10],
dtype=np.float32)
if self.__class__.__name__ == 'COCO_NA':
# smplx_param['expr'] = None
smplx_param['body_pose'] = smplx_param['body_pose'].reshape(
-1, 21, 3)
smplx_param['lhand_pose'] = smplx_param['lhand_pose'].reshape(
-1, 15, 3)
smplx_param['rhand_pose'] = smplx_param['rhand_pose'].reshape(
-1, 15, 3)
if as_smplx == 'smpl':
smplx_param['shape'] = np.zeros(
[valid_num, 10],
dtype=np.float32) # drop smpl betas for smplx
smplx_param['body_pose'] = smplx_param[
'body_pose'].reshape(-1,23,3)[:, :21, :] # use smpl body_pose on smplx
if as_smplx == 'smplh':
smplx_param['shape'] = np.zeros(
[valid_num, 10],
dtype=np.float32) # drop smpl betas for smplx
            if smplx_param['lhand_pose'] is None or self.body_only:
                smplx_param['lhand_valid'] = np.zeros(valid_num, dtype=np.bool_)
            else:
                smplx_param['lhand_valid'] = lhand_bbox_valid.astype(np.bool_)
            if smplx_param['rhand_pose'] is None or self.body_only:
                smplx_param['rhand_valid'] = np.zeros(valid_num, dtype=np.bool_)
            else:
                smplx_param['rhand_valid'] = rhand_bbox_valid.astype(np.bool_)
            if smplx_param['expr'] is None or self.body_only:
                smplx_param['face_valid'] = np.zeros(valid_num, dtype=np.bool_)
            else:
                smplx_param['face_valid'] = face_bbox_valid.astype(np.bool_)
            # np.bool8 is removed in recent NumPy; use np.bool_ instead
            smplx_param['smplx_valid'] = np.array(smplx_valid_list).astype(np.bool_)
if joint_cam is not None and np.any(np.isnan(joint_cam)):
continue
if self.__class__.__name__ == 'SPEC':
joint_img[:,:,2] = joint_img[:,:,2]>0
joint_cam[:,:,3] = joint_cam[:,:,0]!=0
datalist.append({
'img_path': img_path,
'img_shape': img_shape,
'bbox': body_bbox_list,
'lhand_bbox': lhand_bbox_list,
'rhand_bbox': rhand_bbox_list,
'face_bbox': face_bbox_list,
'joint_img': joint_img,
'joint_cam': joint_cam,
'smplx_param': smplx_param,
'as_smplx': as_smplx,
'gender': gender_
})
# save memory
del content, image_path, bbox_xywh, lhand_bbox_xywh, rhand_bbox_xywh, face_bbox_xywh, keypoints3d, keypoints2d
if self.data_split == 'train':
print(f'[{self.__class__.__name__} train] original size:',
int(num_examples), '. Sample interval:',
train_sample_interval, '. Sampled size:', len(datalist))
if getattr(cfg, 'data_strategy',
None) == 'balance' and self.data_split == 'train':
print(
f'[{self.__class__.__name__}] Using [balance] strategy with datalist shuffled...'
)
random.shuffle(datalist)
return datalist
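    # Shape sketch of one datalist entry, for an image with P valid persons
    # (inferred from the dict assembled above): 'bbox', 'lhand_bbox',
    # 'rhand_bbox' and 'face_bbox' hold P xyxy boxes (hand/face entries may be
    # None), 'joint_img' is (P, 137, 2|3), 'joint_cam' is (P, 137, 3|4) or
    # None, and 'smplx_param' maps parameter names to length-P arrays.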
def __len__(self):
return len(self.datalist)
def __getitem__(self, idx):
        try:
            data = copy.deepcopy(self.datalist[idx])
        except Exception as e:
            print(f'[{self.__class__.__name__}] Error loading data {idx}')
            print(e)
            # re-raise instead of exit(0), which would hide the failure
            raise
img_path, img_shape, bbox = \
data['img_path'], data['img_shape'], data['bbox']
as_smplx = data['as_smplx']
        gender = data['gender'].copy()
        for gender_str, gender_num in {
                'neutral': -1, 'male': 0, 'female': 1}.items():
            gender[gender == gender_str] = gender_num
        gender = gender.astype(int)
img_whole_bbox = np.array([0, 0, img_shape[1], img_shape[0]])
img = load_img(img_path, order='BGR')
num_person = len(data['bbox'])
data_name = self.__class__.__name__
try:
# dist.barrier()
img, img2bb_trans, bb2img_trans, rot, do_flip = \
augmentation_instance_sample(img, img_whole_bbox, self.data_split, data, data_name)
except Exception as e:
            # self.rank is never set in __init__ (commented out above), so
            # query torch.distributed directly
            rank = dist.get_rank() if dist.is_initialized() else 0
            local_rank = rank % max(torch.cuda.device_count(), 1)
            with open(f'index_log_{rank}.txt', 'a') as f:
                f.write(f'{rank}-{local_rank}-{idx}\n')
                f.write(f'[{self.__class__.__name__}] Error loading data {idx}\n')
                f.write(f'Error in augmentation_instance_sample for {img_path}\n')
            raise e
cropped_img_shape = img.shape[:2]
if self.data_split == 'train':
            joint_cam = data['joint_cam']  # (num_person, 137, 4)
if joint_cam is not None:
dummy_cord = False
joint_cam[:,:,:3] = \
joint_cam[:,:,:3] - joint_cam[:, self.joint_set['root_joint_idx'], None, :3] # root-relative
else:
# dummy cord as joint_cam
dummy_cord = True
joint_cam = np.zeros(
(num_person, self.joint_set['joint_num'], 4),
dtype=np.float32)
joint_img = data['joint_img']
# do rotation on keypoints
joint_img_aug, joint_cam_wo_ra, joint_cam_ra, joint_trunc = \
process_db_coord_batch_no_valid(
joint_img, joint_cam, do_flip, img_shape,
self.joint_set['flip_pairs'], img2bb_trans, rot,
self.joint_set['joints_name'], smpl_x.joints_name,
cropped_img_shape)
joint_img_aug[:,:,2:] = joint_img_aug[:,:,2:] * joint_trunc
# smplx coordinates and parameters
smplx_param = data['smplx_param']
            if self.__class__.__name__ in ['CHI3D', 'SynBody', 'UBody_MM']:
                smplx_param['lhand_pose'] -= self.lhand_mean[None]
                smplx_param['rhand_pose'] -= self.rhand_mean[None]
# smplx_param
smplx_pose, smplx_shape, smplx_expr, smplx_pose_valid, \
smplx_joint_valid, smplx_expr_valid, smplx_shape_valid = \
process_human_model_output_batch_simplify(
smplx_param, do_flip, rot, as_smplx, data_name)
smplx_joint_valid = smplx_joint_valid[:, :, None]
# if cam not provided, we take joint_img as smplx joint 2d,
# which is commonly the case for our processed humandata
# change smplx_shape if use_betas_neutral
# processing follows that in process_human_model_output
if self.use_betas_neutral:
smplx_shape = smplx_param['betas_neutral'].reshape(
num_person, -1)
smplx_shape[(np.abs(smplx_shape) > 3).any(axis=1)] = 0.
smplx_shape = smplx_shape.reshape(num_person, -1)
            if self.__class__.__name__ == 'MPII_MM':
for name in ('L_Ankle', 'R_Ankle', 'L_Wrist', 'R_Wrist'):
smplx_pose_valid[:, smpl_x.orig_joints_name.index(name)] = 0
for name in ('L_Big_toe', 'L_Small_toe', 'L_Heel', 'R_Big_toe', 'R_Small_toe', 'R_Heel'):
smplx_joint_valid[:,smpl_x.joints_name.index(name)] = 0
lhand_bbox_center_list = []
lhand_bbox_valid_list = []
lhand_bbox_size_list = []
lhand_bbox_list = []
face_bbox_center_list = []
face_bbox_size_list = []
face_bbox_valid_list = []
face_bbox_list = []
rhand_bbox_center_list = []
rhand_bbox_valid_list = []
rhand_bbox_size_list = []
rhand_bbox_list = []
body_bbox_center_list = []
body_bbox_size_list = []
body_bbox_valid_list = []
body_bbox_list = []
# hand and face bbox transform
for i in range(num_person):
body_bbox, body_bbox_valid = self.process_hand_face_bbox(
data['bbox'][i], do_flip, img_shape, img2bb_trans,
cropped_img_shape)
lhand_bbox, lhand_bbox_valid = self.process_hand_face_bbox(
data['lhand_bbox'][i], do_flip, img_shape, img2bb_trans,
cropped_img_shape)
lhand_bbox_valid *= smplx_param['lhand_valid'][i]
rhand_bbox, rhand_bbox_valid = self.process_hand_face_bbox(
data['rhand_bbox'][i], do_flip, img_shape, img2bb_trans,
cropped_img_shape)
rhand_bbox_valid *= smplx_param['rhand_valid'][i]
face_bbox, face_bbox_valid = self.process_hand_face_bbox(
data['face_bbox'][i], do_flip, img_shape, img2bb_trans,
cropped_img_shape)
face_bbox_valid *= smplx_param['face_valid'][i]
# BEDLAM and COCO_NA do not have face expression
# if self.__class__.__name__ != 'BEDLAM':
# face_bbox_valid *= smplx_param['face_valid'][i]
if do_flip:
lhand_bbox, rhand_bbox = rhand_bbox, lhand_bbox
lhand_bbox_valid, rhand_bbox_valid = rhand_bbox_valid, lhand_bbox_valid
body_bbox_list.append(body_bbox)
lhand_bbox_list.append(lhand_bbox)
rhand_bbox_list.append(rhand_bbox)
face_bbox_list.append(face_bbox)
lhand_bbox_center = (lhand_bbox[0] + lhand_bbox[1]) / 2.
rhand_bbox_center = (rhand_bbox[0] + rhand_bbox[1]) / 2.
face_bbox_center = (face_bbox[0] + face_bbox[1]) / 2.
body_bbox_center = (body_bbox[0] + body_bbox[1]) / 2.
lhand_bbox_size = lhand_bbox[1] - lhand_bbox[0]
rhand_bbox_size = rhand_bbox[1] - rhand_bbox[0]
face_bbox_size = face_bbox[1] - face_bbox[0]
body_bbox_size = body_bbox[1] - body_bbox[0]
lhand_bbox_center_list.append(lhand_bbox_center)
lhand_bbox_valid_list.append(lhand_bbox_valid)
lhand_bbox_size_list.append(lhand_bbox_size)
face_bbox_center_list.append(face_bbox_center)
face_bbox_size_list.append(face_bbox_size)
face_bbox_valid_list.append(face_bbox_valid)
rhand_bbox_center_list.append(rhand_bbox_center)
rhand_bbox_valid_list.append(rhand_bbox_valid)
rhand_bbox_size_list.append(rhand_bbox_size)
body_bbox_center_list.append(body_bbox_center)
body_bbox_size_list.append(body_bbox_size)
body_bbox_valid_list.append(body_bbox_valid)
body_bbox = np.stack(body_bbox_list, axis=0)
lhand_bbox = np.stack(lhand_bbox_list, axis=0)
rhand_bbox = np.stack(rhand_bbox_list, axis=0)
face_bbox = np.stack(face_bbox_list, axis=0)
lhand_bbox_center = np.stack(lhand_bbox_center_list, axis=0)
lhand_bbox_valid = np.stack(lhand_bbox_valid_list, axis=0)
lhand_bbox_size = np.stack(lhand_bbox_size_list, axis=0)
face_bbox_center = np.stack(face_bbox_center_list, axis=0)
face_bbox_size = np.stack(face_bbox_size_list, axis=0)
face_bbox_valid = np.stack(face_bbox_valid_list, axis=0)
body_bbox_center = np.stack(body_bbox_center_list, axis=0)
body_bbox_size = np.stack(body_bbox_size_list, axis=0)
body_bbox_valid = np.stack(body_bbox_valid_list, axis=0)
rhand_bbox_center = np.stack(rhand_bbox_center_list, axis=0)
rhand_bbox_valid = np.stack(rhand_bbox_valid_list, axis=0)
rhand_bbox_size = np.stack(rhand_bbox_size_list, axis=0)
inputs = {'img': img}
# joint_img_aug[:,:,2] = joint_img_aug[:,:,2] * body_bbox_valid[:,None]
            is_3D = float(not dummy_cord)
            if self.__class__.__name__ == 'COCO_NA':
                is_3D = float(False)
if self.__class__.__name__ == 'GTA_Human2':
smplx_shape_valid = smplx_shape_valid * 0
            if self.__class__.__name__ in ('PoseTrack', 'MPII_MM', 'CrowdPose',
                                           'UBody_MM', 'COCO_NA'):
joint_cam_ra[...,-1] = joint_cam_ra[...,-1] * smplx_joint_valid[...,0]
joint_cam_wo_ra[...,-1] = joint_cam_wo_ra[...,-1] * smplx_joint_valid[...,0]
joint_img_aug[...,-1] = joint_img_aug[...,-1] * smplx_joint_valid[...,0]
# if body_bbox_valid.sum() > 0:
targets = {
# keypoints2d, [0,img_w],[0,img_h] -> [0,1] -> [0,output_hm_shape]
'joint_img': joint_img_aug[body_bbox_valid>0],
# joint_cam, kp3d wo ra # raw kps3d probably without ra
'joint_cam': joint_cam_wo_ra[body_bbox_valid>0],
# kps3d with body, face, hand ra
'smplx_joint_cam': joint_cam_ra[body_bbox_valid>0],
'smplx_pose': smplx_pose[body_bbox_valid>0],
'smplx_shape': smplx_shape[body_bbox_valid>0],
'smplx_expr': smplx_expr[body_bbox_valid>0],
'lhand_bbox_center': lhand_bbox_center[body_bbox_valid>0],
'lhand_bbox_size': lhand_bbox_size[body_bbox_valid>0],
'rhand_bbox_center': rhand_bbox_center[body_bbox_valid>0],
'rhand_bbox_size': rhand_bbox_size[body_bbox_valid>0],
'face_bbox_center': face_bbox_center[body_bbox_valid>0],
'face_bbox_size': face_bbox_size[body_bbox_valid>0],
'body_bbox_center': body_bbox_center[body_bbox_valid>0],
'body_bbox_size': body_bbox_size[body_bbox_valid>0],
'body_bbox': body_bbox.reshape(-1,4)[body_bbox_valid>0],
'lhand_bbox': lhand_bbox.reshape(-1,4)[body_bbox_valid>0],
'rhand_bbox': rhand_bbox.reshape(-1,4)[body_bbox_valid>0],
'face_bbox': face_bbox.reshape(-1,4)[body_bbox_valid>0],
'gender': gender[body_bbox_valid>0]}
meta_info = {
'joint_trunc': joint_trunc[body_bbox_valid>0],
'smplx_pose_valid': smplx_pose_valid[body_bbox_valid>0],
'smplx_shape_valid': smplx_shape_valid[body_bbox_valid>0],
'smplx_expr_valid': smplx_expr_valid[body_bbox_valid>0],
'is_3D': is_3D,
'lhand_bbox_valid': lhand_bbox_valid[body_bbox_valid>0],
'rhand_bbox_valid': rhand_bbox_valid[body_bbox_valid>0],
'face_bbox_valid': face_bbox_valid[body_bbox_valid>0],
'body_bbox_valid': body_bbox_valid[body_bbox_valid>0],
'img_shape': np.array(img.shape[:2]),
'ori_shape':data['img_shape'],
'idx': idx
}
result = {**inputs, **targets, **meta_info}
result = self.normalize(result)
result = self.format(result)
return result
if self.data_split == 'test':
self.cam_param = {}
joint_cam = data['joint_cam']
if joint_cam is not None:
dummy_cord = False
joint_cam[:,:,:3] = joint_cam[:,:,:3] - joint_cam[
:, self.joint_set['root_joint_idx'], None, :3] # root-relative
else:
# dummy cord as joint_cam
dummy_cord = True
joint_cam = np.zeros(
(num_person, self.joint_set['joint_num'], 3),
dtype=np.float32)
joint_img = data['joint_img']
joint_img_aug, joint_cam_wo_ra, joint_cam_ra, joint_trunc = \
process_db_coord_batch_no_valid(
joint_img, joint_cam, do_flip, img_shape,
self.joint_set['flip_pairs'], img2bb_trans, rot,
self.joint_set['joints_name'], smpl_x.joints_name,
cropped_img_shape)
# smplx coordinates and parameters
smplx_param = data['smplx_param']
# smplx_cam_trans = np.array(
# smplx_param['trans']) if 'trans' in smplx_param else None
            # TODO: remove this, separate smpl and smplx
smplx_pose, smplx_shape, smplx_expr, smplx_pose_valid, \
smplx_joint_valid, smplx_expr_valid, smplx_shape_valid = \
process_human_model_output_batch_simplify(
smplx_param, do_flip, rot, as_smplx)
# if cam not provided, we take joint_img as smplx joint 2d,
# which is commonly the case for our processed humandata
if self.use_betas_neutral:
smplx_shape = smplx_param['betas_neutral'].reshape(
num_person, -1)
smplx_shape[(np.abs(smplx_shape) > 3).any(axis=1)] = 0.
smplx_shape = smplx_shape.reshape(num_person, -1)
# smplx_pose_valid = np.tile(smplx_pose_valid[:,:, None], (1, 3)).reshape(num_person,-1)
smplx_joint_valid = smplx_joint_valid[:, :, None]
# if not (smplx_shape == 0).all():
# smplx_shape_valid = True
# else:
# smplx_shape_valid = False
lhand_bbox_center_list = []
lhand_bbox_valid_list = []
lhand_bbox_size_list = []
lhand_bbox_list = []
face_bbox_center_list = []
face_bbox_size_list = []
face_bbox_valid_list = []
face_bbox_list = []
rhand_bbox_center_list = []
rhand_bbox_valid_list = []
rhand_bbox_size_list = []
rhand_bbox_list = []
body_bbox_center_list = []
body_bbox_size_list = []
body_bbox_valid_list = []
body_bbox_list = []
for i in range(num_person):
lhand_bbox, lhand_bbox_valid = self.process_hand_face_bbox(
data['lhand_bbox'][i], do_flip, img_shape, img2bb_trans,
cropped_img_shape)
rhand_bbox, rhand_bbox_valid = self.process_hand_face_bbox(
data['rhand_bbox'][i], do_flip, img_shape, img2bb_trans,
cropped_img_shape)
face_bbox, face_bbox_valid = self.process_hand_face_bbox(
data['face_bbox'][i], do_flip, img_shape, img2bb_trans,
cropped_img_shape)
body_bbox, body_bbox_valid = self.process_hand_face_bbox(
data['bbox'][i], do_flip, img_shape, img2bb_trans,
cropped_img_shape)
if do_flip:
lhand_bbox, rhand_bbox = rhand_bbox, lhand_bbox
lhand_bbox_valid, rhand_bbox_valid = rhand_bbox_valid, lhand_bbox_valid
body_bbox_list.append(body_bbox)
lhand_bbox_list.append(lhand_bbox)
rhand_bbox_list.append(rhand_bbox)
face_bbox_list.append(face_bbox)
lhand_bbox_center = (lhand_bbox[0] + lhand_bbox[1]) / 2.
rhand_bbox_center = (rhand_bbox[0] + rhand_bbox[1]) / 2.
face_bbox_center = (face_bbox[0] + face_bbox[1]) / 2.
body_bbox_center = (body_bbox[0] + body_bbox[1]) / 2.
lhand_bbox_size = lhand_bbox[1] - lhand_bbox[0]
rhand_bbox_size = rhand_bbox[1] - rhand_bbox[0]
face_bbox_size = face_bbox[1] - face_bbox[0]
body_bbox_size = body_bbox[1] - body_bbox[0]
lhand_bbox_center_list.append(lhand_bbox_center)
lhand_bbox_valid_list.append(lhand_bbox_valid)
lhand_bbox_size_list.append(lhand_bbox_size)
face_bbox_center_list.append(face_bbox_center)
face_bbox_size_list.append(face_bbox_size)
face_bbox_valid_list.append(face_bbox_valid)
rhand_bbox_center_list.append(rhand_bbox_center)
rhand_bbox_valid_list.append(rhand_bbox_valid)
rhand_bbox_size_list.append(rhand_bbox_size)
body_bbox_center_list.append(body_bbox_center)
body_bbox_size_list.append(body_bbox_size)
body_bbox_valid_list.append(body_bbox_valid)
body_bbox = np.stack(body_bbox_list, axis=0)
lhand_bbox = np.stack(lhand_bbox_list, axis=0)
rhand_bbox = np.stack(rhand_bbox_list, axis=0)
face_bbox = np.stack(face_bbox_list, axis=0)
lhand_bbox_center = np.stack(lhand_bbox_center_list, axis=0)
lhand_bbox_valid = np.stack(lhand_bbox_valid_list, axis=0)
lhand_bbox_size = np.stack(lhand_bbox_size_list, axis=0)
face_bbox_center = np.stack(face_bbox_center_list, axis=0)
face_bbox_size = np.stack(face_bbox_size_list, axis=0)
face_bbox_valid = np.stack(face_bbox_valid_list, axis=0)
body_bbox_center = np.stack(body_bbox_center_list, axis=0)
body_bbox_size = np.stack(body_bbox_size_list, axis=0)
body_bbox_valid = np.stack(body_bbox_valid_list, axis=0)
rhand_bbox_center = np.stack(rhand_bbox_center_list, axis=0)
rhand_bbox_valid = np.stack(rhand_bbox_valid_list, axis=0)
rhand_bbox_size = np.stack(rhand_bbox_size_list, axis=0)
inputs = {'img': img}
targets = {
# keypoints2d, [0,img_w],[0,img_h] -> [0,1] -> [0,output_hm_shape]
'joint_img': joint_img_aug,
# projected smplx if valid cam_param, else same as keypoints2d
# joint_cam, kp3d wo ra # raw kps3d probably without ra
'joint_cam': joint_cam_wo_ra,
'ann_idx': idx,
# kps3d with body, face, hand ra
'smplx_joint_cam': joint_cam_ra,
'smplx_pose': smplx_pose,
'smplx_shape': smplx_shape,
'smplx_expr': smplx_expr,
'lhand_bbox_center': lhand_bbox_center,
'lhand_bbox_size': lhand_bbox_size,
'rhand_bbox_center': rhand_bbox_center,
'rhand_bbox_size': rhand_bbox_size,
'face_bbox_center': face_bbox_center,
'face_bbox_size': face_bbox_size,
'body_bbox_center': body_bbox_center,
'body_bbox_size': body_bbox_size,
'body_bbox': body_bbox.reshape(-1,4),
'lhand_bbox': lhand_bbox.reshape(-1,4),
'rhand_bbox': rhand_bbox.reshape(-1,4),
'face_bbox': face_bbox.reshape(-1,4),
'gender': gender,
'bb2img_trans': bb2img_trans,
}
            meta_info = {
                'joint_trunc': joint_trunc,
                'smplx_pose_valid': smplx_pose_valid,
                # body-only datasets cast shape validity to a plain float flag
                'smplx_shape_valid': float(smplx_shape_valid)
                if self.body_only else smplx_shape_valid,
                'smplx_expr_valid': smplx_expr_valid,
                'is_3D': float(not dummy_cord),
                'lhand_bbox_valid': lhand_bbox_valid,
                'rhand_bbox_valid': rhand_bbox_valid,
                'face_bbox_valid': face_bbox_valid,
                'body_bbox_valid': body_bbox_valid,
                'img_shape': np.array(img.shape[:2]),
                'ori_shape': data['img_shape'],
                'idx': idx
            }
result = {**inputs, **targets, **meta_info}
result = self.normalize(result)
result = self.format(result)
return result
def process_hand_face_bbox(self, bbox, do_flip, img_shape, img2bb_trans,
input_img_shape):
if bbox is None:
bbox = np.array([0, 0, 1, 1],
dtype=np.float32).reshape(2, 2) # dummy value
bbox_valid = float(False) # dummy value
else:
# reshape to top-left (x,y) and bottom-right (x,y)
bbox = bbox.reshape(2, 2)
# flip augmentation
if do_flip:
bbox[:, 0] = img_shape[1] - bbox[:, 0] - 1
bbox[0, 0], bbox[1, 0] = bbox[1, 0].copy(), bbox[
0, 0].copy() # xmin <-> xmax swap
# make four points of the bbox
bbox = bbox.reshape(4).tolist()
xmin, ymin, xmax, ymax = bbox
bbox = np.array(
[[xmin, ymin], [xmax, ymin], [xmax, ymax], [xmin, ymax]],
dtype=np.float32).reshape(4, 2)
# affine transformation (crop, rotation, scale)
bbox_xy1 = np.concatenate((bbox, np.ones_like(bbox[:, :1])), 1)
bbox = np.dot(img2bb_trans,
bbox_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]
# bbox[:, 0] = bbox[:, 0] / input_img_shape[1] * cfg.output_hm_shape[2]
# bbox[:, 1] = bbox[:, 1] / input_img_shape[0] * cfg.output_hm_shape[1]
bbox[:, 0] /= input_img_shape[1]
bbox[:, 1] /= input_img_shape[0]
# make box a rectangle without rotation
if np.max(bbox[:,0])<=0 or np.min(bbox[:,0])>=1 or np.max(bbox[:,1])<=0 or np.min(bbox[:,1])>=1:
bbox_valid = float(False)
bbox = np.array([0, 0, 1, 1], dtype=np.float32)
else:
xmin = np.max([np.min(bbox[:, 0]), 0])
xmax = np.min([np.max(bbox[:, 0]), 1])
ymin = np.max([np.min(bbox[:, 1]), 0])
ymax = np.min([np.max(bbox[:, 1]), 1])
bbox = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
bbox = np.clip(bbox,0,1)
bbox_valid = float(True)
bbox = bbox.reshape(2, 2)
return bbox, bbox_valid
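    # Contract sketch for process_hand_face_bbox (illustrative values; `trans`
    # is a hypothetical 2x3 affine from augmentation): given a flattened xyxy
    # box in original-image pixels, it returns a (2, 2) array of normalized
    # [[xmin, ymin], [xmax, ymax]] corners in the cropped image plus a float
    # validity flag:
    #
    #     bbox, valid = self.process_hand_face_bbox(
    #         np.array([10., 20., 50., 80.]), do_flip=False,
    #         img_shape=(480, 640), img2bb_trans=trans,
    #         input_img_shape=(512, 384))
    #     # bbox.shape == (2, 2); valid in {0.0, 1.0}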
def evaluate(self, outs, cur_sample_idx=None):
annots = self.datalist
sample_num = len(outs)
eval_result = {
'pa_mpvpe_all': [],
'pa_mpvpe_l_hand': [],
'pa_mpvpe_r_hand': [],
'pa_mpvpe_hand': [],
'pa_mpvpe_face': [],
'mpvpe_all': [],
'mpvpe_l_hand': [],
'mpvpe_r_hand': [],
'mpvpe_hand': [],
'mpvpe_face': [],
'pa_mpjpe_body': [],
'pa_mpjpe_l_hand': [],
'pa_mpjpe_r_hand': [],
'pa_mpjpe_hand': []
}
for n in range(sample_num):
out = outs[n]
ann_idx = out['gt_ann_idx']
mesh_gt = out['smplx_mesh_cam_pseudo_gt']
mesh_out = out['smplx_mesh_cam']
            cam_trans = out['cam_trans']
img_path = []
for ann_id in ann_idx:
img_path.append(annots[ann_id]['img_path'])
eval_result['img_path'] = img_path
eval_result['ann_idx'] = ann_idx
img = out['img']
# MPVPE from all vertices
mesh_out_align = mesh_out - np.dot(
smpl_x.J_regressor,
mesh_out)[smpl_x.J_regressor_idx['pelvis'], None, :] + np.dot(
smpl_x.J_regressor,
mesh_gt)[smpl_x.J_regressor_idx['pelvis'], None, :]
eval_result['mpvpe_all'].append(
np.sqrt(np.sum(
(mesh_out_align - mesh_gt)**2, 1)).mean() * 1000)
mesh_out_align = rigid_align(mesh_out, mesh_gt)
eval_result['pa_mpvpe_all'].append(
np.sqrt(np.sum(
(mesh_out_align - mesh_gt)**2, 1)).mean() * 1000)
# MPVPE from hand vertices
mesh_gt_lhand = mesh_gt[smpl_x.hand_vertex_idx['left_hand'], :]
mesh_out_lhand = mesh_out[smpl_x.hand_vertex_idx['left_hand'], :]
mesh_gt_rhand = mesh_gt[smpl_x.hand_vertex_idx['right_hand'], :]
mesh_out_rhand = mesh_out[smpl_x.hand_vertex_idx['right_hand'], :]
mesh_out_lhand_align = mesh_out_lhand - np.dot(
smpl_x.J_regressor,
mesh_out)[smpl_x.J_regressor_idx['lwrist'], None, :] + np.dot(
smpl_x.J_regressor,
mesh_gt)[smpl_x.J_regressor_idx['lwrist'], None, :]
mesh_out_rhand_align = mesh_out_rhand - np.dot(
smpl_x.J_regressor,
mesh_out)[smpl_x.J_regressor_idx['rwrist'], None, :] + np.dot(
smpl_x.J_regressor,
mesh_gt)[smpl_x.J_regressor_idx['rwrist'], None, :]
eval_result['mpvpe_l_hand'].append(
np.sqrt(np.sum(
(mesh_out_lhand_align - mesh_gt_lhand)**2, 1)).mean() *
1000)
eval_result['mpvpe_r_hand'].append(
np.sqrt(np.sum(
(mesh_out_rhand_align - mesh_gt_rhand)**2, 1)).mean() *
1000)
eval_result['mpvpe_hand'].append(
(np.sqrt(np.sum(
(mesh_out_lhand_align - mesh_gt_lhand)**2, 1)).mean() *
1000 +
np.sqrt(np.sum(
(mesh_out_rhand_align - mesh_gt_rhand)**2, 1)).mean() *
1000) / 2.)
mesh_out_lhand_align = rigid_align(mesh_out_lhand, mesh_gt_lhand)
mesh_out_rhand_align = rigid_align(mesh_out_rhand, mesh_gt_rhand)
eval_result['pa_mpvpe_l_hand'].append(
np.sqrt(np.sum(
(mesh_out_lhand_align - mesh_gt_lhand)**2, 1)).mean() *
1000)
eval_result['pa_mpvpe_r_hand'].append(
np.sqrt(np.sum(
(mesh_out_rhand_align - mesh_gt_rhand)**2, 1)).mean() *
1000)
eval_result['pa_mpvpe_hand'].append(
(np.sqrt(np.sum(
(mesh_out_lhand_align - mesh_gt_lhand)**2, 1)).mean() *
1000 +
np.sqrt(np.sum(
(mesh_out_rhand_align - mesh_gt_rhand)**2, 1)).mean() *
1000) / 2.)
if self.__class__.__name__ == 'UBody':
joint_gt_body_wo_trans = np.dot(smpl_x.j14_regressor,
mesh_gt)
img_wh = out['gt_img_shape'].flip(-1)
joint_gt_body_proj = project_points_new(
points_3d=joint_gt_body_wo_trans,
pred_cam=cam_trans,
focal_length=5000,
camera_center=img_wh/2
) # origin image space
joint_gt_lhand_wo_trans = np.dot(
smpl_x.orig_hand_regressor['left'], mesh_gt)
joint_gt_lhand_proj = project_points_new(
points_3d=joint_gt_lhand_wo_trans,
pred_cam=cam_trans,
focal_length=5000,
camera_center=img_wh/2
) # origin image space
                # fix copy-paste bug: use the right-hand regressor here
                joint_gt_rhand_wo_trans = np.dot(
                    smpl_x.orig_hand_regressor['right'], mesh_gt)
joint_gt_rhand_proj = project_points_new(
points_3d=joint_gt_rhand_wo_trans,
pred_cam=cam_trans,
focal_length=5000,
camera_center=img_wh/2
) # origin image space
mesh_gt_proj = project_points_new(
points_3d=mesh_gt,
pred_cam=cam_trans,
focal_length=5000,
camera_center=img_wh/2)
joint_gt_body_valid = self.validate_within_img(
img, joint_gt_body_proj)
joint_gt_lhand_valid = self.validate_within_img(
img, joint_gt_lhand_proj)
joint_gt_rhand_valid = self.validate_within_img(
img, joint_gt_rhand_proj)
mesh_valid = self.validate_within_img(img, mesh_gt_proj)
mesh_lhand_valid = mesh_valid[smpl_x.hand_vertex_idx['left_hand']]
mesh_rhand_valid = mesh_valid[smpl_x.hand_vertex_idx['right_hand']]
mesh_face_valid = mesh_valid[smpl_x.face_vertex_idx]
# MPVPE from face vertices
mesh_gt_face = mesh_gt[smpl_x.face_vertex_idx, :]
mesh_out_face = mesh_out[smpl_x.face_vertex_idx, :]
mesh_out_face_align = mesh_out_face - np.dot(
smpl_x.J_regressor,
mesh_out)[smpl_x.J_regressor_idx['neck'], None, :] + np.dot(
smpl_x.J_regressor,
mesh_gt)[smpl_x.J_regressor_idx['neck'], None, :]
eval_result['mpvpe_face'].append(
np.sqrt(np.sum(
(mesh_out_face_align - mesh_gt_face)**2, 1)).mean() * 1000)
mesh_out_face_align = rigid_align(mesh_out_face, mesh_gt_face)
eval_result['pa_mpvpe_face'].append(
np.sqrt(np.sum(
(mesh_out_face_align - mesh_gt_face)**2, 1)).mean() * 1000)
# MPJPE from body joints
joint_gt_body = np.dot(smpl_x.j14_regressor, mesh_gt)
joint_out_body = np.dot(smpl_x.j14_regressor, mesh_out)
joint_out_body_align = rigid_align(joint_out_body, joint_gt_body)
eval_result['pa_mpjpe_body'].append(
np.sqrt(np.sum((joint_out_body_align - joint_gt_body)**2,
1))[joint_gt_body_valid].mean() * 1000)
# eval_result['pa_mpjpe_body'].append(
# np.sqrt(np.sum(
# (joint_out_body_align - joint_gt_body)**2, 1)).mean() *
# 1000)
# MPJPE from hand joints
joint_gt_lhand = np.dot(smpl_x.orig_hand_regressor['left'],
mesh_gt)
joint_out_lhand = np.dot(smpl_x.orig_hand_regressor['left'],
mesh_out)
joint_out_lhand_align = rigid_align(joint_out_lhand,
joint_gt_lhand)
joint_gt_rhand = np.dot(smpl_x.orig_hand_regressor['right'],
mesh_gt)
joint_out_rhand = np.dot(smpl_x.orig_hand_regressor['right'],
mesh_out)
joint_out_rhand_align = rigid_align(joint_out_rhand,
joint_gt_rhand)
            # if self.__class__.__name__ == 'UBody':
            pa_mpjpe_hand = []
            if sum(joint_gt_lhand_valid) != 0:
pa_mpjpe_lhand = np.sqrt(
np.sum((joint_out_lhand_align - joint_gt_lhand)**2,
1))[joint_gt_lhand_valid].mean() * 1000
pa_mpjpe_hand.append(pa_mpjpe_lhand)
eval_result['pa_mpjpe_l_hand'].append(pa_mpjpe_lhand)
if sum(joint_gt_rhand_valid) != 0:
pa_mpjpe_rhand = np.sqrt(
np.sum((joint_out_rhand_align - joint_gt_rhand)**2,
1))[joint_gt_rhand_valid].mean() * 1000
pa_mpjpe_hand.append(pa_mpjpe_rhand)
eval_result['pa_mpjpe_r_hand'].append(pa_mpjpe_rhand)
if len(pa_mpjpe_hand) > 0:
eval_result['pa_mpjpe_hand'].append(np.mean(pa_mpjpe_hand))
eval_result['pa_mpjpe_l_hand'].append(
np.sqrt(np.sum(
(joint_out_lhand_align - joint_gt_lhand)**2, 1)).mean() *
1000)
eval_result['pa_mpjpe_r_hand'].append(
np.sqrt(np.sum(
(joint_out_rhand_align - joint_gt_rhand)**2, 1)).mean() *
1000)
eval_result['pa_mpjpe_hand'].append(
(np.sqrt(np.sum(
(joint_out_lhand_align - joint_gt_lhand)**2, 1)).mean() *
1000 +
np.sqrt(np.sum(
(joint_out_rhand_align - joint_gt_rhand)**2, 1)).mean() *
1000) / 2.)
return eval_result
def print_eval_result(self, eval_result):
print(f'======{cfg.testset}======')
print('PA MPVPE (All): %.2f mm' % np.mean(eval_result['pa_mpvpe_all']))
print('PA MPVPE (L-Hands): %.2f mm' %
np.mean(eval_result['pa_mpvpe_l_hand']))
print('PA MPVPE (R-Hands): %.2f mm' %
np.mean(eval_result['pa_mpvpe_r_hand']))
print('PA MPVPE (Hands): %.2f mm' %
np.mean(eval_result['pa_mpvpe_hand']))
print('PA MPVPE (Face): %.2f mm' %
np.mean(eval_result['pa_mpvpe_face']))
print()
print('MPVPE (All): %.2f mm' % np.mean(eval_result['mpvpe_all']))
print('MPVPE (L-Hands): %.2f mm' %
np.mean(eval_result['mpvpe_l_hand']))
print('MPVPE (R-Hands): %.2f mm' %
np.mean(eval_result['mpvpe_r_hand']))
print('MPVPE (Hands): %.2f mm' % np.mean(eval_result['mpvpe_hand']))
print('MPVPE (Face): %.2f mm' % np.mean(eval_result['mpvpe_face']))
print()
print('PA MPJPE (Body): %.2f mm' %
np.mean(eval_result['pa_mpjpe_body']))
print('PA MPJPE (L-Hands): %.2f mm' %
np.mean(eval_result['pa_mpjpe_l_hand']))
print('PA MPJPE (R-Hands): %.2f mm' %
np.mean(eval_result['pa_mpjpe_r_hand']))
print('PA MPJPE (Hands): %.2f mm' %
np.mean(eval_result['pa_mpjpe_hand']))
        with open(os.path.join(cfg.result_dir, 'result.txt'), 'w') as f:
            f.write(f'{cfg.testset} dataset\n')
            f.write('PA MPVPE (All): %.2f mm\n' %
                    np.mean(eval_result['pa_mpvpe_all']))
            f.write('PA MPVPE (L-Hands): %.2f mm\n' %
                    np.mean(eval_result['pa_mpvpe_l_hand']))
            f.write('PA MPVPE (R-Hands): %.2f mm\n' %
                    np.mean(eval_result['pa_mpvpe_r_hand']))
            f.write('PA MPVPE (Hands): %.2f mm\n' %
                    np.mean(eval_result['pa_mpvpe_hand']))
            f.write('PA MPVPE (Face): %.2f mm\n' %
                    np.mean(eval_result['pa_mpvpe_face']))
            f.write('MPVPE (All): %.2f mm\n' % np.mean(eval_result['mpvpe_all']))
            f.write('MPVPE (L-Hands): %.2f mm\n' %
                    np.mean(eval_result['mpvpe_l_hand']))
            f.write('MPVPE (R-Hands): %.2f mm\n' %
                    np.mean(eval_result['mpvpe_r_hand']))
            f.write('MPVPE (Hands): %.2f mm\n' % np.mean(eval_result['mpvpe_hand']))
            f.write('MPVPE (Face): %.2f mm\n' % np.mean(eval_result['mpvpe_face']))
            f.write('PA MPJPE (Body): %.2f mm\n' %
                    np.mean(eval_result['pa_mpjpe_body']))
            f.write('PA MPJPE (L-Hands): %.2f mm\n' %
                    np.mean(eval_result['pa_mpjpe_l_hand']))
            f.write('PA MPJPE (R-Hands): %.2f mm\n' %
                    np.mean(eval_result['pa_mpjpe_r_hand']))
            f.write('PA MPJPE (Hands): %.2f mm\n' %
                    np.mean(eval_result['pa_mpjpe_hand']))
    def validate_within_img_batch(self, img_wh, points):
        """Check whether each 2D point lies within its image.

        img_wh: (num_imgs, 2) array of (width, height);
        points: (num_imgs, num_points, 2).
        Returns a boolean mask of shape (num_imgs, num_points).
        """
        valid_mask = np.logical_and((points - img_wh[:, None]) < 0, points > 0)
        valid_mask = np.logical_and(valid_mask[:, :, 0], valid_mask[:, :, 1])
        return valid_mask
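    # Example (toy values): validate_within_img_batch(np.array([[640, 480]]),
    # np.array([[[10., 10.], [700., 10.]]])) -> array([[ True, False]]).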
    def decompress_keypoints(self, humandata) -> dict:
        """Decompress all keypoint arrays that have a stored mask.

        If a key contains 'keypoints' and f'{key}_mask' is present, invalid
        zeros are re-inserted at the masked-out joint positions so the array
        is restored to full size.

        Raises:
            KeyError:
                A key containing 'keypoints' has been found
                but its corresponding mask is missing.
        """
assert bool(humandata['__keypoints_compressed__']) is True
key_pairs = []
for key in humandata.files:
if key not in KPS2D_KEYS + KPS3D_KEYS:
continue
mask_key = f'{key}_mask'
if mask_key in humandata.files:
print(f'Decompress {key}...')
key_pairs.append([key, mask_key])
decompressed_dict = {}
for kpt_key, mask_key in key_pairs:
mask_array = np.asarray(humandata[mask_key])
compressed_kpt = humandata[kpt_key]
kpt_array = \
self.add_zero_pad(compressed_kpt, mask_array)
decompressed_dict[kpt_key] = kpt_array
del humandata
return decompressed_dict
def add_zero_pad(self, compressed_array: np.ndarray,
mask_array: np.ndarray) -> np.ndarray:
"""Pad zeros to a compressed keypoints array.
Args:
compressed_array (np.ndarray):
A compressed keypoints array.
mask_array (np.ndarray):
The mask records compression relationship.
Returns:
np.ndarray:
A keypoints array in full-size.
"""
assert mask_array.sum() == compressed_array.shape[1]
data_len, _, dim = compressed_array.shape
mask_len = mask_array.shape[0]
ret_value = np.zeros(shape=[data_len, mask_len, dim],
dtype=compressed_array.dtype)
valid_mask_index = np.where(mask_array == 1)[0]
ret_value[:, valid_mask_index, :] = compressed_array
return ret_value
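    # A minimal round-trip sketch for add_zero_pad (toy shapes, not real
    # data): with mask [1, 0, 1] and a compressed array of shape (1, 2, 3),
    # the result has shape (1, 3, 3) with zeros re-inserted at the masked-out
    # joint:
    #
    #     mask = np.array([1, 0, 1])
    #     compressed = np.ones((1, 2, 3))
    #     full = self.add_zero_pad(compressed, mask)  # full[:, 1, :] == 0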