Spaces:
Starting
on
L40S
Starting
on
L40S
import os | |
import os.path as osp | |
from glob import glob | |
import numpy as np | |
from config.config import cfg | |
import copy | |
import json | |
import pickle | |
import cv2 | |
import torch | |
from pycocotools.coco import COCO | |
from util.human_models import smpl_x | |
from util.preprocessing import load_img, sanitize_bbox, process_bbox, load_ply, load_obj | |
from util.transforms import rigid_align, rigid_align_batch | |
import tqdm | |
import random | |
from util.formatting import DefaultFormatBundle | |
from detrsmpl.data.datasets.pipelines.transforms import Normalize | |
import time | |
from util.preprocessing import ( | |
load_img, process_bbox, augmentation_instance_sample | |
,process_human_model_output_batch_simplify,process_db_coord_batch_no_valid) | |
# from util.human_models import smpl_x | |
from .humandata import HumanDataset | |
import csv | |
# Names of 2D keypoint arrays that may be present in an annotation file.
# `AGORA_MM.load_data` scans them in this order.
KPS2D_KEYS = [
    'keypoints2d_ori', 'keypoints2d_smplx', 'keypoints2d_smpl',
    'keypoints2d_original', 'keypoints2d_gta',
]

# Names of 3D keypoint arrays that may be present in an annotation file.
# `AGORA_MM.load_data` uses the first one it finds.
KPS3D_KEYS = [
    'keypoints3d_cam', 'keypoints3d', 'keypoints3d_smplx', 'keypoints3d_smpl',
    'keypoints3d_original', 'keypoints3d_gta',
]
class AGORA_MM(HumanDataset):
    """AGORA multi-person dataset on top of ``HumanDataset``.

    The class resolves the annotation / cache files for the requested split,
    builds one datalist entry per image (each entry holds every valid person
    of that image), turns an entry into a training or test sample in
    ``__getitem__`` and accumulates per-vertex errors in ``evaluate``.
    """

    # (data_split, (height, width)) -> (annotation npz, cache npz)
    _ANNOT_FILES = {
        ('train', (2160, 3840)): (
            'data/preprocessed_npz/multihuman_data/agora_train_3840_w_occ_multi_2010.npz',
            'data/preprocessed_npz/cache/agora_train_3840_w_occ_cache_2010.npz'),
        ('train', (720, 1280)): (
            'data/preprocessed_npz/multihuman_data/agora_train_1280_multi_1010.npz',
            'data/preprocessed_npz/cache/agora_train_cache_1280_1010.npz'),
        ('test', (2160, 3840)): (
            'data/preprocessed_npz/multihuman_data/agora_validation_multi_3840_1010.npz',
            'data/preprocessed_npz/cache/agora_validation_cache_3840_1010_occ_cache_balance.npz'),
        ('test', (720, 1280)): (
            'data/preprocessed_npz/multihuman_data/agora_validation_1280_1010_occ.npz',
            'data/preprocessed_npz/cache/agora_validation_cache_1280_1010_occ.npz'),
    }

    # Metric names and the labels used when reporting them, in report order.
    _PA_METRICS = (
        ('PA MPVPE (All)', 'pa_mpvpe_all'),
        ('PA MPVPE (L-Hands)', 'pa_mpvpe_l_hand'),
        ('PA MPVPE (R-Hands)', 'pa_mpvpe_r_hand'),
        ('PA MPVPE (Hands)', 'pa_mpvpe_hand'),
        ('PA MPVPE (Face)', 'pa_mpvpe_face'),
    )
    _METRICS = (
        ('MPVPE (All)', 'mpvpe_all'),
        ('MPVPE (L-Hands)', 'mpvpe_l_hand'),
        ('MPVPE (R-Hands)', 'mpvpe_r_hand'),
        ('MPVPE (Hands)', 'mpvpe_hand'),
        ('MPVPE (Face)', 'mpvpe_face'),
    )

    def __init__(self, transform, data_split):
        """Resolve the annotation paths and load the datalist.

        Args:
            transform: forwarded unchanged to ``HumanDataset.__init__``.
            data_split: ``'train'`` or ``'test'``; selects the annotation and
                cache files. For any other value no path is set here.
        """
        super(AGORA_MM, self).__init__(transform, data_split)
        self.img_shape = [2160, 3840]
        self.save_idx = 0
        if self.data_split != 'train':
            self.test_set = 'val'
        self.img_dir = './data/datasets/agora'

        annot_files = self._ANNOT_FILES.get(
            (data_split, tuple(self.img_shape)))
        if annot_files is not None:
            self.annot_path, self.annot_path_cache = annot_files

        self.use_cache = getattr(cfg, 'use_cache', False)
        self.cam_param = {}

        # load data or cache
        if self.use_cache and osp.isfile(self.annot_path_cache):
            print(f'[{self.__class__.__name__}] loading cache from {self.annot_path_cache}')
            self.datalist = self.load_cache(self.annot_path_cache)
        else:
            if self.use_cache:
                print(f'[{self.__class__.__name__}] Cache not found, generating cache...')
            self.datalist = self.load_data(
                train_sample_interval=getattr(
                    cfg, f'{self.__class__.__name__}_train_sample_interval', 1))
            if self.use_cache:
                self.save_cache(self.annot_path_cache, self.datalist)

    @staticmethod
    def _part_bbox_to_xyxy(part_bbox, img_shape, bbox_ratio, rescale):
        """Convert one hand/face ``xywh + conf`` row to ``xyxy``.

        Returns ``None`` when the confidence (last element) is not positive
        or when ``process_bbox`` rejects the box.
        """
        if part_bbox[-1] > 0:  # conf > 0
            part_bbox = part_bbox[:4]
            if rescale:
                part_bbox = process_bbox(part_bbox,
                                         img_width=img_shape[1],
                                         img_height=img_shape[0],
                                         ratio=bbox_ratio)
            if part_bbox is not None:
                part_bbox[2:] += part_bbox[:2]  # xywh -> xyxy
            return part_bbox
        return None

    def load_data(self, train_sample_interval=1):
        """Read ``self.annot_path`` and build the per-image datalist.

        Args:
            train_sample_interval: for the train split, only every
                ``train_sample_interval``-th frame range is kept.

        Returns:
            A list of dicts, one per image that has at least one valid body
            bbox and no NaN in its 3D keypoints.

        Raises:
            KeyError: if the file holds no SMPL/SMPL-X parameters or no 2D
                keypoint array.
        """
        # NOTE(review): allow_pickle=True unpickles objects stored in the
        # file; only load annotation files from a trusted source.
        content = np.load(self.annot_path, allow_pickle=True)
        try:
            frame_range = content['frame_range']
        except KeyError:
            frame_range = \
                np.array([[i, i + 1] for i in range(self.num_data)])
        num_examples = len(frame_range)

        if 'meta' in content:
            meta = content['meta'].item()
            print('meta keys:', meta.keys())
        else:
            meta = None
            print(
                'No meta info provided! Please give height and width manually')

        print(
            f'Start loading humandata {self.annot_path} into memory...\nDataset includes: {content.files}'
        )
        tic = time.time()
        image_path = content['image_path']

        if meta is not None and 'height' in meta:
            height = np.array(meta['height'])
            width = np.array(meta['width'])
            image_shape = np.stack([height, width], axis=-1)
        else:
            image_shape = None

        if meta is not None and 'gender' in meta and len(meta['gender']) != 0:
            gender = meta['gender']
        else:
            gender = None

        if meta is not None and 'is_kid' in meta and len(meta['is_kid']) != 0:
            is_kid = meta['is_kid']
        else:
            is_kid = None

        bbox_xywh = content['bbox_xywh']

        if 'smplx' in content:
            smplx = content['smplx'].item()
            as_smplx = 'smplx'
        elif 'smpl' in content:
            smplx = content['smpl'].item()
            as_smplx = 'smpl'
        elif 'smplh' in content:
            smplx = content['smplh'].item()
            as_smplx = 'smplh'
        # TODO: temp solution, should be more general. But SHAPY is very special
        elif self.__class__.__name__ == 'SHAPY':
            # NOTE(review): `as_smplx` stays unbound on this branch, so the
            # datalist construction below would fail - confirm it is dead.
            smplx = {}
        else:
            raise KeyError('No SMPL for SMPLX available, please check keys:\n'
                           f'{content.files}')
        print('Smplx param', smplx.keys())

        if 'lhand_bbox_xywh' in content and 'rhand_bbox_xywh' in content:
            lhand_bbox_xywh = content['lhand_bbox_xywh']
            rhand_bbox_xywh = content['rhand_bbox_xywh']
        else:
            lhand_bbox_xywh = np.zeros_like(bbox_xywh)
            rhand_bbox_xywh = np.zeros_like(bbox_xywh)

        if 'face_bbox_xywh' in content:
            face_bbox_xywh = content['face_bbox_xywh']
        else:
            face_bbox_xywh = np.zeros_like(bbox_xywh)

        decompressed = False
        if content['__keypoints_compressed__']:
            decompressed_kps = self.decompress_keypoints(content)
            decompressed = True
        kps_source = decompressed_kps if decompressed else content

        # processing keypoints: the first available 3D array is used
        keypoints3d = None
        valid_kps3d = False
        for kps3d_key in KPS3D_KEYS:
            if kps3d_key in content:
                keypoints3d = kps_source[kps3d_key][:, self.SMPLX_137_MAPPING, :]
                valid_kps3d = True
                break

        keypoints2d = None
        if self.keypoints2d is not None:
            keypoints2d = kps_source[self.keypoints2d][:, self.SMPLX_137_MAPPING, :]
        else:
            # No early exit here: when several 2D arrays exist, the last
            # one listed in KPS2D_KEYS is the one that is kept.
            for kps2d_key in KPS2D_KEYS:
                if kps2d_key in content:
                    keypoints2d = kps_source[kps2d_key][:, self.SMPLX_137_MAPPING, :]
        if keypoints2d is None:
            raise KeyError('No 2D keypoints available, please check keys:\n'
                           f'{content.files}')

        # Use the `meta` dict loaded above so that a file without a 'meta'
        # entry does not raise here.
        if meta is not None and 'occ' in meta and len(meta['occ']) > 0:
            occlusion = meta['occ']
        else:
            occlusion = None

        print('Done. Time: {:.2f}s'.format(time.time() - tic))

        # Loop-invariant settings.
        has_bbox_ratio = hasattr(cfg, 'bbox_ratio')
        if has_bbox_ratio:
            bbox_ratio = cfg.bbox_ratio * 0.833  # preprocess body bbox is giving 1.2 box padding
        else:
            bbox_ratio = 1.25
        # Eye poses are not used; drop them once instead of once per frame.
        for eye_key in ('leye_pose_0', 'leye_pose_1', 'leye_pose',
                        'reye_pose_0', 'reye_pose_1', 'reye_pose'):
            smplx.pop(eye_key, None)

        datalist = []
        # processing each image, filter according to bbox valid
        for i in tqdm.tqdm(range(int(num_examples))):
            if self.data_split == 'train' and i % train_sample_interval != 0:
                continue
            frame_start, frame_end = frame_range[i]
            img_path = osp.join(self.img_dir, image_path[frame_start])
            img_shape = image_shape[
                frame_start] if image_shape is not None else self.img_shape

            bbox_list = bbox_xywh[frame_start:frame_end, :4]
            valid_idx = []
            body_bbox_list = []
            for bbox_i, bbox in enumerate(bbox_list):
                bbox = process_bbox(bbox,
                                    img_width=img_shape[1],
                                    img_height=img_shape[0],
                                    ratio=bbox_ratio)
                if bbox is None:
                    continue
                valid_idx.append(frame_start + bbox_i)
                bbox[2:] += bbox[:2]  # xywh -> xyxy
                body_bbox_list.append(bbox)
            if len(valid_idx) == 0:
                continue
            valid_num = len(valid_idx)

            # hand/face bbox
            lhand_bbox_list = []
            rhand_bbox_list = []
            face_bbox_list = []
            for bbox_i in valid_idx:
                lhand_bbox_list.append(self._part_bbox_to_xyxy(
                    lhand_bbox_xywh[bbox_i], img_shape, bbox_ratio,
                    has_bbox_ratio))
                rhand_bbox_list.append(self._part_bbox_to_xyxy(
                    rhand_bbox_xywh[bbox_i], img_shape, bbox_ratio,
                    has_bbox_ratio))
                face_bbox_list.append(self._part_bbox_to_xyxy(
                    face_bbox_xywh[bbox_i], img_shape, bbox_ratio,
                    has_bbox_ratio))

            joint_img = keypoints2d[valid_idx]
            joint_cam = keypoints3d[valid_idx] if valid_kps3d else None

            occlusion_frame = occlusion[valid_idx] \
                if occlusion is not None else np.array([1] * valid_num)
            smplx_param = {k: v[valid_idx] for k, v in smplx.items()}
            gender_ = gender[valid_idx] \
                if gender is not None else np.array(['neutral'] * valid_num)
            is_kid_ = is_kid[valid_idx] \
                if is_kid is not None else np.array([1] * valid_num)

            lhand_bbox_valid = lhand_bbox_xywh[valid_idx, 4]
            rhand_bbox_valid = rhand_bbox_xywh[valid_idx, 4]
            face_bbox_valid = face_bbox_xywh[valid_idx, 4]

            # Rename the parameters to the keys used downstream.
            smplx_param['root_pose'] = smplx_param.pop('global_orient', None)
            smplx_param['shape'] = smplx_param.pop('betas', None)
            smplx_param['trans'] = smplx_param.pop('transl', np.zeros(3))
            smplx_param['lhand_pose'] = smplx_param.pop('left_hand_pose', None)
            smplx_param['rhand_pose'] = smplx_param.pop(
                'right_hand_pose', None)
            smplx_param['expr'] = smplx_param.pop('expression', None)

            # TODO do not fix betas, give up shape supervision
            if 'betas_neutral' in smplx_param and self.data_split == 'train':
                smplx_param['shape'] = smplx_param.pop('betas_neutral')

            # np.bool_ replaces the np.bool8 alias removed in NumPy 2.0.
            for valid_key, pose_key, part_valid in (
                    ('lhand_valid', 'lhand_pose', lhand_bbox_valid),
                    ('rhand_valid', 'rhand_pose', rhand_bbox_valid),
                    ('face_valid', 'expr', face_bbox_valid)):
                if smplx_param[pose_key] is None or self.body_only == True:
                    smplx_param[valid_key] = np.zeros(valid_num,
                                                      dtype=np.bool_)
                else:
                    smplx_param[valid_key] = part_valid.astype(np.bool_)

            if joint_cam is not None and np.any(np.isnan(joint_cam)):
                continue

            datalist.append({
                'img_path': img_path,
                'img_shape': img_shape,
                'bbox': body_bbox_list,
                'lhand_bbox': lhand_bbox_list,
                'rhand_bbox': rhand_bbox_list,
                'face_bbox': face_bbox_list,
                'joint_img': joint_img,
                'joint_cam': joint_cam,
                'smplx_param': smplx_param,
                'as_smplx': as_smplx,
                'gender': gender_,
                'occlusion': occlusion_frame,
                'is_kid': is_kid_,
            })

        # save memory
        del content, image_path, bbox_xywh, lhand_bbox_xywh, rhand_bbox_xywh, face_bbox_xywh, keypoints3d, keypoints2d

        if self.data_split == 'train':
            print(f'[{self.__class__.__name__} train] original size:',
                  int(num_examples), '. Sample interval:',
                  train_sample_interval, '. Sampled size:', len(datalist))

        if getattr(cfg, 'data_strategy',
                   None) == 'balance' and self.data_split == 'train':
            print(
                f'[{self.__class__.__name__}] Using [balance] strategy with datalist shuffled...'
            )
            random.shuffle(datalist)
        return datalist

    def _prepare_joint_targets(self, data, num_person, dummy_dim, do_flip,
                               img_shape, img2bb_trans, rot,
                               cropped_img_shape):
        """Make 3D joints root-relative and run the coordinate augmentation.

        When the entry has no 3D joints, a zero array with ``dummy_dim``
        channels is used and the returned ``dummy_cord`` flag is ``True``.
        """
        joint_cam = data['joint_cam']
        if joint_cam is not None:
            dummy_cord = False
            root_idx = self.joint_set['root_joint_idx']
            joint_cam[:, :, :3] = \
                joint_cam[:, :, :3] - joint_cam[:, root_idx, None, :3]  # root-relative
        else:
            # dummy cord as joint_cam
            dummy_cord = True
            joint_cam = np.zeros(
                (num_person, self.joint_set['joint_num'], dummy_dim),
                dtype=np.float32)

        joint_img_aug, joint_cam_wo_ra, joint_cam_ra, joint_trunc = \
            process_db_coord_batch_no_valid(
                data['joint_img'], joint_cam, do_flip, img_shape,
                self.joint_set['flip_pairs'], img2bb_trans, rot,
                self.joint_set['joints_name'], smpl_x.joints_name,
                cropped_img_shape)
        return (joint_img_aug, joint_cam_wo_ra, joint_cam_ra, joint_trunc,
                dummy_cord)

    def _prepare_smplx_targets(self, smplx_param, do_flip, rot, as_smplx,
                               num_person):
        """Process the SMPL-X parameters of every person in one sample."""
        smplx_pose, smplx_shape, smplx_expr, smplx_pose_valid, \
            smplx_joint_valid, smplx_expr_valid, smplx_shape_valid = \
            process_human_model_output_batch_simplify(
                smplx_param, do_flip, rot, as_smplx)

        # change smplx_shape if use_betas_neutral
        if self.use_betas_neutral:
            smplx_shape = smplx_param['betas_neutral'].reshape(
                num_person, -1)
            # Persons with any |beta| > 3 get an all-zero shape.
            smplx_shape[(np.abs(smplx_shape) > 3).any(axis=1)] = 0.
            smplx_shape = smplx_shape.reshape(num_person, -1)

        smplx_joint_valid = smplx_joint_valid[:, :, None]
        return (smplx_pose, smplx_shape, smplx_expr, smplx_pose_valid,
                smplx_joint_valid, smplx_expr_valid, smplx_shape_valid)

    def _stack_part_bboxes(self, data, num_person, do_flip, img_shape,
                           img2bb_trans, cropped_img_shape, part_valid=None):
        """Process body/hand/face boxes of every person and stack them.

        Args:
            part_valid: optional mapping with ``lhand_valid``,
                ``rhand_valid`` and ``face_valid`` per-person flags; when
                given, each hand/face validity is multiplied by its flag
                (before the left/right swap applied on flip).

        Returns:
            Dict with ``{part}_bbox``, ``{part}_bbox_center``,
            ``{part}_bbox_size`` and ``{part}_bbox_valid`` arrays for
            ``part`` in body, lhand, rhand, face.
        """
        part_keys = (('body', 'bbox'), ('lhand', 'lhand_bbox'),
                     ('rhand', 'rhand_bbox'), ('face', 'face_bbox'))
        collected = {
            f'{part}_{field}': []
            for part, _ in part_keys
            for field in ('bbox', 'bbox_center', 'bbox_size', 'bbox_valid')
        }

        for i in range(num_person):
            boxes, valids = {}, {}
            # NOTE(review): assumes process_hand_face_bbox calls are
            # independent of each other, so their order does not matter.
            for part, data_key in part_keys:
                box, valid = self.process_hand_face_bbox(
                    data[data_key][i], do_flip, img_shape, img2bb_trans,
                    cropped_img_shape)
                if part_valid is not None and part != 'body':
                    valid *= part_valid[f'{part}_valid'][i]
                boxes[part], valids[part] = box, valid

            if do_flip:
                boxes['lhand'], boxes['rhand'] = \
                    boxes['rhand'], boxes['lhand']
                valids['lhand'], valids['rhand'] = \
                    valids['rhand'], valids['lhand']

            for part, _ in part_keys:
                box = boxes[part]
                collected[f'{part}_bbox'].append(box)
                collected[f'{part}_bbox_center'].append(
                    (box[0] + box[1]) / 2.)
                collected[f'{part}_bbox_size'].append(box[1] - box[0])
                collected[f'{part}_bbox_valid'].append(valids[part])

        return {
            name: np.stack(values, axis=0)
            for name, values in collected.items()
        }

    def __getitem__(self, idx):
        """Build the sample for datalist entry ``idx``.

        Returns the normalized and formatted result dict for the ``'train'``
        and ``'test'`` splits; for any other split nothing is returned.
        """
        try:
            data = copy.deepcopy(self.datalist[idx])
        except Exception as e:
            print(f'[{self.__class__.__name__}] Error loading data {idx}')
            print(e)
            # NOTE(review): exits with status 0 on a load failure; kept
            # as-is, but re-raising would make the failure visible.
            exit(0)

        img_path, img_shape = data['img_path'], data['img_shape']
        as_smplx = data['as_smplx']

        # Map gender strings to integer codes.
        gender = data['gender'].copy()
        for gender_str, gender_num in {
                'neutral': -1, 'male': 0, 'female': 1}.items():
            gender[gender == gender_str] = gender_num
        gender = gender.astype(int)

        img_whole_bbox = np.array([0, 0, img_shape[1], img_shape[0]])
        img = load_img(img_path, order='BGR')
        data_name = self.__class__.__name__
        img, img2bb_trans, bb2img_trans, rot, do_flip = \
            augmentation_instance_sample(
                img, img_whole_bbox, self.data_split, data, data_name)
        cropped_img_shape = img.shape[:2]
        # Counted after the augmentation call, which receives `data`
        # (presumably it may change the set of persons - TODO confirm).
        num_person = len(data['bbox'])

        if self.data_split == 'train':
            joint_img_aug, joint_cam_wo_ra, joint_cam_ra, joint_trunc, \
                dummy_cord = self._prepare_joint_targets(
                    data, num_person, 4, do_flip, img_shape, img2bb_trans,
                    rot, cropped_img_shape)
            joint_img_aug[:, :, 2:] = joint_img_aug[:, :, 2:] * joint_trunc

            # smplx coordinates and parameters
            smplx_param = data['smplx_param']
            smplx_pose, smplx_shape, smplx_expr, smplx_pose_valid, \
                smplx_joint_valid, smplx_expr_valid, smplx_shape_valid = \
                self._prepare_smplx_targets(
                    smplx_param, do_flip, rot, as_smplx, num_person)

            boxes = self._stack_part_bboxes(
                data, num_person, do_flip, img_shape, img2bb_trans,
                cropped_img_shape, part_valid=smplx_param)
            body_bbox_valid = boxes['body_bbox_valid']
            lhand_bbox_valid = boxes['lhand_bbox_valid']
            rhand_bbox_valid = boxes['rhand_bbox_valid']
            face_bbox_valid = boxes['face_bbox_valid']

            if 'occlusion' in data:
                # Persons with an occlusion value of 97 or more get every
                # validity flag zeroed.
                occ_mask = data['occlusion'] < 97
                joint_img_aug[:, :, 2] = \
                    joint_img_aug[:, :, 2] * occ_mask[:, None]
                joint_cam_wo_ra[:, :, 3] = \
                    joint_cam_wo_ra[:, :, 3] * occ_mask[:, None]
                joint_trunc = joint_trunc * occ_mask[:, None, None]
                smplx_pose_valid = smplx_pose_valid * occ_mask[:, None]
                smplx_joint_valid = \
                    smplx_joint_valid * occ_mask[:, None, None]
                smplx_expr_valid = smplx_expr_valid * occ_mask
                smplx_shape_valid = smplx_shape_valid * occ_mask
                rhand_bbox_valid = rhand_bbox_valid * occ_mask
                lhand_bbox_valid = lhand_bbox_valid * occ_mask
                face_bbox_valid = face_bbox_valid * occ_mask

            if 'is_kid' in data:
                is_kid = data['is_kid'].copy()
                smplx_shape_valid = smplx_shape_valid * (is_kid == 0)

            inputs = {'img': img}
            joint_img_aug[:, :, 2] = \
                joint_img_aug[:, :, 2] * body_bbox_valid[:, None]
            is_3D = float(False) if dummy_cord else float(True)

            # Only persons whose body bbox is valid are returned.
            keep = body_bbox_valid > 0
            targets = {
                # keypoints2d, [0,img_w],[0,img_h] -> [0,1] -> [0,output_hm_shape]
                'joint_img': joint_img_aug[keep],
                # joint_cam, kp3d wo ra # raw kps3d probably without ra
                'joint_cam': joint_cam_wo_ra[keep],
                # kps3d with body, face, hand ra
                'smplx_joint_cam': joint_cam_ra[keep],
                'smplx_pose': smplx_pose[keep],
                'smplx_shape': smplx_shape[keep],
                'smplx_expr': smplx_expr[keep],
                'lhand_bbox_center': boxes['lhand_bbox_center'][keep],
                'lhand_bbox_size': boxes['lhand_bbox_size'][keep],
                'rhand_bbox_center': boxes['rhand_bbox_center'][keep],
                'rhand_bbox_size': boxes['rhand_bbox_size'][keep],
                'face_bbox_center': boxes['face_bbox_center'][keep],
                'face_bbox_size': boxes['face_bbox_size'][keep],
                'body_bbox_center': boxes['body_bbox_center'][keep],
                'body_bbox_size': boxes['body_bbox_size'][keep],
                'body_bbox': boxes['body_bbox'].reshape(-1, 4)[keep],
                'lhand_bbox': boxes['lhand_bbox'].reshape(-1, 4)[keep],
                'rhand_bbox': boxes['rhand_bbox'].reshape(-1, 4)[keep],
                'face_bbox': boxes['face_bbox'].reshape(-1, 4)[keep],
                'gender': gender[keep]}
            meta_info = {
                'joint_trunc': joint_trunc[keep],
                'smplx_pose_valid': smplx_pose_valid[keep],
                'smplx_shape_valid': smplx_shape_valid[keep],
                'smplx_expr_valid': smplx_expr_valid[keep],
                'is_3D': is_3D,
                'lhand_bbox_valid': lhand_bbox_valid[keep],
                'rhand_bbox_valid': rhand_bbox_valid[keep],
                'face_bbox_valid': face_bbox_valid[keep],
                'body_bbox_valid': body_bbox_valid[keep],
                'img_shape': np.array(img.shape[:2]),
                'ori_shape': data['img_shape'],
                'idx': idx
            }
            result = {**inputs, **targets, **meta_info}
            result = self.normalize(result)
            result = self.format(result)
            return result

        if self.data_split == 'test':
            self.cam_param = {}
            joint_img_aug, joint_cam_wo_ra, joint_cam_ra, joint_trunc, \
                dummy_cord = self._prepare_joint_targets(
                    data, num_person, 3, do_flip, img_shape, img2bb_trans,
                    rot, cropped_img_shape)

            # smplx coordinates and parameters
            # TODO: remove this, seperate smpl and smplx
            smplx_param = data['smplx_param']
            smplx_pose, smplx_shape, smplx_expr, smplx_pose_valid, \
                smplx_joint_valid, smplx_expr_valid, smplx_shape_valid = \
                self._prepare_smplx_targets(
                    smplx_param, do_flip, rot, as_smplx, num_person)

            boxes = self._stack_part_bboxes(
                data, num_person, do_flip, img_shape, img2bb_trans,
                cropped_img_shape)

            inputs = {'img': img}
            targets = {
                # keypoints2d, [0,img_w],[0,img_h] -> [0,1] -> [0,output_hm_shape]
                'joint_img': joint_img_aug,
                # projected smplx if valid cam_param, else same as keypoints2d
                # joint_cam, kp3d wo ra # raw kps3d probably without ra
                'joint_cam': joint_cam_wo_ra,
                'ann_idx': idx,
                # kps3d with body, face, hand ra
                'smplx_joint_cam': joint_cam_ra,
                'smplx_pose': smplx_pose,
                'smplx_shape': smplx_shape,
                'smplx_expr': smplx_expr,
                'lhand_bbox_center': boxes['lhand_bbox_center'],
                'lhand_bbox_size': boxes['lhand_bbox_size'],
                'rhand_bbox_center': boxes['rhand_bbox_center'],
                'rhand_bbox_size': boxes['rhand_bbox_size'],
                'face_bbox_center': boxes['face_bbox_center'],
                'face_bbox_size': boxes['face_bbox_size'],
                'body_bbox_center': boxes['body_bbox_center'],
                'body_bbox_size': boxes['body_bbox_size'],
                'body_bbox': boxes['body_bbox'].reshape(-1, 4),
                'lhand_bbox': boxes['lhand_bbox'].reshape(-1, 4),
                'rhand_bbox': boxes['rhand_bbox'].reshape(-1, 4),
                'face_bbox': boxes['face_bbox'].reshape(-1, 4),
                'gender': gender,
                'bb2img_trans': bb2img_trans,
            }
            meta_info = {
                'joint_trunc': joint_trunc,
                'smplx_pose_valid': smplx_pose_valid,
                'smplx_shape_valid': smplx_shape_valid,
                'smplx_expr_valid': smplx_expr_valid,
                'is_3D': float(False) if dummy_cord else float(True),
                'lhand_bbox_valid': boxes['lhand_bbox_valid'],
                'rhand_bbox_valid': boxes['rhand_bbox_valid'],
                'face_bbox_valid': boxes['face_bbox_valid'],
                'body_bbox_valid': boxes['body_bbox_valid'],
                'img_shape': np.array(img.shape[:2]),
                'ori_shape': data['img_shape'],
                'idx': idx
            }
            if self.body_only:
                # Body-only mode reports the shape validity as a float.
                meta_info['smplx_shape_valid'] = float(smplx_shape_valid)

            result = {**inputs, **targets, **meta_info}
            result = self.normalize(result)
            result = self.format(result)
            return result

    @staticmethod
    def _vertex_error_mm(mesh_pred, mesh_gt):
        """Mean per-vertex Euclidean distance per mesh, scaled by 1000."""
        return np.sqrt(np.sum(
            (mesh_pred - mesh_gt)**2, -1)).mean(-1) * 1000

    def evaluate(self, outs, cur_sample_idx):
        """Compute MPVPE / PA-MPVPE for a list of model outputs.

        Each element of ``outs`` must provide ``smplx_mesh_cam_target``,
        ``smplx_mesh_cam`` and ``gt_ann_idx``. One row per output is
        appended to ``{cfg.result_dir}/agora_smplx_error.csv``.

        Args:
            outs: list of per-sample output dicts.
            cur_sample_idx: accepted for interface compatibility; not used.

        Returns:
            Dict of metric name -> list of per-mesh errors, plus the
            ``img_path`` and ``ann_idx`` of the last processed output.
            The face metrics are never filled by this method.
        """
        annots = self.datalist
        sample_num = len(outs)
        eval_result = {
            'pa_mpvpe_all': [],
            'pa_mpvpe_l_hand': [],
            'pa_mpvpe_r_hand': [],
            'pa_mpvpe_hand': [],
            'pa_mpvpe_face': [],
            'mpvpe_all': [],
            'mpvpe_l_hand': [],
            'mpvpe_r_hand': [],
            'mpvpe_hand': [],
            'mpvpe_face': []
        }

        lhand_vertex_idx = smpl_x.hand_vertex_idx['left_hand']
        rhand_vertex_idx = smpl_x.hand_vertex_idx['right_hand']
        pelvis = smpl_x.J_regressor_idx['pelvis']
        lwrist = smpl_x.J_regressor_idx['lwrist']
        rwrist = smpl_x.J_regressor_idx['rwrist']

        csv_file = f'{cfg.result_dir}/agora_smplx_error.csv'
        # The context manager closes the file even if a sample fails.
        with open(csv_file, 'a', newline='') as error_file:
            writer = csv.writer(error_file)
            for n in range(sample_num):
                out = outs[n]
                mesh_gt = out['smplx_mesh_cam_target']
                mesh_out = out['smplx_mesh_cam']

                ann_idx = out['gt_ann_idx']
                img_path = [annots[ann_id]['img_path'] for ann_id in ann_idx]
                eval_result['img_path'] = img_path
                eval_result['ann_idx'] = ann_idx

                # Regressed joints, computed once and reused for the
                # pelvis and wrist alignments below.
                joints_out = np.dot(
                    smpl_x.J_regressor, mesh_out).transpose(1, 0, 2)
                joints_gt = np.dot(
                    smpl_x.J_regressor, mesh_gt).transpose(1, 0, 2)

                # MPVPE from all vertices
                mesh_out_align = \
                    mesh_out - joints_out[:, pelvis, None, :] + \
                    joints_gt[:, pelvis, None, :]
                eval_result['mpvpe_all'].extend(
                    self._vertex_error_mm(mesh_out_align, mesh_gt))
                mesh_out_align = rigid_align_batch(mesh_out, mesh_gt)
                eval_result['pa_mpvpe_all'].extend(
                    self._vertex_error_mm(mesh_out_align, mesh_gt))

                # MPVPE from hand vertices
                mesh_gt_lhand = mesh_gt[:, lhand_vertex_idx, :]
                mesh_out_lhand = mesh_out[:, lhand_vertex_idx, :]
                mesh_gt_rhand = mesh_gt[:, rhand_vertex_idx, :]
                mesh_out_rhand = mesh_out[:, rhand_vertex_idx, :]
                mesh_out_lhand_align = \
                    mesh_out_lhand - joints_out[:, lwrist, None, :] + \
                    joints_gt[:, lwrist, None, :]
                mesh_out_rhand_align = \
                    mesh_out_rhand - joints_out[:, rwrist, None, :] + \
                    joints_gt[:, rwrist, None, :]
                lhand_err = self._vertex_error_mm(
                    mesh_out_lhand_align, mesh_gt_lhand)
                rhand_err = self._vertex_error_mm(
                    mesh_out_rhand_align, mesh_gt_rhand)
                eval_result['mpvpe_l_hand'].extend(lhand_err)
                eval_result['mpvpe_r_hand'].extend(rhand_err)
                eval_result['mpvpe_hand'].extend(
                    (lhand_err + rhand_err) / 2.)

                mesh_out_lhand_align = rigid_align_batch(
                    mesh_out_lhand, mesh_gt_lhand)
                mesh_out_rhand_align = rigid_align_batch(
                    mesh_out_rhand, mesh_gt_rhand)
                pa_lhand_err = self._vertex_error_mm(
                    mesh_out_lhand_align, mesh_gt_lhand)
                pa_rhand_err = self._vertex_error_mm(
                    mesh_out_rhand_align, mesh_gt_rhand)
                eval_result['pa_mpvpe_l_hand'].extend(pa_lhand_err)
                eval_result['pa_mpvpe_r_hand'].extend(pa_rhand_err)
                eval_result['pa_mpvpe_hand'].extend(
                    (pa_lhand_err + pa_rhand_err) / 2.)

                # NOTE(review): `ann_idx` / `img_path` are indexed with the
                # output position `n`, and only the last mesh error of this
                # output is written - confirm this is intended when an
                # output holds several persons.
                writer.writerow([
                    ann_idx[n], img_path[n],
                    eval_result['mpvpe_all'][-1],
                    eval_result['pa_mpvpe_all'][-1]
                ])
                self.save_idx += 1

        return eval_result

    def print_eval_result(self, eval_result):
        """Print the averaged metrics and append them to ``agora_val.txt``.

        Nothing beyond the dump location is printed when the split is
        ``'test'`` and ``self.test_set`` is ``'test'``.
        """
        print('AGORA test results are dumped at: ' +
              osp.join(cfg.result_dir, 'predictions'))

        if self.data_split == 'test' and self.test_set == 'test':  # do not print. just submit the results to the official evaluation server
            return

        # NOTE(review): the face lists are never filled by `evaluate`, so
        # their mean is NaN (numpy also emits a RuntimeWarning).
        pa_lines = [(label, np.mean(eval_result[key]))
                    for label, key in self._PA_METRICS]
        plain_lines = [(label, np.mean(eval_result[key]))
                       for label, key in self._METRICS]

        print('======AGORA-val======')
        for label, value in pa_lines:
            print('%s: %.2f mm' % (label, value))
        print()
        for label, value in plain_lines:
            print('%s: %.2f mm' % (label, value))

        out_file = osp.join(cfg.result_dir, 'agora_val.txt')
        # Append mode creates the file when it is missing; the context
        # manager guarantees that the handle is closed.
        with open(out_file, 'a', encoding='utf-8') as f:
            f.write('\n')
            f.write(f'{cfg.exp_name}\n')
            f.write(f'AGORA-val dataset: \n')
            for label, value in pa_lines + plain_lines:
                f.write('%s: %.2f mm\n' % (label, value))