# Source: Hugging Face Space "ttxskk / AiOS" (Space status: Sleeping)
# File: AiOS / models / aios / criterion_smplx.py
# Last commit: "update" by ttxskk, d7e58f0, 21 days ago
# Size: 132 kB

	import copy
	import os
	import math
	from typing import List
	import torch
	import torch.nn.functional as F
	from torch import nn
	from torchvision.ops.boxes import nms
	from torch import Tensor
	from util import box_ops
	from util.misc import (NestedTensor, nested_tensor_from_tensor_list, accuracy,
	get_world_size, interpolate,
	is_dist_avail_and_initialized, inverse_sigmoid)
	from .utils import PoseProjector, sigmoid_focal_loss, MLP, OKSLoss
	from typing import Optional, Union
	from detrsmpl.core.conventions.keypoints_mapping import (get_keypoint_idx,
	convert_kps)
	from detrsmpl.utils.geometry import (batch_rodrigues, project_points_new)
	from config.config import cfg
	from util.human_models import smpl_x
	from detrsmpl.utils.transforms import rotmat_to_aa
	class SetCriterion(nn.Module):
	def __init__(
	        self,
	        num_classes,
	        matcher,
	        weight_dict,
	        focal_alpha,
	        losses,
	        num_box_decoder_layers=2,
	        num_hand_face_decoder_layers=4,
	        num_body_points=17,
	        num_hand_points=6,
	        num_face_points=6,
	        smpl_loss_config=None,
	        convention='smplx_137'):
	    """Store the criterion configuration and build the per-part OKS losses.

	    Args:
	        num_classes: class count; used by ``loss_labels`` as the fill
	            value for unmatched queries.
	        matcher: stored as ``self.matcher``.
	        weight_dict: stored as ``self.weight_dict``.
	        focal_alpha: ``alpha`` passed to ``sigmoid_focal_loss``.
	        losses: stored as ``self.losses``.
	        num_box_decoder_layers: stored on the instance.
	        num_hand_face_decoder_layers: stored on the instance.
	        num_body_points: number of body keypoints.
	        num_hand_points: number of keypoints per hand.
	        num_face_points: number of face keypoints.
	        smpl_loss_config: accepted but not used by this constructor.
	        convention: keypoint convention name handed to
	            ``get_keypoint_idx`` by the 3D keypoint losses.
	    """
	    super().__init__()

	    def build_oks(keypoint_count):
	        # All three OKS losses differ only in the number of keypoints.
	        return OKSLoss(linear=True,
	                       num_keypoints=keypoint_count,
	                       eps=1e-6,
	                       reduction='mean',
	                       loss_weight=1.0)

	    # Matching / weighting configuration.
	    self.num_classes = num_classes
	    self.matcher = matcher
	    self.weight_dict = weight_dict
	    self.losses = losses
	    self.focal_alpha = focal_alpha

	    # NOTE(review): `vis` and `abs` are not read by any method visible
	    # in this part of the file - confirm whether they are still needed.
	    self.vis = 0.1
	    self.abs = 1

	    # Keypoint counts and decoder layout.
	    self.num_body_points = num_body_points
	    self.num_hand_points = num_hand_points
	    self.num_face_points = num_face_points
	    self.num_box_decoder_layers = num_box_decoder_layers
	    self.num_hand_face_decoder_layers = num_hand_face_decoder_layers
	    self.convention = convention

	    # Both hands share a single OKS module.
	    self.body_oks = build_oks(num_body_points)
	    self.hand_oks = build_oks(num_hand_points)
	    self.face_oks = build_oks(num_face_points)

	def loss_labels(self,
	                outputs,
	                targets,
	                indices,
	                idx,
	                num_boxes,
	                data_batch,
	                log=True):
	    """Focal classification loss over all queries.

	    Every target dict must provide a ``"labels"`` tensor. Queries that
	    were not matched are assigned the index ``self.num_classes``, whose
	    one-hot column is dropped before the focal loss is evaluated.

	    Args:
	        outputs: dict containing ``'pred_logits'``.
	        targets: list of per-image target dicts.
	        indices: matcher output; only ``indices[0]`` is used.
	        idx: index into the first two dims of ``pred_logits`` selecting
	            the matched queries.
	        num_boxes: normaliser forwarded to ``sigmoid_focal_loss``.
	        data_batch: unused here.
	        log: when true, also report ``'class_error'``.

	    Returns:
	        dict with ``'loss_ce'`` and, if ``log``, ``'class_error'``.
	    """
	    matches = indices[0]
	    assert 'pred_logits' in outputs
	    logits = outputs['pred_logits']

	    matched_labels = torch.cat(
	        [tgt['labels'][tgt_ids] for tgt, (_, tgt_ids) in zip(targets, matches)])

	    # Start with every query labelled "no object", then fill in the matches.
	    query_labels = torch.full(logits.shape[:2],
	                              self.num_classes,
	                              dtype=torch.int64,
	                              device=logits.device)
	    query_labels[idx] = matched_labels

	    # One-hot with one extra column for "no object"; that column is cut
	    # off again so unmatched queries end up with an all-zero target row.
	    onehot_shape = [logits.shape[0], logits.shape[1], logits.shape[2] + 1]
	    onehot = torch.zeros(onehot_shape,
	                         dtype=logits.dtype,
	                         layout=logits.layout,
	                         device=logits.device)
	    onehot.scatter_(2, query_labels.unsqueeze(-1), 1)
	    onehot = onehot[:, :, :-1]

	    focal = sigmoid_focal_loss(logits,
	                               onehot,
	                               num_boxes,
	                               alpha=self.focal_alpha,
	                               gamma=2)
	    result = {'loss_ce': focal * logits.shape[1]}

	    if log:
	        # TODO this should probably be a separate loss, not hacked in this one here
	        top1 = accuracy(logits[idx], matched_labels)[0]
	        result['class_error'] = 100 - top1
	    return result

	@torch.no_grad()
	def loss_cardinality(self, outputs, targets, indices, num_boxes,
	                     data_batch):
	    """Compute the cardinality error (logging only, no gradients).

	    The error is the mean absolute difference between the number of
	    queries whose arg-max class is not the last one and the number of
	    ground-truth labels, taken per image.

	    Args:
	        outputs: dict containing ``'pred_logits'``.
	        targets: list of per-image dicts, each with a ``'labels'`` entry.
	        indices: unused.
	        num_boxes: unused.
	        data_batch: unused.

	    Returns:
	        dict with the single key ``'cardinality_error'``. When no image
	        in the batch has any label, the value is a zero derived from
	        ``pred_logits``.
	    """
	    pred_logits = outputs['pred_logits']
	    tgt_lengths = torch.as_tensor([len(v['labels']) for v in targets],
	                                  device=pred_logits.device)

	    # `tgt_lengths` has one entry per image, so it must be reduced before
	    # it is used as a condition: the truth value of a multi-element
	    # tensor raises a RuntimeError.
	    if not bool((tgt_lengths > 0).any()):
	        return {'cardinality_error': pred_logits.sum() * 0}

	    # Count the number of predictions that are NOT "no-object" (which is the last class)
	    last_class = pred_logits.shape[-1] - 1
	    card_pred = (pred_logits.argmax(-1) != last_class).sum(1)
	    card_err = F.l1_loss(card_pred.float(), tgt_lengths.float())
	    return {'cardinality_error': card_err}

	def loss_keypoints(self, outputs, targets, indices,
	idx, num_boxes, data_batch,
	face_hand_kpt=False):
	"""Compute the losses related to the keypoints."""
	indices = indices[0]
	losses = {}
	device = outputs['pred_logits'].device
	############################################################
	# body
	############################################################

	src_body_keypoints = outputs['pred_keypoints'][idx] # xyxyvv
	if len(src_body_keypoints) == 0:
	losses.append({
	'loss_keypoints': src_body_keypoints.sum() * 0 + \
	outputs['pred_smpl_cam'][idx].float().sum()*0,
	'loss_oks': src_body_keypoints.sum() * torch.as_tensor(0., device=device),
	})
	else:
	Z_pred = src_body_keypoints[:, 0:(self.num_body_points * 2)] # [2, 2*14]
	V_pred = src_body_keypoints[:, (self.num_body_points * 2):]
	targets_body_keypoints = torch.cat(
	[t['keypoints'][i] for t, (_, i) in zip(targets, indices)],
	dim=0)
	targets_area = torch.cat(
	[t['area'][i] for t, (_, i) in zip(targets, indices)], dim=0)
	target_body_boxes_conf = torch.cat(
	[t[i] for t, (_, i) in zip(data_batch['body_bbox_valid'], indices)], dim=0)
	Z_gt = targets_body_keypoints[:, 0:(self.num_body_points * 2)]
	V_gt: torch.Tensor = targets_body_keypoints[:, (self.num_body_points * 2):]
	body_kps_conf = V_gt.sum(-1)>0
	body_num_boxes = (body_kps_conf * target_body_boxes_conf).sum()
	oks_loss = self.body_oks(Z_pred,
	Z_gt,
	V_gt,
	targets_area,
	weight=None,
	avg_factor=None,
	reduction_override=None)
	oks_loss= body_kps_conf target_body_boxes_conf
	pose_loss = F.l1_loss(Z_pred, Z_gt, reduction='none')
	pose_loss = pose_loss * V_gt.repeat_interleave(2, dim=1)
	pose_loss = pose_loss.sum(-1) * target_body_boxes_conf
	if body_num_boxes>0:
	losses['loss_keypoints'] = pose_loss.sum() / body_num_boxes
	losses['loss_oks'] = oks_loss.sum() / body_num_boxes
	else:
	losses['loss_keypoints'] = src_body_keypoints.sum() * torch.as_tensor(0., device=device)
	losses['loss_oks'] = src_body_keypoints.sum() * torch.as_tensor(0., device=device)
	############################################################
	# lhand
	############################################################
	if 'pred_lhand_keypoints' in outputs and face_hand_kpt:
	src_lhand_keypoints = outputs['pred_lhand_keypoints'][idx] # xyxyvv
	if len(src_lhand_keypoints) == 0:
	losses.update({
	'loss_lhand_keypoints': src_lhand_keypoints.sum() * torch.as_tensor(0., device=device),
	'loss_lhand_oks':src_lhand_keypoints.sum() * torch.as_tensor(0., device=device),
	})
	else:
	Z_pred = src_lhand_keypoints[:, 0:(self.num_hand_points * 2)] # [2, 2*14]
	V_pred = src_lhand_keypoints[:, (self.num_hand_points * 2):]
	targets_lhand_keypoints = torch.cat(
	[t['lhand_keypoints'][i] for t, (_, i) in zip(targets, indices)],
	dim=0) # i is batch_size
	targets_area = torch.cat(
	[t['area'][i] for t, (_, i) in zip(targets, indices)], dim=0)
	target_lhand_boxes_conf = torch.cat(
	[t[i] for t, (_, i) in zip(data_batch['lhand_bbox_valid'], indices)], dim=0)

	Z_gt = targets_lhand_keypoints[:, 0:(self.num_hand_points * 2)]
	V_gt: torch.Tensor = targets_lhand_keypoints[:, (self.num_hand_points * 2):]
	lhand_kps_conf = V_gt.sum(-1)>0
	lhand_num_boxes = (lhand_kps_conf*target_lhand_boxes_conf).sum()
	oks_loss = self.hand_oks(Z_pred,
	Z_gt,
	V_gt,
	targets_area,
	weight=None,
	avg_factor=None,
	reduction_override=None)
	oks_loss = oks_losslhand_kps_conftarget_lhand_boxes_conf
	pose_loss = F.l1_loss(Z_pred, Z_gt, reduction='none')
	pose_loss = pose_loss * V_gt.repeat_interleave(2, dim=1)
	pose_loss = pose_loss.sum(-1)*target_lhand_boxes_conf
	if lhand_num_boxes>0:
	losses['loss_lhand_keypoints'] = pose_loss.sum() / lhand_num_boxes
	losses['loss_lhand_oks'] = oks_loss.sum() / lhand_num_boxes
	else:
	losses['loss_lhand_keypoints'] = src_lhand_keypoints.sum() * torch.as_tensor(0., device=device)
	losses['loss_lhand_oks'] = src_lhand_keypoints.sum() * torch.as_tensor(0., device=device)

	############################################################
	# rhand
	############################################################
	if 'pred_rhand_keypoints' in outputs and face_hand_kpt:
	src_rhand_keypoints = outputs['pred_rhand_keypoints'][idx] # xyxyvv
	if len(src_rhand_keypoints) == 0:
	losses.update({
	'loss_rhand_keypoints':
	src_rhand_keypoints.sum() * torch.as_tensor(0., device=device),
	'loss_rhand_oks':
	src_rhand_keypoints.sum() * torch.as_tensor(0., device=device),
	})
	else:
	Z_pred = src_rhand_keypoints[:, 0:(self.num_hand_points * 2)] # [2, 2*14]
	V_pred = src_rhand_keypoints[:, (self.num_hand_points * 2):]
	targets_rhand_keypoints = torch.cat(
	[t['rhand_keypoints'][i] for t, (_, i) in zip(targets, indices)],
	dim=0)
	targets_area = torch.cat(
	[t['area'][i] for t, (_, i) in zip(targets, indices)], dim=0)
	target_rhand_boxes_conf = torch.cat(
	[t[i] for t, (_, i) in zip(data_batch['rhand_bbox_valid'], indices)], dim=0)
	Z_gt = targets_rhand_keypoints[:, 0:(self.num_hand_points * 2)]
	V_gt: torch.Tensor = targets_rhand_keypoints[:, (self.num_hand_points * 2):]
	rhand_kps_conf = V_gt.sum(-1)>0
	rhand_num_boxes = (rhand_kps_conf*target_rhand_boxes_conf).sum()
	oks_loss = self.hand_oks(Z_pred,
	Z_gt,
	V_gt,
	targets_area,
	weight=None,
	avg_factor=None,
	reduction_override=None)
	oks_loss = oks_lossrhand_kps_conftarget_rhand_boxes_conf
	pose_loss = F.l1_loss(Z_pred, Z_gt, reduction='none')
	pose_loss = pose_loss * V_gt.repeat_interleave(2, dim=1)
	pose_loss = pose_loss.sum(-1)*target_rhand_boxes_conf
	if rhand_num_boxes>0:
	losses['loss_rhand_keypoints'] = pose_loss.sum() / rhand_num_boxes
	losses['loss_rhand_oks'] = oks_loss.sum() / rhand_num_boxes
	else:
	losses['loss_rhand_keypoints'] = src_rhand_keypoints.sum() * torch.as_tensor(0., device=device)
	losses['loss_rhand_oks'] = src_rhand_keypoints.sum() * torch.as_tensor(0., device=device)

	############################################################
	# face
	############################################################
	if 'pred_face_keypoints' in outputs and face_hand_kpt:
	src_face_keypoints = outputs['pred_face_keypoints'][idx] # xyxyvv
	if len(src_face_keypoints) == 0:
	losses.update({
	'loss_face_keypoints': src_face_keypoints.sum() * 0,
	'loss_face_oks': src_face_keypoints.sum() * 0,
	})
	else:
	Z_pred = src_face_keypoints[:, 0:(self.num_face_points * 2)] # [2, 2*14]
	V_pred = src_face_keypoints[:, (self.num_face_points * 2):]
	targets_face_keypoints = torch.cat(
	[t['face_keypoints'][i] for t, (_, i) in zip(targets, indices)],
	dim=0)
	targets_area = torch.cat(
	[t['area'][i] for t, (_, i) in zip(targets, indices)], dim=0)
	target_face_boxes_conf = torch.cat(
	[t[i] for t, (_, i) in zip(data_batch['face_bbox_valid'], indices)], dim=0)
	Z_gt = targets_face_keypoints[:, 0:(self.num_face_points * 2)]
	V_gt: torch.Tensor = targets_face_keypoints[:, (self.num_face_points * 2):]
	face_kps_conf = V_gt.sum(-1)>0
	face_num_boxes = (lhand_kps_conf*target_face_boxes_conf).sum()
	oks_loss = self.face_oks(Z_pred,
	Z_gt,
	V_gt,
	targets_area,
	weight=None,
	avg_factor=None,
	reduction_override=None)
	oks_loss = oks_lossface_kps_conftarget_face_boxes_conf
	pose_loss = F.l1_loss(Z_pred, Z_gt, reduction='none')
	pose_loss = pose_loss * V_gt.repeat_interleave(2, dim=1)
	pose_loss = pose_loss.sum(-1)*target_face_boxes_conf
	if face_num_boxes>0:
	losses['loss_face_keypoints'] = pose_loss.sum() / face_num_boxes
	losses['loss_face_oks'] = oks_loss.sum() / face_num_boxes
	else:
	losses['loss_face_keypoints'] = src_face_keypoints.sum() * torch.as_tensor(0., device=device)
	losses['loss_face_oks'] = src_face_keypoints.sum() * torch.as_tensor(0., device=device)

	return losses

	def loss_smpl_pose(self, outputs, targets, indices, idx, num_boxes,
	                   data_batch, face_hand_kpt=False):
	    """L1 loss between predicted and ground-truth SMPL-X joint rotations.

	    The predicted body, left-hand, right-hand and jaw poses are
	    concatenated along the joint axis (22 + 15 + 15 + 1 = 53 joints) and
	    compared with the ground-truth axis-angle pose after it has been
	    converted to rotation matrices with ``batch_rodrigues``.

	    Args:
	        outputs: dict with ``'pred_logits'``, ``'pred_smpl_pose'``,
	            ``'pred_smpl_lhand_pose'``, ``'pred_smpl_rhand_pose'`` and
	            ``'pred_smpl_jaw_pose'``.
	        targets: unused.
	        indices: matcher output; only ``indices[0]`` is used.
	        idx: index selecting the matched queries.
	        num_boxes: unused.
	        data_batch: dict with per-image lists ``'smplx_pose'`` and
	            ``'smplx_pose_valid'``.
	        face_hand_kpt: when false, the hand and jaw terms are multiplied
	            by zero (the keys are still returned).

	    Returns:
	        dict with ``'loss_smpl_pose_root'``, ``'loss_smpl_pose_body'``,
	        ``'loss_smpl_pose_lhand'``, ``'loss_smpl_pose_rhand'`` and
	        ``'loss_smpl_pose_jaw'``.
	    """
	    device = outputs['pred_logits'].device
	    indices = indices[0]

	    pred_smplx_pose = torch.cat(
	        (outputs['pred_smpl_pose'][idx],        # 22 joints
	         outputs['pred_smpl_lhand_pose'][idx],  # 15 joints
	         outputs['pred_smpl_rhand_pose'][idx],  # 15 joints
	         outputs['pred_smpl_jaw_pose'][idx]),
	        dim=1)

	    targets_smpl_pose = torch.cat(
	        [t[i] for t, (_, i) in zip(data_batch['smplx_pose'], indices)],
	        dim=0)
	    targets_smpl_pose = batch_rodrigues(
	        targets_smpl_pose.view(-1, 3)).view(-1, 53, 3, 3)
	    conf = torch.cat(
	        [t[i] for t, (_, i) in zip(data_batch['smplx_pose_valid'], indices)],
	        dim=0)

	    # Number of instances with at least one valid joint in each group.
	    # The jaw is a single column, so it is compared per instance; summing
	    # it first would reduce the count to a single 0/1 flag for the batch.
	    body_count = (conf[:, :22].sum(-1) > 0).sum()
	    lhand_count = (conf[:, 22:37].sum(-1) > 0).sum()
	    rhand_count = (conf[:, 37:52].sum(-1) > 0).sum()
	    jaw_count = (conf[:, 52] > 0).sum()

	    # Per-joint error summed over the 3x3 rotation matrix, masked by conf.
	    loss_smpl_pose = F.l1_loss(pred_smplx_pose,
	                               targets_smpl_pose,
	                               reduction='none')
	    loss_smpl_pose = loss_smpl_pose.sum([-1, -2]) * conf

	    # Hand and jaw terms stay in the dict but contribute nothing when
	    # face/hand supervision is disabled.
	    extra_scale = 1.0 if face_hand_kpt else torch.as_tensor(0., device=device)

	    return {
	        'loss_smpl_pose_root':
	        loss_smpl_pose[:, 0].sum() / (body_count + 1e-6),
	        'loss_smpl_pose_body':
	        loss_smpl_pose[:, 1:22].sum() / (body_count + 1e-6),
	        'loss_smpl_pose_lhand':
	        extra_scale * loss_smpl_pose[:, 22:37].sum() / (lhand_count + 1e-6),
	        'loss_smpl_pose_rhand':
	        extra_scale * loss_smpl_pose[:, 37:52].sum() / (rhand_count + 1e-6),
	        'loss_smpl_pose_jaw':
	        extra_scale * loss_smpl_pose[:, 52].sum() / (jaw_count + 1e-6),
	    }

	def loss_smpl_beta(self, outputs, targets, indices, idx, num_boxes,
	                   data_batch, face_hand_kpt=False):
	    """Validity-weighted L1 loss on the shape coefficients.

	    Args:
	        outputs: dict with ``'pred_smpl_beta'``.
	        targets: unused.
	        indices: matcher output; only ``indices[0]`` is used.
	        idx: index selecting the matched queries.
	        num_boxes: unused.
	        data_batch: dict with per-image lists ``'smplx_shape'`` and
	            ``'smplx_shape_valid'``.
	        face_hand_kpt: unused.

	    Returns:
	        dict with the single key ``'loss_smpl_beta'``; a zero derived
	        from the prediction when the validity weights sum to zero.
	    """
	    matches = indices[0]
	    beta_pred = outputs['pred_smpl_beta'][idx]

	    beta_gt = torch.cat(
	        [per_img[tgt_ids]
	         for per_img, (_, tgt_ids) in zip(data_batch['smplx_shape'], matches)],
	        dim=0)
	    weight = torch.cat(
	        [per_img[tgt_ids]
	         for per_img, (_, tgt_ids) in zip(data_batch['smplx_shape_valid'], matches)],
	        dim=0)

	    # Nothing to supervise: return a zero that still depends on the prediction.
	    if weight.sum() == 0:
	        return {'loss_smpl_beta': beta_pred.sum() * 0}

	    per_instance = F.l1_loss(beta_pred, beta_gt, reduction='none').sum(-1)
	    weighted_total = (per_instance * weight).sum()
	    return {'loss_smpl_beta': weighted_total / (weight.sum() + 1e-6)}

	def loss_smpl_expr(self, outputs, targets, indices, idx, num_boxes,
	                   data_batch, face_hand_kpt=False):
	    """Validity-weighted L1 loss on the expression coefficients.

	    Args:
	        outputs: dict with ``'pred_logits'`` and ``'pred_smpl_expr'``.
	        targets: unused.
	        indices: matcher output; only ``indices[0]`` is used.
	        idx: index selecting the matched queries.
	        num_boxes: unused.
	        data_batch: dict with per-image lists ``'smplx_expr'`` and
	            ``'smplx_expr_valid'``.
	        face_hand_kpt: when false the loss is multiplied by zero.

	    Returns:
	        dict with the single key ``'loss_smpl_expr'``.
	    """
	    matches = indices[0]
	    device = outputs['pred_logits'].device
	    expr_pred = outputs['pred_smpl_expr'][idx]

	    expr_gt = torch.cat(
	        [per_img[tgt_ids]
	         for per_img, (_, tgt_ids) in zip(data_batch['smplx_expr'], matches)],
	        dim=0)
	    weight = torch.cat(
	        [per_img[tgt_ids]
	         for per_img, (_, tgt_ids) in zip(data_batch['smplx_expr_valid'], matches)],
	        dim=0)

	    # Nothing to supervise: return a zero that still depends on the prediction.
	    if weight.sum() == 0:
	        return {
	            'loss_smpl_expr':
	            expr_pred.sum() * torch.as_tensor(0., device=device)
	        }

	    per_instance = F.l1_loss(expr_pred, expr_gt, reduction='none').sum(-1)
	    weighted_total = (per_instance * weight).sum()
	    if not face_hand_kpt:
	        # Keep the key in the result but disable its contribution.
	        weighted_total = torch.as_tensor(0., device=device) * weighted_total
	    return {'loss_smpl_expr': weighted_total / (weight.sum() + 1e-6)}

	def loss_smpl_kp3d(self,
	                   outputs,
	                   targets,
	                   indices,
	                   idx,
	                   num_boxes,
	                   data_batch,
	                   has_keypoints3d=None,
	                   face_hand_kpt=False):
	    """Pelvis-relative L1 loss on 3D keypoints, reported per body part.

	    Supervision without per-part root alignment: predictions and targets
	    are both expressed relative to their own pelvis joint.

	    Args:
	        outputs: dict with ``'pred_logits'`` and ``'pred_smpl_kp3d'``.
	        targets: unused.
	        indices: matcher output; only ``indices[0]`` is used.
	        idx: index selecting the matched queries.
	        num_boxes: unused.
	        data_batch: dict with per-image lists ``'joint_cam'`` (xyz in the
	            first three channels, confidence after them) and ``'is_3D'``.
	        has_keypoints3d: unused.
	        face_hand_kpt: when false, the hand and face terms are multiplied
	            by zero.

	    Returns:
	        dict with ``'loss_smpl_body_kp3d'``, ``'loss_smpl_lhand_kp3d'``,
	        ``'loss_smpl_rhand_kp3d'`` and ``'loss_smpl_face_kp3d'``.
	    """
	    device = outputs['pred_logits'].device
	    matches = indices[0]

	    pred_kp3d = outputs['pred_smpl_kp3d'][idx].float()
	    gt_joint_cam = torch.cat(
	        [per_img[tgt_ids]
	         for per_img, (_, tgt_ids) in zip(data_batch['joint_cam'], matches)],
	        dim=0)
	    gt_conf = gt_joint_cam[:, :, 3:].clone()
	    gt_kp3d = gt_joint_cam[:, :, :3]

	    # One `is_3D` flag per image, repeated for each matched instance.
	    is_3d = torch.cat(
	        [flag[None, None].repeat(len(tgt_ids), 1, 1)
	         for flag, (_, tgt_ids) in zip(data_batch['is_3D'], matches)],
	        dim=0)
	    gt_conf = gt_conf * is_3d

	    # Express both sides relative to their own pelvis.
	    pelvis = get_keypoint_idx('pelvis', self.convention)
	    gt_root = gt_kp3d[..., pelvis, :]
	    pred_root = pred_kp3d[..., pelvis, :]
	    gt_kp3d = gt_kp3d - gt_root[:, None, :]
	    pred_kp3d = pred_kp3d - pred_root[:, None, :]

	    body_ids = smpl_x.joint_part['body']
	    face_ids = smpl_x.joint_part['face']
	    lhand_ids = smpl_x.joint_part['lhand']
	    rhand_ids = smpl_x.joint_part['rhand']

	    masked_err = F.l1_loss(pred_kp3d, gt_kp3d, reduction='none') * gt_conf

	    # (result key, joint ids, always supervised?)
	    part_table = (
	        ('loss_smpl_body_kp3d', body_ids, True),
	        ('loss_smpl_lhand_kp3d', lhand_ids, False),
	        ('loss_smpl_rhand_kp3d', rhand_ids, False),
	        ('loss_smpl_face_kp3d', face_ids, False),
	    )
	    losses = {}
	    for key, joint_ids, always_on in part_table:
	        total = torch.sum(masked_err[:, joint_ids, :])
	        if not (always_on or face_hand_kpt):
	            total = torch.as_tensor(0., device=device) * total
	        # Instances with at least one confident joint in this part.
	        instances = (gt_conf[:, joint_ids].sum([-1, -2]) > 0).sum()
	        losses[key] = total / (instances + 1e-6)
	    return losses

	def loss_smpl_kp3d_ra(self,
	                      outputs,
	                      targets,
	                      indices,
	                      idx,
	                      num_boxes,
	                      data_batch,
	                      has_keypoints3d=None,
	                      face_hand_kpt=False):
	    """Root-aligned ("ra") L1 loss on 3D keypoints, reported per part.

	    After pelvis centring, the predicted face joints are taken relative
	    to the predicted neck and the predicted hand joints relative to the
	    corresponding predicted wrist before being compared with the targets.

	    Args:
	        outputs: dict with ``'pred_smpl_kp3d'``.
	        targets: unused.
	        indices: matcher output; only ``indices[0]`` is used.
	        idx: index selecting the matched queries.
	        num_boxes: unused.
	        data_batch: dict with per-image lists ``'smplx_joint_cam'`` (xyz
	            in the first three channels, confidence after them) and
	            ``'is_3D'``.
	        has_keypoints3d: unused.
	        face_hand_kpt: when false, the face and hand terms are multiplied
	            by zero.

	    Returns:
	        dict with ``'loss_smpl_body_kp3d_ra'``,
	        ``'loss_smpl_face_kp3d_ra'``, ``'loss_smpl_lhand_kp3d_ra'`` and
	        ``'loss_smpl_rhand_kp3d_ra'``.
	    """
	    matches = indices[0]

	    pred_kp3d = outputs['pred_smpl_kp3d'][idx].float()
	    gt_joint_cam = torch.cat(
	        [per_img[tgt_ids]
	         for per_img, (_, tgt_ids) in zip(data_batch['smplx_joint_cam'], matches)],
	        dim=0)
	    gt_conf = gt_joint_cam[:, :, 3:].clone()
	    gt_kp3d = gt_joint_cam[:, :, :3]

	    # One `is_3D` flag per image, repeated for each matched instance; the
	    # confidence is then tiled over the three coordinate channels.
	    is_3d = torch.cat(
	        [flag[None, None].repeat(len(tgt_ids), 1, 1)
	         for flag, (_, tgt_ids) in zip(data_batch['is_3D'], matches)],
	        dim=0)
	    gt_conf = (gt_conf * is_3d).repeat(1, 1, 3)

	    # Express both sides relative to their own pelvis.
	    pelvis = get_keypoint_idx('pelvis', self.convention)
	    gt_root = gt_kp3d[..., pelvis, :]
	    pred_root = pred_kp3d[..., pelvis, :]
	    gt_kp3d = gt_kp3d - gt_root[:, None, :]
	    pred_kp3d = pred_kp3d - pred_root[:, None, :]

	    body_ids = smpl_x.joint_part['body']
	    face_ids = smpl_x.joint_part['face']
	    lhand_ids = smpl_x.joint_part['lhand']
	    rhand_ids = smpl_x.joint_part['rhand']

	    def masked_l1_sum(pred_part, joint_ids):
	        # Confidence-weighted L1 error of one part, summed to a scalar.
	        err = F.l1_loss(pred_part, gt_kp3d[:, joint_ids, :], reduction='none')
	        return torch.sum(err * gt_conf[:, joint_ids, :])

	    def valid_instances(joint_ids):
	        # Instances with at least one confident joint in this part.
	        return (gt_conf[:, joint_ids].sum([-1, -2]) > 0).sum()

	    losses = {}
	    body_total = masked_l1_sum(pred_kp3d[:, body_ids, :], body_ids)
	    losses['loss_smpl_body_kp3d_ra'] = body_total / (valid_instances(body_ids) + 1e-6)

	    # (result key, joint ids, index of the joint the part is aligned to)
	    aligned_parts = (
	        ('loss_smpl_face_kp3d_ra', face_ids, smpl_x.neck_idx),
	        ('loss_smpl_lhand_kp3d_ra', lhand_ids, smpl_x.lwrist_idx),
	        ('loss_smpl_rhand_kp3d_ra', rhand_ids, smpl_x.rwrist_idx),
	    )
	    for key, joint_ids, anchor_idx in aligned_parts:
	        part_cam = pred_kp3d[:, joint_ids, :]
	        anchor_cam = pred_kp3d[:, anchor_idx, None, :]
	        part_cam = part_cam - anchor_cam
	        value = masked_l1_sum(part_cam, joint_ids) / (valid_instances(joint_ids) + 1e-6)
	        losses[key] = value if face_hand_kpt else 0 * value

	    return losses

	def loss_smpl_kp2d(self,
	                   outputs,
	                   targets,
	                   indices,
	                   idx,
	                   num_boxes,
	                   data_batch,
	                   focal_length=5000.,
	                   has_keypoints2d=None,
	                   face_hand_kpt=False):
	    """L1 loss between projected 3D keypoints and 2D targets, per part.

	    Targets are divided by ``cfg.output_hm_shape`` and predictions by the
	    image width/height, so both sides are compared in normalised
	    coordinates.

	    Args:
	        outputs: dict with ``'pred_smpl_kp3d'`` and ``'pred_smpl_cam'``.
	        targets: unused.
	        indices: matcher output; only ``indices[0]`` is used.
	        idx: index selecting the matched queries; ``idx[0]`` is also used
	            to look up each instance's entry in ``data_batch['img_shape']``.
	        num_boxes: unused.
	        data_batch: dict with ``'joint_img'`` (xy in the first two
	            channels, confidence after them) and ``'img_shape'``.
	        focal_length: focal length passed to ``project_points_new``.
	        has_keypoints2d: unused.
	        face_hand_kpt: when false, the hand and face terms are multiplied
	            by zero.

	    Returns:
	        dict with ``'loss_smpl_body_kp2d'``, ``'loss_smpl_lhand_kp2d'``,
	        ``'loss_smpl_rhand_kp2d'`` and ``'loss_smpl_face_kp2d'``.
	    """
	    matches = indices[0]

	    pred_kp3d = outputs['pred_smpl_kp3d'][idx].float()
	    pred_cam = outputs['pred_smpl_cam'][idx].float()
	    gt_joint_img = torch.cat(
	        [per_img[tgt_ids]
	         for per_img, (_, tgt_ids) in zip(data_batch['joint_img'], matches)],
	        dim=0)

	    kp_conf = gt_joint_img[:, :, 2:].clone()

	    # Normalise the targets by the heat-map width (x) and height (y).
	    gt_kp2d = gt_joint_img[:, :, :2].float()
	    gt_kp2d[:, :, 0] = gt_kp2d[:, :, 0] / cfg.output_hm_shape[2]
	    gt_kp2d[:, :, 1] = gt_kp2d[:, :, 1] / cfg.output_hm_shape[1]

	    # Per-instance image size with the last axis flipped
	    # (presumably (h, w) -> (w, h) - confirm against the data loader).
	    img_wh = torch.cat(
	        [data_batch['img_shape'][batch_id][None] for batch_id in idx[0]],
	        dim=0).flip(-1)

	    pred_kp2d = project_points_new(points_3d=pred_kp3d,
	                                   pred_cam=pred_cam,
	                                   focal_length=focal_length,
	                                   camera_center=img_wh / 2)
	    pred_kp2d = pred_kp2d / img_wh[:, None]

	    body_ids = smpl_x.joint_part['body']
	    face_ids = smpl_x.joint_part['face']
	    lhand_ids = smpl_x.joint_part['lhand']
	    rhand_ids = smpl_x.joint_part['rhand']

	    masked_err = F.l1_loss(pred_kp2d, gt_kp2d, reduction='none') * kp_conf

	    # (result key, joint ids, always supervised?)
	    part_table = (
	        ('loss_smpl_body_kp2d', body_ids, True),
	        ('loss_smpl_lhand_kp2d', lhand_ids, False),
	        ('loss_smpl_rhand_kp2d', rhand_ids, False),
	        ('loss_smpl_face_kp2d', face_ids, False),
	    )
	    losses = {}
	    for key, joint_ids, always_on in part_table:
	        total = torch.sum(masked_err[:, joint_ids, :])
	        if not (always_on or face_hand_kpt):
	            total = 0 * total
	        # Instances with at least one confident joint in this part.
	        instances = (kp_conf[:, joint_ids].sum([-1, -2]) > 0).sum()
	        losses[key] = total / (instances + 1e-6)

	    return losses

	def loss_smpl_kp2d_ba(self,
	                      outputs,
	                      targets,
	                      indices,
	                      idx,
	                      num_boxes,
	                      data_batch,
	                      focal_length=5000.,
	                      has_keypoints2d=None,
	                      face_hand_kpt=False):
	    """Bbox-aligned ("ba") 2D keypoint reprojection loss, per part.

	    Hand and face joints (targets and projections) are mapped into the
	    coordinate frame of their ground-truth part box. The projected joints
	    are then shifted by the mean offset to the targets, computed over the
	    target joints that fall inside the box, before the L1 error is taken.

	    Args:
	        outputs: dict with ``'pred_logits'``, ``'pred_smpl_kp3d'`` and
	            ``'pred_smpl_cam'``.
	        targets: list of per-image dicts with ``'lhand_boxes'``,
	            ``'rhand_boxes'`` and ``'face_boxes'`` in cxcywh format.
	        indices: matcher output; only ``indices[0]`` is used.
	        idx: index selecting the matched queries; ``idx[0]`` is also used
	            to look up each instance's entry in ``data_batch['img_shape']``.
	        num_boxes: normaliser of the body term.
	        data_batch: dict with ``'joint_img'``, ``'img_shape'`` and the
	            ``'<part>_bbox_valid'`` lists.
	        focal_length: focal length passed to ``project_points_new``.
	        has_keypoints2d: unused.
	        face_hand_kpt: when false, the hand and face terms are zero.

	    Returns:
	        dict with ``'loss_smpl_body_kp2d_ba'``,
	        ``'loss_smpl_lhand_kp2d_ba'``, ``'loss_smpl_rhand_kp2d_ba'`` and
	        ``'loss_smpl_face_kp2d_ba'``.
	    """
	    device = outputs['pred_logits'].device
	    indices = indices[0]
	    parts = ('lhand', 'rhand', 'face')

	    pred_smpl_kp3d = outputs['pred_smpl_kp3d'][idx].float()
	    pred_cam = outputs['pred_smpl_cam'][idx].float()

	    # Count the matches of every image in the batch, and bail out before
	    # any concatenation over `idx[0]`, which fails on an empty list.
	    valid_num = sum(len(tgt_ids) for _, tgt_ids in indices)
	    if valid_num == 0:
	        zero = (torch.as_tensor(0., device=device) +
	                pred_smpl_kp3d.sum() * 0 + pred_cam.sum() * 0)
	        return {
	            'loss_smpl_body_kp2d_ba': zero,
	            'loss_smpl_lhand_kp2d_ba': zero,
	            'loss_smpl_rhand_kp2d_ba': zero,
	            'loss_smpl_face_kp2d_ba': zero,
	        }

	    targets_kp2d = torch.cat(
	        [t[i] for t, (_, i) in zip(data_batch['joint_img'], indices)],
	        dim=0)
	    # Confidence is duplicated so it lines up with the (x, y) channels.
	    keypoints2d_conf = targets_kp2d[:, :, 2:].clone().repeat(1, 1, 2)

	    # Normalise the targets by the heat-map width (x) and height (y).
	    targets_kp2d = targets_kp2d[:, :, :2].float()
	    targets_kp2d[:, :, 0] = targets_kp2d[:, :, 0] / cfg.output_hm_shape[2]
	    targets_kp2d[:, :, 1] = targets_kp2d[:, :, 1] / cfg.output_hm_shape[1]

	    img_wh = torch.cat(
	        [data_batch['img_shape'][i][None] for i in idx[0]], dim=0).flip(-1)

	    pred_smpl_kp2d = project_points_new(points_3d=pred_smpl_kp3d,
	                                        pred_cam=pred_cam,
	                                        focal_length=focal_length,
	                                        camera_center=img_wh / 2)
	    pred_smpl_kp2d = pred_smpl_kp2d / img_wh[:, None]

	    # Ground-truth part boxes (xyxy, scaled by the image size) and their
	    # validity flags.
	    bbox_valid = {}
	    bbox_gt = {}
	    for part in parts:
	        bbox_valid[part] = torch.cat(
	            [t[i] for t, (_, i) in zip(data_batch[f'{part}_bbox_valid'], indices)],
	            dim=0)
	        boxes = torch.cat(
	            [t[f'{part}_boxes'][i] for t, (_, i) in zip(targets, indices)],
	            dim=0)
	        bbox_gt[part] = (box_ops.box_cxcywh_to_xyxy(boxes).reshape(-1, 2, 2) *
	                         img_wh[:, None]).reshape(-1, 4)

	    # img_shape: hw, one row per matched instance.
	    img_shape = torch.cat(
	        [t[None].repeat(len(i), 1)
	         for t, (_, i) in zip(data_batch['img_shape'], indices)],
	        dim=0)

	    any_part_box = bool(
	        ((bbox_valid['lhand'] + bbox_valid['rhand'] + bbox_valid['face']) != 0).any())
	    if any_part_box:
	        for part in parts:
	            joint_ids = smpl_x.joint_part[part]
	            bbox = bbox_gt[part]  # xyxy

	            # Box origin and inverse box extent along each axis.
	            left = bbox[:, None, 0] / img_shape[:, 1:]
	            top = bbox[:, None, 1] / img_shape[:, :1]
	            scale_x = img_shape[:, 1:] / (bbox[:, None, 2] - bbox[:, None, 0] + 1e-6)
	            scale_y = img_shape[:, :1] / (bbox[:, None, 3] - bbox[:, None, 1] + 1e-6)

	            # Targets transformed to 0-1 bbox space.
	            x = targets_kp2d[:, joint_ids, 0]
	            y = targets_kp2d[:, joint_ids, 1]
	            x -= left
	            x *= scale_x
	            y -= top
	            y *= scale_y
	            in_box = (x >= 0) * (x <= 1) * (y >= 0) * (y <= 1)
	            coord = torch.stack((x, y), 2)

	            # Splice the transformed joints back; this relies on the
	            # part's joint ids forming one contiguous range.
	            targets_kp2d = torch.cat(
	                (targets_kp2d[:, :joint_ids[0], :], coord,
	                 targets_kp2d[:, joint_ids[-1] + 1:, :]), 1)

	            # Same transform for the projected joints.
	            x_pred = pred_smpl_kp2d[:, joint_ids, 0]
	            y_pred = pred_smpl_kp2d[:, joint_ids, 1]
	            x_pred -= left
	            x_pred *= scale_x
	            y_pred -= top
	            y_pred *= scale_y
	            coord_pred = torch.stack((x_pred, y_pred), 2)

	            # Global translation alignment: mean (target - prediction)
	            # over the in-box joints of each instance, zero when an
	            # instance has none. Allocated on coord_pred's device rather
	            # than a hard-coded CUDA device.
	            offset = torch.where(in_box[:, :, None], coord - coord_pred,
	                                 torch.zeros_like(coord_pred))
	            in_box_count = in_box.sum(1, keepdim=True).clamp(min=1)
	            trans = (offset.sum(1) / in_box_count)[:, None, :]
	            coord_pred = coord_pred + trans

	            pred_smpl_kp2d = torch.cat(
	                (pred_smpl_kp2d[:, :joint_ids[0], :], coord_pred,
	                 pred_smpl_kp2d[:, joint_ids[-1] + 1:, :]), 1)

	    loss_smpl_kp2d_ba = F.l1_loss(pred_smpl_kp2d,
	                                  targets_kp2d[:, :, :2],
	                                  reduction='none')

	    valid_pos = keypoints2d_conf > 0
	    if keypoints2d_conf[valid_pos].numel() == 0:
	        return {
	            'loss_smpl_body_kp2d_ba': loss_smpl_kp2d_ba.sum() * 0,
	            'loss_smpl_lhand_kp2d_ba': loss_smpl_kp2d_ba.sum() * 0,
	            'loss_smpl_rhand_kp2d_ba': loss_smpl_kp2d_ba.sum() * 0,
	            'loss_smpl_face_kp2d_ba': loss_smpl_kp2d_ba.sum() * 0,
	        }

	    # TODO: the original author marked the normalisation below as
	    # "needs to be changed".
	    loss_smpl_kp2d_ba = loss_smpl_kp2d_ba * keypoints2d_conf

	    losses = {}
	    losses['loss_smpl_body_kp2d_ba'] = torch.sum(
	        loss_smpl_kp2d_ba[:, smpl_x.joint_part['body'], :]) / num_boxes

	    for part in parts:
	        num_part_bbox = bbox_valid[part].sum()
	        if face_hand_kpt and num_part_bbox > 0:
	            part_loss = torch.sum(
	                loss_smpl_kp2d_ba[:, smpl_x.joint_part[part], :]) / num_part_bbox
	        else:
	            # Disabled part or no valid box: emit a zero rather than
	            # dividing by a zero count, which would give NaN.
	            part_loss = (torch.as_tensor(0., device=device) +
	                         loss_smpl_kp2d_ba.sum() * 0)
	        losses[f'loss_smpl_{part}_kp2d_ba'] = part_loss
	    return losses

	def loss_boxes(self, outputs, targets, indices,
	               idx, num_boxes, data_batch,
	               face_hand_box=False):
	    """Compute the L1 regression loss and the GIoU loss for matched boxes.

	    Target dicts must contain the key "boxes" holding a tensor of dim
	    [nb_target_boxes, 4]. The target boxes are expected in format
	    (center_x, center_y, w, h), normalized by the image size.

	    Args:
	        outputs: dict with 'pred_boxes' and, optionally,
	            'pred_lhand_boxes' / 'pred_rhand_boxes' / 'pred_face_boxes'.
	        targets: per-image dicts providing 'boxes' and, for the parts,
	            'lhand_boxes' / 'rhand_boxes' / 'face_boxes'.
	        indices: matcher result; element 0 is the per-image list of
	            (prediction indices, target indices) pairs.
	        idx: index used to select the matched predictions.
	        num_boxes: normalizer of the body losses.
	        data_batch: provides the '<part>_bbox_valid' weights, indexed per
	            image by the matched target indices.
	        face_hand_box: when False only the body losses are returned.

	    Returns:
	        dict with 'loss_body_bbox' and 'loss_body_giou', plus
	        'loss_<part>_bbox' / 'loss_<part>_giou' for each part whose
	        prediction is present when ``face_hand_box`` is set.
	    """
	    indices = indices[0]
	    assert 'pred_boxes' in outputs

	    def _gather(per_image, key=None):
	        # Concatenate, over the batch, the rows selected by the matched
	        # target indices of every image.
	        if key is None:
	            rows = [t[i] for t, (_, i) in zip(per_image, indices)]
	        else:
	            rows = [t[key][i] for t, (_, i) in zip(per_image, indices)]
	        return torch.cat(rows, dim=0)

	    def _l1_and_giou(src, tgt, conf):
	        # Validity-weighted sums of the element-wise L1 and of (1 - GIoU).
	        l1 = F.l1_loss(src, tgt, reduction='none') * conf[:, None]
	        giou = 1 - torch.diag(
	            box_ops.generalized_box_iou(
	                box_ops.box_cxcywh_to_xyxy(src),
	                box_ops.box_cxcywh_to_xyxy(tgt)))
	        return l1.sum(), (giou * conf).sum()

	    src_body_boxes = outputs['pred_boxes'][idx]
	    target_body_boxes = _gather(targets, 'boxes')
	    target_body_boxes_conf = _gather(data_batch['body_bbox_valid'])
	    l1_sum, giou_sum = _l1_and_giou(src_body_boxes, target_body_boxes,
	                                    target_body_boxes_conf)
	    losses = {
	        'loss_body_bbox': l1_sum / num_boxes,
	        'loss_body_giou': giou_sum / num_boxes,
	    }

	    if not face_hand_box:
	        return losses

	    for part in ('lhand', 'rhand', 'face'):
	        pred_key = f'pred_{part}_boxes'
	        if pred_key not in outputs:
	            continue
	        src_boxes = outputs[pred_key][idx]
	        target_boxes = _gather(targets, f'{part}_boxes')
	        target_conf = _gather(data_batch[f'{part}_bbox_valid'])
	        l1_sum, giou_sum = _l1_and_giou(src_boxes, target_boxes,
	                                        target_conf)
	        # Part losses are normalized by the number of valid part boxes,
	        # not by num_boxes.
	        num_valid = (target_conf > 0).sum()
	        if num_valid > 0:
	            losses[f'loss_{part}_bbox'] = l1_sum / num_valid
	            losses[f'loss_{part}_giou'] = giou_sum / num_valid
	        else:
	            # Zero that still depends on the predictions.
	            losses[f'loss_{part}_bbox'] = l1_sum * 0
	            losses[f'loss_{part}_giou'] = giou_sum * 0

	    return losses

	def loss_dn_boxes(self, outputs, targets, indices, idx, num_boxes,
	                  data_batch):
	    """Box losses for the denoising (dn) queries.

	    Input (read from ``outputs``):
	     - 'dn_bbox_pred':  bs, num_dn, 4
	     - 'dn_bbox_input': bs, num_dn, 4
	     - 'num_tgt': normalizer handed to ``tgt_loss_boxes``

	    Returns:
	        dict with 'dn_loss_bbox' and 'dn_loss_giou'. When 'num_tgt' is
	        absent from ``outputs`` both entries are zeros derived from a
	        prediction tensor.
	    """
	    # The guard must run before any dn key is read; otherwise a missing
	    # key raises KeyError and the zero-loss fallback is unreachable.
	    if 'num_tgt' not in outputs:
	        if 'dn_bbox_pred' in outputs:
	            anchor = outputs['dn_bbox_pred']
	        else:
	            anchor = outputs['pred_logits']
	        return {
	            'dn_loss_bbox': anchor.sum() * 0,
	            'dn_loss_giou': anchor.sum() * 0,
	        }

	    return self.tgt_loss_boxes(outputs['dn_bbox_pred'],
	                               outputs['dn_bbox_input'],
	                               outputs['num_tgt'])

	def loss_dn_labels(self, outputs, targets, indices, idx, num_boxes,
	                   data_batch):
	    """Classification loss for the denoising (dn) queries.

	    Input (read from ``outputs``):
	     - 'dn_class_pred':  bs, num_dn, num_classes
	     - 'dn_class_input': bs, num_dn
	     - 'num_tgt': normalizer handed to ``tgt_loss_labels``

	    Returns:
	        dict with 'dn_loss_ce'; a zero derived from 'pred_logits' when
	        'num_tgt' is absent from ``outputs``.
	    """
	    indices = indices[0]
	    if 'num_tgt' in outputs:
	        dn_logits = outputs['dn_class_pred']  # bs, num_dn, text_len
	        dn_labels = outputs['dn_class_input']
	        return self.tgt_loss_labels(dn_logits, dn_labels,
	                                    outputs['num_tgt'])
	    # No denoising part in this forward pass: contribute nothing.
	    return {'dn_loss_ce': outputs['pred_logits'].sum() * 0}

	@torch.no_grad()
	def loss_matching_cost(self, outputs, targets, indices, idx, num_boxes,
	                       data_batch):
	    """Re-export the matcher's cost statistics as log entries.

	    Only ``indices[1]`` is used: a mapping whose keys are returned
	    prefixed with ``set_`` and whose values are passed through unchanged.
	    """
	    prefixed = {}
	    for cost_name, cost_value in indices[1].items():
	        prefixed[f'set_{cost_name}'] = cost_value
	    return prefixed

	def _get_src_permutation_idx(self, indices):
	# permute predictions following indices
	batch_idx = torch.cat(
	[torch.full_like(src, i) for i, (src, _) in enumerate(indices)])
	src_idx = torch.cat([src for (src, _) in indices])
	return batch_idx, src_idx

	def _get_tgt_permutation_idx(self, indices):
	# permute targets following indices
	batch_idx = torch.cat(
	[torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)])
	tgt_idx = torch.cat([tgt for (_, tgt) in indices])
	return batch_idx, tgt_idx

	def get_loss(self, loss, outputs, targets, data_batch, indices, num_boxes,
	             **kwargs):
	    """Dispatch one named loss to the method that implements it.

	    ``loss`` must be one of the keys of the table below. The selected
	    method is called as
	    ``fn(outputs, targets, indices, idx, num_boxes, data_batch, **kwargs)``
	    where ``idx`` is the flattened (batch, prediction) index built from
	    ``indices[0]``.

	    Raises:
	        AssertionError: if ``loss`` is not a known loss name.
	    """
	    handlers = dict(
	        smpl_pose=self.loss_smpl_pose,
	        smpl_beta=self.loss_smpl_beta,
	        smpl_expr=self.loss_smpl_expr,
	        smpl_kp2d=self.loss_smpl_kp2d,
	        smpl_kp2d_ba=self.loss_smpl_kp2d_ba,
	        smpl_kp3d_ra=self.loss_smpl_kp3d_ra,
	        smpl_kp3d=self.loss_smpl_kp3d,
	        labels=self.loss_labels,
	        cardinality=self.loss_cardinality,
	        keypoints=self.loss_keypoints,
	        boxes=self.loss_boxes,
	        dn_label=self.loss_dn_labels,
	        dn_bbox=self.loss_dn_boxes,
	        matching=self.loss_matching_cost,
	    )

	    matched_idx = self._get_src_permutation_idx(indices[0])
	    assert loss in handlers, f'do you really want to compute {loss} loss?'
	    handler = handlers[loss]
	    return handler(outputs, targets, indices, matched_idx, num_boxes,
	                   data_batch, **kwargs)

	def prep_for_dn2(self, mask_dict):
	    """Unpack the denoising entries of ``mask_dict`` into a tuple.

	    Returns:
	        (known_labels, known_bboxs, output_known_class,
	        output_known_coord, num_tgt), where ``num_tgt`` is the value
	        stored under 'pad_size'.
	    """
	    read_order = ('known_bboxs', 'known_labels', 'output_known_coord',
	                  'output_known_class', 'pad_size')
	    bboxs, labels, coord, cls_logits, pad_size = (
	        mask_dict[key] for key in read_order)
	    return labels, bboxs, cls_logits, coord, pad_size

	## SMPL losses

	def forward(self, outputs, targets, data_batch, return_indices=False):
	    """Match predictions to targets and accumulate every configured loss.

	    Parameters:
	        outputs: dict of tensors, see the output specification of the
	            model for the format. Besides the final-layer entries it may
	            hold 'aux_outputs' (a sequence of per-layer output dicts),
	            'interm_outputs' and 'query_expand'; every group that is
	            present is matched and scored separately.
	        targets: list of dicts, such that len(targets) == batch_size.
	            Each dict must contain 'boxes'; the other expected keys
	            depend on the losses applied, see each loss' doc.
	        data_batch: batch annotations, forwarded to the matcher and to
	            every loss function.
	        return_indices: used for vis. If True, the matcher results are
	            returned as well.

	    Returns:
	        The dict of losses, or ``(losses, indices_list)`` when
	        ``return_indices`` is True. Entries of auxiliary layer ``i`` are
	        suffixed ``_i``, intermediate outputs ``_interm`` and
	        query-expand outputs ``_query_expand``. ``indices_list`` holds
	        the auxiliary / intermediate / query-expand matches in that
	        order, followed by the final-layer match.
	    """
	    outputs_without_aux = {
	        k: v
	        for k, v in outputs.items() if k != 'aux_outputs'
	    }
	    device = next(iter(outputs.values())).device

	    # Compute the average number of target boxes across all nodes, for
	    # normalization purposes.
	    num_boxes = sum(len(t['boxes']) for t in targets)
	    num_boxes = torch.as_tensor([num_boxes],
	                                dtype=torch.float,
	                                device=device)
	    if is_dist_avail_and_initialized():
	        torch.distributed.all_reduce(num_boxes)
	    num_boxes = torch.clamp(num_boxes / get_world_size(), min=1).item()

	    # Loss for the final layer.
	    indices = self.matcher(outputs_without_aux, targets, data_batch)
	    if return_indices:
	        indices0_copy = indices
	        indices_list = []
	    losses = {}
	    smpl_loss = ['smpl_pose', 'smpl_beta', 'smpl_expr', 'smpl_kp2d',
	                 'smpl_kp2d_ba', 'smpl_kp3d', 'smpl_kp3d_ra']
	    for loss in self.losses:
	        kwargs = {}
	        # The final layer supervises hands and face as well.
	        if loss == 'keypoints' or loss in smpl_loss:
	            kwargs.update({'face_hand_kpt': True})
	        if loss == 'boxes':
	            kwargs.update({'face_hand_box': True})

	        losses.update(
	            self.get_loss(loss, outputs, targets, data_batch, indices,
	                          num_boxes, **kwargs))

	    # In case of auxiliary losses, we repeat this process with the output
	    # of each intermediate layer.
	    if 'aux_outputs' in outputs:
	        for layer_idx, aux_outputs in enumerate(outputs['aux_outputs']):
	            indices = self.matcher(aux_outputs, targets, data_batch)
	            if return_indices:
	                indices_list.append(indices)
	            for loss in self.losses:
	                kwargs = {}
	                if loss == 'boxes':
	                    # Part boxes are only supervised from layer
	                    # num_box_decoder_layers onwards.
	                    kwargs.update({
	                        'face_hand_box':
	                        layer_idx >= self.num_box_decoder_layers
	                    })

	                if loss == 'masks':
	                    continue

	                if loss == 'keypoints' or loss in smpl_loss:
	                    # Skipped for the first num_box_decoder_layers
	                    # layers; body-only until
	                    # num_hand_face_decoder_layers; full afterwards.
	                    if layer_idx < self.num_box_decoder_layers:
	                        continue
	                    kwargs.update({
	                        'face_hand_kpt':
	                        layer_idx >= self.num_hand_face_decoder_layers
	                    })

	                if loss == 'labels':
	                    # Logging is enabled only for the last layer
	                    kwargs = {'log': False}

	                l_dict = self.get_loss(loss, aux_outputs, targets,
	                                       data_batch, indices, num_boxes,
	                                       **kwargs)
	                l_dict = {k + f'_{layer_idx}': v for k, v in l_dict.items()}
	                losses.update(l_dict)

	    # interm_outputs loss
	    if 'interm_outputs' in outputs:
	        interm_outputs = outputs['interm_outputs']
	        # Pass data_batch like every other matcher call in this method.
	        indices = self.matcher(interm_outputs, targets, data_batch)
	        if return_indices:
	            indices_list.append(indices)
	        for loss in self.losses:
	            if loss in ['dn_bbox', 'dn_label', 'keypoints']:
	                continue
	            if loss in smpl_loss:
	                continue
	            kwargs = {}
	            if loss == 'labels':
	                kwargs = {'log': False}
	            l_dict = self.get_loss(loss, interm_outputs, targets,
	                                   data_batch, indices, num_boxes,
	                                   **kwargs)
	            l_dict = {k + '_interm': v for k, v in l_dict.items()}
	            losses.update(l_dict)

	    # aux_init loss
	    if 'query_expand' in outputs:
	        interm_outputs = outputs['query_expand']
	        # Pass data_batch like every other matcher call in this method.
	        indices = self.matcher(interm_outputs, targets, data_batch)
	        if return_indices:
	            indices_list.append(indices)
	        for loss in self.losses:
	            if loss in ['dn_bbox', 'dn_label']:
	                continue
	            kwargs = {}
	            if loss == 'labels':
	                kwargs = {'log': False}
	            l_dict = self.get_loss(loss, interm_outputs, targets,
	                                   data_batch, indices, num_boxes,
	                                   **kwargs)
	            l_dict = {k + '_query_expand': v for k, v in l_dict.items()}
	            losses.update(l_dict)

	    if return_indices:
	        indices_list.append(indices0_copy)
	        return losses, indices_list

	    return losses

	def tgt_loss_boxes(
	    self,
	    src_boxes,
	    tgt_boxes,
	    num_tgt,
	):
	    """L1 and GIoU losses between denoising predictions and their inputs.

	    Input:
	     - src_boxes: bs, num_dn, 4
	     - tgt_boxes: bs, num_dn, 4
	     - num_tgt: normalizer applied to both summed losses

	    Returns:
	        dict with 'dn_loss_bbox' and 'dn_loss_giou'.
	    """
	    l1_total = F.l1_loss(src_boxes, tgt_boxes, reduction='none').sum()

	    # Boxes are flattened over (bs, num_dn); the diagonal of the pairwise
	    # GIoU matrix pairs each prediction with its own target.
	    flat_src = box_ops.box_cxcywh_to_xyxy(src_boxes.flatten(0, 1))
	    flat_tgt = box_ops.box_cxcywh_to_xyxy(tgt_boxes.flatten(0, 1))
	    pairwise_giou = box_ops.generalized_box_iou(flat_src, flat_tgt)
	    giou_total = (1 - torch.diag(pairwise_giou)).sum()

	    return {
	        'dn_loss_bbox': l1_total / num_tgt,
	        'dn_loss_giou': giou_total / num_tgt,
	    }

	def tgt_loss_labels(self,
	                    src_logits: Tensor,
	                    tgt_labels: Tensor,
	                    num_tgt: int,
	                    log: bool = True):
	    """Focal classification loss for the denoising queries.

	    Input:
	     - src_logits: bs, num_dn, num_classes
	     - tgt_labels: bs, num_dn
	     - num_tgt: normalizer passed to ``sigmoid_focal_loss``
	     - log: accepted but not used by this method

	    Returns:
	        dict with 'dn_loss_ce'.
	    """
	    batch_size, num_queries, num_logits = src_logits.shape[:3]
	    # One extra channel receives the labels that fall outside the logit
	    # range; it is sliced off again right after the scatter.
	    onehot = torch.zeros([batch_size, num_queries, num_logits + 1],
	                         dtype=src_logits.dtype,
	                         layout=src_logits.layout,
	                         device=src_logits.device)
	    onehot.scatter_(2, tgt_labels.unsqueeze(-1), 1)
	    onehot = onehot[:, :, :-1]

	    focal = sigmoid_focal_loss(src_logits,
	                               onehot,
	                               num_tgt,
	                               alpha=self.focal_alpha,
	                               gamma=2)
	    return {'dn_loss_ce': focal * src_logits.shape[1]}


	class SetCriterion_Box(nn.Module):
	def __init__(self,
	num_classes,
	matcher,
	weight_dict,
	focal_alpha,
	losses,
	num_box_decoder_layers=2,
	num_hand_face_decoder_layers=4,
	num_body_points=17,
	num_hand_points=6,
	num_face_points=6,
	smpl_loss_config=None,
	convention='smplx_137'):
	super().__init__()
	self.num_classes = num_classes
	self.matcher = matcher
	self.weight_dict = weight_dict
	self.losses = losses
	self.focal_alpha = focal_alpha
	self.vis = 0.1
	self.abs = 1
	self.num_body_points = 0
	self.num_hand_points = 0
	self.num_face_points = 0
	self.num_box_decoder_layers = num_box_decoder_layers
	self.num_hand_face_decoder_layers = num_hand_face_decoder_layers
	self.convention = convention


	def loss_labels(self,
	                outputs,
	                targets,
	                indices,
	                idx,
	                num_boxes,
	                data_batch,
	                log=True):
	    """Classification loss (binary focal loss).

	    Target dicts must contain the key "labels" containing a tensor of dim
	    [nb_target_boxes].

	    Args:
	        outputs: dict with 'pred_logits'.
	        targets: per-image dicts with 'labels'.
	        indices: matcher result; element 0 is the per-image list of
	            (prediction indices, target indices) pairs.
	        idx: index of the matched predictions inside 'pred_logits'.
	        num_boxes: normalizer passed to ``sigmoid_focal_loss``.
	        data_batch: unused here.
	        log: when True, also report 'class_error'.

	    Returns:
	        dict with 'loss_ce' (and 'class_error' when ``log`` is set). When
	        no prediction is matched in the whole batch, only a zero
	        'loss_ce' is returned.
	    """
	    indices = indices[0]
	    assert 'pred_logits' in outputs
	    src_logits = outputs['pred_logits']

	    # Count the matches of every image, not just those of the first one.
	    num_matched = sum(len(src) for src, _ in indices)
	    if num_matched == 0:
	        return {'loss_ce': src_logits.sum() * 0}

	    target_classes_o = torch.cat(
	        [t['labels'][J] for t, (_, J) in zip(targets, indices)])
	    # Every query starts as class index num_classes; matched queries
	    # receive their target label.
	    target_classes = torch.full(src_logits.shape[:2],
	                                self.num_classes,
	                                dtype=torch.int64,
	                                device=src_logits.device)
	    target_classes[idx] = target_classes_o

	    target_classes_onehot = torch.zeros([
	        src_logits.shape[0], src_logits.shape[1], src_logits.shape[2] + 1
	    ],
	                                        dtype=src_logits.dtype,
	                                        layout=src_logits.layout,
	                                        device=src_logits.device)
	    target_classes_onehot.scatter_(2, target_classes.unsqueeze(-1), 1)
	    # Drop the extra last channel again.
	    target_classes_onehot = target_classes_onehot[:, :, :-1]

	    loss_ce = sigmoid_focal_loss(src_logits,
	                                 target_classes_onehot,
	                                 num_boxes,
	                                 alpha=self.focal_alpha,
	                                 gamma=2) * src_logits.shape[1]
	    losses = {'loss_ce': loss_ce}

	    if log:
	        # TODO this should probably be a separate loss, not hacked in this one here
	        losses['class_error'] = 100 - accuracy(src_logits[idx],
	                                               target_classes_o)[0]
	    return losses

	@torch.no_grad()
	def loss_cardinality(self, outputs, targets, indices, num_boxes,
	                     data_batch):
	    """Compute the cardinality error, ie the absolute error in the number
	    of predicted non-empty boxes. This is not really a loss, it is
	    intended for logging purposes only.

	    It doesn't propagate gradients.

	    NOTE(review): unlike the other loss_* methods of this class, this
	    signature has no ``idx`` parameter -- confirm against the dispatcher
	    that calls it.

	    Returns:
	        dict with 'cardinality_error'; zero when no image in the batch
	        has any label.
	    """
	    pred_logits = outputs['pred_logits']
	    device = pred_logits.device
	    tgt_lengths = torch.as_tensor([len(v['labels']) for v in targets],
	                                  device=device)
	    # tgt_lengths holds one entry per image, so it must be reduced before
	    # use in `if`; a bare `tgt_lengths == 0` raises for batch size != 1.
	    if tgt_lengths.sum() == 0:
	        return {'cardinality_error': pred_logits.sum() * 0}
	    # Count the number of predictions that are NOT "no-object" (which is the last class)
	    card_pred = (pred_logits.argmax(-1) !=
	                 pred_logits.shape[-1] - 1).sum(1)
	    card_err = F.l1_loss(card_pred.float(), tgt_lengths.float())
	    losses = {'cardinality_error': card_err}
	    return losses

	def loss_smpl_pose(self, outputs, targets, indices, idx, num_boxes,
	                   data_batch, face_hand_kpt=False):
	    """Validity-weighted L1 loss between predicted and target poses.

	    The body, left-hand, right-hand and jaw predictions are concatenated
	    along dim 1 and compared with ``data_batch['smplx_pose']`` after the
	    targets have gone through ``batch_rodrigues`` and been viewed as
	    (-1, 53, 3, 3). ``data_batch['smplx_pose_valid']`` weights the error.

	    Args:
	        outputs: dict with 'pred_logits', 'pred_smpl_pose',
	            'pred_smpl_lhand_pose', 'pred_smpl_rhand_pose' and
	            'pred_smpl_jaw_pose'.
	        targets: unused here.
	        indices: matcher result; element 0 is the per-image list of
	            (prediction indices, target indices) pairs.
	        idx: index of the matched predictions.
	        num_boxes: normalizer of every returned term.
	        data_batch: provides 'smplx_pose' and 'smplx_pose_valid'.
	        face_hand_kpt: when False the two hand terms are multiplied by 0.

	    Returns:
	        dict with 'loss_smpl_pose_root', '_body', '_lhand', '_rhand' and
	        '_jaw'; all zeros when nothing is matched in the batch or the
	        validity weights sum to zero.
	    """
	    indices = indices[0]
	    device = outputs['pred_logits'].device

	    pred_smplx_pose = torch.cat(
	        (
	            outputs['pred_smpl_pose'][idx],  # 22
	            outputs['pred_smpl_lhand_pose'][idx],  # 15
	            outputs['pred_smpl_rhand_pose'][idx],  # 15
	            outputs['pred_smpl_jaw_pose'][idx],
	        ),
	        dim=1)

	    loss_names = ('loss_smpl_pose_root', 'loss_smpl_pose_body',
	                  'loss_smpl_pose_lhand', 'loss_smpl_pose_rhand',
	                  'loss_smpl_pose_jaw')

	    def _zero_losses():
	        # Zeros that still depend on the predictions.
	        return {
	            name: torch.as_tensor(0., device=device) +
	            pred_smplx_pose.sum() * 0
	            for name in loss_names
	        }

	    # Count the matches of every image, not just those of the first one.
	    num_matched = sum(len(src) for src, _ in indices)
	    if num_matched == 0:
	        return _zero_losses()

	    targets_smpl_pose = torch.cat(
	        [t[i] for t, (_, i) in zip(data_batch['smplx_pose'], indices)],
	        dim=0)
	    targets_smpl_pose = batch_rodrigues(targets_smpl_pose.view(
	        -1, 3)).view(-1, 53, 3, 3)
	    conf = torch.cat([
	        t[i] for t, (_, i) in zip(data_batch['smplx_pose_valid'], indices)
	    ],
	                     dim=0)
	    # Repeat the validity values so they line up with the 3x3 matrices.
	    conf = (conf.reshape(-1, 53, 3)[:, :, :, None]).repeat(1, 1, 1, 3)
	    if conf.sum() == 0:
	        return _zero_losses()

	    loss_smpl_pose = F.l1_loss(pred_smplx_pose,
	                               targets_smpl_pose,
	                               reduction='none')
	    # Sum over each 3x3 matrix, leaving one value per instance and joint.
	    loss_smpl_pose = (loss_smpl_pose * conf).sum([-1, -2])

	    losses = {
	        'loss_smpl_pose_root': loss_smpl_pose[:, 0].sum() / num_boxes,
	        'loss_smpl_pose_body': loss_smpl_pose[:, 1:22].sum() / num_boxes,
	    }
	    if face_hand_kpt:
	        losses['loss_smpl_pose_lhand'] = \
	            loss_smpl_pose[:, 22:37].sum() / num_boxes
	        losses['loss_smpl_pose_rhand'] = \
	            loss_smpl_pose[:, 37:52].sum() / num_boxes
	    else:
	        losses['loss_smpl_pose_lhand'] = \
	            0 * loss_smpl_pose[:, 22:37].sum() / num_boxes
	        losses['loss_smpl_pose_rhand'] = \
	            0 * loss_smpl_pose[:, 37:52].sum() / num_boxes
	    # The jaw term is kept in both modes.
	    losses['loss_smpl_pose_jaw'] = loss_smpl_pose[:, 52].sum() / num_boxes
	    return losses

	def loss_smpl_beta(self, outputs, targets, indices, idx, num_boxes,
	                   data_batch, face_hand_kpt=False):
	    """Validity-weighted L1 loss on the shape (beta) parameters.

	    Args:
	        outputs: dict with 'pred_logits' and 'pred_smpl_beta'.
	        targets: unused here.
	        indices: matcher result; element 0 is the per-image list of
	            (prediction indices, target indices) pairs.
	        idx: index of the matched predictions.
	        num_boxes: normalizer of the returned loss.
	        data_batch: provides 'smplx_shape' and 'smplx_shape_valid',
	            indexed per image by the matched target indices.
	        face_hand_kpt: accepted but not used by this loss.

	    Returns:
	        dict with 'loss_smpl_beta'; zero when nothing is matched in the
	        batch or the validity weights sum to zero.
	    """
	    indices = indices[0]
	    device = outputs['pred_logits'].device
	    pred_smpl_betas = outputs['pred_smpl_beta'][idx]

	    def _zero_loss():
	        # Zero that still depends on the predictions.
	        return {
	            'loss_smpl_beta':
	            torch.as_tensor(0., device=device) + pred_smpl_betas.sum() * 0
	        }

	    # Count the matches of every image, not just those of the first one.
	    num_matched = sum(len(src) for src, _ in indices)
	    if num_matched == 0:
	        return _zero_loss()

	    targets_smpl_betas = torch.cat(
	        [t[i] for t, (_, i) in zip(data_batch['smplx_shape'], indices)],
	        dim=0)
	    conf = torch.cat([
	        t[i]
	        for t, (_, i) in zip(data_batch['smplx_shape_valid'], indices)
	    ],
	                     dim=0)
	    if conf.sum() == 0:
	        return _zero_loss()

	    loss_smpl_betas = F.l1_loss(pred_smpl_betas,
	                                targets_smpl_betas,
	                                reduction='none')
	    # One validity weight per instance, applied to its summed error.
	    loss_smpl_betas = loss_smpl_betas.sum(-1) * conf
	    return {'loss_smpl_beta': loss_smpl_betas.sum() / num_boxes}

	def loss_smpl_expr(self, outputs, targets, indices, idx, num_boxes,
	                   data_batch, face_hand_kpt=False):
	    """Validity-weighted L1 loss on the expression parameters.

	    Unlike the other SMPL losses in this class, the sum is normalized by
	    the total validity weight (plus 1e-6) rather than by ``num_boxes``.

	    Args:
	        outputs: dict with 'pred_logits' and 'pred_smpl_expr'.
	        targets: unused here.
	        indices: matcher result; element 0 is the per-image list of
	            (prediction indices, target indices) pairs.
	        idx: index of the matched predictions.
	        num_boxes: unused here.
	        data_batch: provides 'smplx_expr' and 'smplx_expr_valid', indexed
	            per image by the matched target indices.
	        face_hand_kpt: when False the loss is multiplied by 0.

	    Returns:
	        dict with 'loss_smpl_expr'; zero when nothing is matched in the
	        batch or the validity weights sum to zero.
	    """
	    indices = indices[0]
	    device = outputs['pred_logits'].device
	    pred_smpl_expr = outputs['pred_smpl_expr'][idx]

	    def _zero_loss():
	        # Zero that still depends on the predictions.
	        return {
	            'loss_smpl_expr':
	            torch.as_tensor(0., device=device) + pred_smpl_expr.sum() * 0
	        }

	    # Count the matches of every image, not just those of the first one.
	    num_matched = sum(len(src) for src, _ in indices)
	    if num_matched == 0:
	        return _zero_loss()

	    targets_smpl_expr = torch.cat(
	        [t[i] for t, (_, i) in zip(data_batch['smplx_expr'], indices)],
	        dim=0)
	    conf = torch.cat([
	        t[i] for t, (_, i) in zip(data_batch['smplx_expr_valid'], indices)
	    ],
	                     dim=0)
	    if conf.sum() == 0:
	        return _zero_loss()

	    loss_smpl_expr = F.l1_loss(pred_smpl_expr,
	                               targets_smpl_expr,
	                               reduction='none')
	    loss_smpl_expr = loss_smpl_expr.sum(-1) * conf
	    if face_hand_kpt:
	        value = loss_smpl_expr.sum() / (conf.sum() + 1e-6)
	    else:
	        value = 0 * loss_smpl_expr.sum() / (conf.sum() + 1e-6)
	    return {'loss_smpl_expr': value}

	def loss_smpl_kp3d(self,
	                   outputs,
	                   targets,
	                   indices,
	                   idx,
	                   num_boxes,
	                   data_batch,
	                   has_keypoints3d=None,
	                   face_hand_kpt=False):
	    """Pelvis-centred, confidence-weighted L1 loss on 3D keypoints.

	    Supervision for keypoints3d wo/ ra. Targets come from
	    ``data_batch['joint_cam']``: the first three channels are used as
	    coordinates and the remaining one as confidence, which is further
	    multiplied by ``data_batch['is_3D']``.

	    Args:
	        outputs: dict with 'pred_logits' and 'pred_smpl_kp3d'.
	        targets: unused here.
	        indices: matcher result; element 0 is the per-image list of
	            (prediction indices, target indices) pairs.
	        idx: index of the matched predictions.
	        num_boxes: normalizer of every returned term.
	        data_batch: provides 'joint_cam' and 'is_3D'.
	        has_keypoints3d: accepted but not used by this method.
	        face_hand_kpt: when False the hand and face terms are multiplied
	            by 0.

	    Returns:
	        dict with 'loss_smpl_body_kp3d', '_lhand_', '_rhand_' and
	        '_face_' entries; all zeros when nothing is matched in the batch
	        or no keypoint has positive confidence.
	    """
	    device = outputs['pred_logits'].device
	    indices = indices[0]

	    pred_smpl_kp3d = outputs['pred_smpl_kp3d'][idx].float()

	    loss_names = ('loss_smpl_body_kp3d', 'loss_smpl_lhand_kp3d',
	                  'loss_smpl_rhand_kp3d', 'loss_smpl_face_kp3d')

	    def _zero_losses():
	        # Zeros that still depend on the predictions.
	        return {
	            name: torch.as_tensor(0., device=device) +
	            pred_smpl_kp3d.sum() * 0
	            for name in loss_names
	        }

	    # Count the matches of every image, not just those of the first one.
	    num_matched = sum(len(src) for src, _ in indices)
	    if num_matched == 0:
	        return _zero_losses()

	    targets_smpl_kp3d = torch.cat(
	        [t[i] for t, (_, i) in zip(data_batch['joint_cam'], indices)],
	        dim=0)
	    targets_kp3d_conf = targets_smpl_kp3d[:, :, 3:].clone()
	    targets_smpl_kp3d = targets_smpl_kp3d[:, :, :3]

	    # One is_3D value per image, repeated for each of its matches.
	    targets_is_3d = torch.cat([
	        t[None, None].repeat(len(i), 1, 1)
	        for t, (_, i) in zip(data_batch['is_3D'], indices)
	    ],
	                              dim=0)
	    targets_kp3d_conf = (targets_kp3d_conf * targets_is_3d).repeat(1, 1, 3)

	    # Express both predictions and targets relative to their pelvis.
	    pelvis_idx = get_keypoint_idx('pelvis', self.convention)
	    targets_pelvis = targets_smpl_kp3d[..., pelvis_idx, :]
	    pred_pelvis = pred_smpl_kp3d[..., pelvis_idx, :]
	    targets_smpl_kp3d = targets_smpl_kp3d - targets_pelvis[:, None, :]
	    pred_smpl_kp3d = pred_smpl_kp3d - pred_pelvis[:, None, :]

	    body_idx = smpl_x.joint_part['body']
	    face_idx = smpl_x.joint_part['face']
	    lhand_idx = smpl_x.joint_part['lhand']
	    rhand_idx = smpl_x.joint_part['rhand']

	    loss_smpl_kp3d = F.l1_loss(pred_smpl_kp3d,
	                               targets_smpl_kp3d,
	                               reduction='none')

	    # Only keypoints with positive confidence contribute.
	    if not (targets_kp3d_conf > 0).any():
	        return _zero_losses()
	    loss_smpl_kp3d = loss_smpl_kp3d * targets_kp3d_conf

	    losses = {
	        'loss_smpl_body_kp3d':
	        torch.sum(loss_smpl_kp3d[:, body_idx, :]) / num_boxes
	    }
	    part_indices = (('lhand', lhand_idx), ('rhand', rhand_idx),
	                    ('face', face_idx))
	    for part, part_idx in part_indices:
	        part_sum = torch.sum(loss_smpl_kp3d[:, part_idx, :])
	        if not face_hand_kpt:
	            part_sum = 0 * part_sum
	        losses[f'loss_smpl_{part}_kp3d'] = part_sum / num_boxes
	    return losses


	def loss_smpl_kp3d_ra(self,
	                      outputs,
	                      targets,
	                      indices,
	                      idx,
	                      num_boxes,
	                      data_batch,
	                      has_keypoints3d=None,
	                      face_hand_kpt=False):
	    """Confidence-weighted L1 loss on 3D keypoints, per body part.

	    Supervision for keypoints3d w/ ra. Targets come from
	    ``data_batch['smplx_joint_cam']`` (first three channels as
	    coordinates, the rest as confidence, multiplied by
	    ``data_batch['is_3D']``). Predictions and targets are pelvis-centred;
	    the predicted face and hand joints are additionally expressed
	    relative to the predicted neck / wrist before the comparison.
	    NOTE(review): the targets get no such per-part re-centring here --
	    presumably they are already stored that way; confirm against the
	    dataset code.

	    Args:
	        outputs: dict with 'pred_logits' and 'pred_smpl_kp3d'.
	        targets: unused here.
	        indices: matcher result; element 0 is the per-image list of
	            (prediction indices, target indices) pairs.
	        idx: index of the matched predictions.
	        num_boxes: normalizer of every returned term.
	        data_batch: provides 'smplx_joint_cam' and 'is_3D'.
	        has_keypoints3d: accepted but not used by this method.
	        face_hand_kpt: when False the face and hand terms are multiplied
	            by 0.

	    Returns:
	        dict with 'loss_smpl_body_kp3d_ra', '_face_', '_lhand_' and
	        '_rhand_' entries; all zeros when nothing is matched in the
	        batch.
	    """
	    device = outputs['pred_logits'].device
	    indices = indices[0]

	    pred_smpl_kp3d = outputs['pred_smpl_kp3d'][idx].float()

	    # Count the matches of every image, not just those of the first one.
	    num_matched = sum(len(src) for src, _ in indices)
	    if num_matched == 0:
	        zero_names = ('loss_smpl_rhand_kp3d_ra', 'loss_smpl_body_kp3d_ra',
	                      'loss_smpl_face_kp3d_ra', 'loss_smpl_lhand_kp3d_ra')
	        # Zeros that still depend on the predictions.
	        return {
	            name: torch.as_tensor(0., device=device) +
	            pred_smpl_kp3d.sum() * 0
	            for name in zero_names
	        }

	    targets_smpl_kp3d = torch.cat([
	        t[i] for t, (_, i) in zip(data_batch['smplx_joint_cam'], indices)
	    ],
	                                  dim=0)
	    targets_kp3d_conf = targets_smpl_kp3d[:, :, 3:].clone()
	    targets_smpl_kp3d = targets_smpl_kp3d[:, :, :3]

	    # One is_3D value per image, repeated for each of its matches.
	    targets_is_3d = torch.cat([
	        t[None, None].repeat(len(i), 1, 1)
	        for t, (_, i) in zip(data_batch['is_3D'], indices)
	    ],
	                              dim=0)
	    targets_kp3d_conf = (targets_kp3d_conf * targets_is_3d).repeat(1, 1, 3)
	    targets_smpl_kp3d = targets_smpl_kp3d[..., :3].float()

	    # Express both predictions and targets relative to their pelvis.
	    pelvis_idx = get_keypoint_idx('pelvis', self.convention)
	    targets_pelvis = targets_smpl_kp3d[..., pelvis_idx, :]
	    pred_pelvis = pred_smpl_kp3d[..., pelvis_idx, :]
	    targets_smpl_kp3d = targets_smpl_kp3d - targets_pelvis[:, None, :]
	    pred_smpl_kp3d = pred_smpl_kp3d - pred_pelvis[:, None, :]

	    def _part_sum(part_idx, root_idx=None):
	        # Weighted L1 sum over one joint group; when root_idx is given,
	        # the predicted joints are first made relative to that joint.
	        part_pred = pred_smpl_kp3d[:, part_idx, :]
	        if root_idx is not None:
	            part_pred = part_pred - pred_smpl_kp3d[:, root_idx, None, :]
	        part_l1 = F.l1_loss(part_pred,
	                            targets_smpl_kp3d[:, part_idx, :],
	                            reduction='none')
	        return torch.sum(part_l1 * targets_kp3d_conf[:, part_idx, :])

	    # Calculate body, face and hand loss separately.
	    losses = {
	        'loss_smpl_body_kp3d_ra':
	        _part_sum(smpl_x.joint_part['body']) / num_boxes
	    }
	    part_roots = (
	        ('face', smpl_x.neck_idx),
	        ('lhand', smpl_x.lwrist_idx),
	        ('rhand', smpl_x.rwrist_idx),
	    )
	    for part, root_idx in part_roots:
	        value = _part_sum(smpl_x.joint_part[part], root_idx) / num_boxes
	        if not face_hand_kpt:
	            value = 0 * value
	        losses[f'loss_smpl_{part}_kp3d_ra'] = value

	    return losses

	def loss_smpl_kp2d(self,
	outputs,
	targets,
	indices,
	idx,
	num_boxes,
	data_batch,
	focal_length=5000.,
	has_keypoints2d=None,
	face_hand_kpt=False):
	"""Compute loss for 2d keypoints."""
	device = outputs['pred_logits'].device
	indices = indices[0]

	valid_num=0
	for indice in indices[0]:
	valid_num+=len(indice)
	# pdb.set_trace()
	pred_smpl_kp3d = outputs['pred_smpl_kp3d'][idx].float()#.detach()
	# pred_smpl_kp3d = outputs['pred_smpl_kp3d'][idx].float()
	# pelvis_idx = get_keypoint_idx('pelvis', self.convention)
	# pred_pelvis = pred_smpl_kp3d[..., pelvis_idx, :]

	# pred_smpl_kp3d = pred_smpl_kp3d - pred_pelvis[:, None, :] +1e-7


	pred_cam = outputs['pred_smpl_cam'][idx].float()

	targets_kp2d = torch.cat([t[i] for t, (_, i) in zip(data_batch['joint_img'], indices)], dim=0)

	keypoints2d_conf = targets_kp2d[:,:,2:].clone()
	targets_kp2d = targets_kp2d[:,:,:2]

	target_lhand_boxes_conf = torch.cat(
	[t[i] for t, (_, i) in zip(data_batch['lhand_bbox_valid'], indices)], dim=0)
	lhand_num_boxes = target_lhand_boxes_conf.sum()
	target_rhand_boxes_conf = torch.cat(
	[t[i] for t, (_, i) in zip(data_batch['rhand_bbox_valid'], indices)], dim=0)
	rhand_num_boxes = target_rhand_boxes_conf.sum()
	target_face_boxes_conf = torch.cat(
	[t[i] for t, (_, i) in zip(data_batch['face_bbox_valid'], indices)], dim=0)
	face_num_boxes = target_face_boxes_conf.sum()
	# t_pose = torch.cat([t[i] for t, (_, i) in zip(data_batch['smplx_pose'], indices)], dim=0)
	# t_shape = torch.cat([t[i] for t, (_, i) in zip(data_batch['smplx_shape'], indices)], dim=0)
	# t_expr = torch.cat([t[i] for t, (_, i) in zip(data_batch['smplx_expr'], indices)], dim=0)

	keypoints2d_conf = keypoints2d_conf.repeat(1, 1, 2)

	targets_kp2d = targets_kp2d[:, :, :2].float()
	targets_kp2d[:,:,0] = targets_kp2d[:,:,0]/cfg.output_hm_shape[2]
	targets_kp2d[:,:,1] = targets_kp2d[:,:,1]/cfg.output_hm_shape[1]
	# targets_kp2d = targets_kp2d*2-1
	img_wh = torch.cat([data_batch['img_shape'][i][None] for i in idx[0]], dim=0).flip(-1)
	# pred_smpl_kp2d = weak_perspective_projection(pred_smpl_kp3d, scale=pred_cam[:, 0], translation=pred_cam[:, 1:3])

	# If kp2ds is normalized to [-1, 1], the center should be the center of the image;
	# if normalized to 0-1, it should be at the top left corner (0, 0)?


	pred_smpl_kp2d = project_points_new(
	points_3d=pred_smpl_kp3d,
	pred_cam=pred_cam,
	focal_length=focal_length,
	camera_center=img_wh/2
	)

	pred_smpl_kp2d = pred_smpl_kp2d / img_wh[:, None]
	vis=False
	# if 'vis' in cfg:
	# vis=cfg['vis']
	# vis = True
	if vis:
	import mmcv
	import cv2
	import numpy as np
	from detrsmpl.core.visualization.visualize_keypoints2d import visualize_kp2d
	from detrsmpl.core.visualization.visualize_smpl import visualize_smpl_hmr,render_smpl
	from detrsmpl.models.body_models.builder import build_body_model

	from pytorch3d.io import save_obj
	from detrsmpl.core.visualization.visualize_keypoints3d import visualize_kp3d

	img = mmcv.imdenormalize(
	img=(data_batch['img'][0].cpu().numpy()).transpose(1, 2, 0),
	mean=np.array([123.675, 116.28, 103.53]),
	std=np.array([58.395, 57.12, 57.375]),
	to_bgr=True).astype(np.uint8)
	cv2.imwrite('test.png', img)
	device = outputs['pred_smpl_kp3d'].device

	body_model = dict(
	type='smplx',
	keypoint_src='smplx',
	num_expression_coeffs=10,
	num_betas=10,
	keypoint_dst='smplx_137',
	model_path='data/body_models/smplx',
	use_pca=False,
	use_face_contour=True)
	bm = build_body_model(body_model).to(device)
	pred_smpl_body_pose = rotmat_to_aa(outputs['pred_smpl_pose'][idx])
	pred_smpl_lhand_pose = rotmat_to_aa(outputs['pred_smpl_lhand_pose'][idx])
	pred_smpl_rhand_pose = rotmat_to_aa(outputs['pred_smpl_rhand_pose'][idx])
	pred_smpl_jaw_pose = rotmat_to_aa(outputs['pred_smpl_jaw_pose'][idx])
	pred_smpl_shape = outputs['pred_smpl_beta'][idx]
	pred_output = bm(
	betas=pred_smpl_shape.reshape(-1, 10),
	body_pose=pred_smpl_body_pose[:,1:].reshape(-1, 21*3),
	global_orient=pred_smpl_body_pose[:,:1].reshape(-1, 3),
	left_hand_pose=pred_smpl_lhand_pose.reshape(-1, 15*3),
	right_hand_pose=pred_smpl_rhand_pose.reshape(-1, 15*3),
	leye_pose=torch.zeros_like(pred_smpl_jaw_pose).reshape(-1, 3),
	reye_pose=torch.zeros_like(pred_smpl_jaw_pose).reshape(-1, 3),
	expression=torch.zeros_like(pred_smpl_shape).reshape(-1, 10),
	jaw_pose=pred_smpl_jaw_pose.reshape(-1, 3))
	verts = pred_output['vertices']

	# for i_obj,v in enumerate(verts):
	# save_obj('./figs/pred_smpl_%d.obj'%i_obj,verts = v,faces=torch.tensor([]))
	pred_cam = outputs['pred_smpl_cam'][idx]

	targets_smpl_pose = data_batch['smplx_pose'][0]
	targets_shape = data_batch['smplx_shape'][0]
	gt_kp3d = data_batch['joint_cam'][0]

	gt_kp2d = data_batch['joint_img'][0]
	gt_body_boxes = torch.cat(
	[t['boxes'][i] for t, (_, i) in zip(targets, indices)], dim=0)
	# gt kp3d
	pred_smpl_kp3d = outputs['pred_smpl_kp3d'][idx].float()

	visualize_kp3d(gt_kp3d.detach().cpu().numpy(),
	output_path='./figs/gt3d',
	data_source='smplx_137')
	# visualize_kp3d(pred_smpl_kp3d.detach().cpu().numpy(),
	# output_path='./figs/pred3d',
	# data_source='smplx_137')
	# gt kp2d
	img =(data_batch['img'][0].permute(1,2,0)*255).int().cpu().numpy()
	gt_2d= gt_kp2d.detach().cpu().numpy()[...,:2]*data_batch['img_shape'].cpu().numpy()[0,None,None,::-1]
	gt_2d[...,0] = gt_2d[...,0]/12
	gt_2d[...,1] = gt_2d[...,1]/16
	import mmcv
	batch_id = 0
	gt_bbox = (box_ops.box_cxcywh_to_xyxy(targets[batch_id]['boxes']).reshape(-1,2).detach().cpu().numpy()*data_batch['img_shape'].cpu().numpy()[batch_id, ::-1]).reshape(-1,4)
	gt_bbox_lhand = (box_ops.box_cxcywh_to_xyxy(targets[batch_id]['lhand_boxes']).reshape(-1,2).detach().cpu().numpy()*data_batch['img_shape'].cpu().numpy()[batch_id, ::-1]).reshape(-1,4)
	gt_bbox_rhand = (box_ops.box_cxcywh_to_xyxy(targets[batch_id]['rhand_boxes']).reshape(-1,2).detach().cpu().numpy()*data_batch['img_shape'].cpu().numpy()[batch_id, ::-1]).reshape(-1,4)
	gt_bbox_face = (box_ops.box_cxcywh_to_xyxy(targets[batch_id]['face_boxes']).reshape(-1,2).detach().cpu().numpy()*data_batch['img_shape'].cpu().numpy()[batch_id, ::-1]).reshape(-1,4)
	gt_bbox = np.concatenate([gt_bbox,gt_bbox_face,gt_bbox_rhand,gt_bbox_lhand],axis=0)
	# gt_bbox = (box_ops.box_cxcywh_to_xyxy(gt_body_boxes).reshape(-1,2,2).detach().cpu().numpy()*data_batch['img_shape'].cpu().numpy()[0, ::-1][None,None,:]).reshape(-1,4)
	img = mmcv.imshow_bboxes(img.copy(), gt_bbox, show=False)

	gt_2d = data_batch['joint_img'][0][:,:,:2].cpu().numpy()data_batch['img_shape'].cpu().numpy()[0,None,None,::-1]# data_batch['joint_img'][0][:,:,2:].cpu().numpy()
	gt_2d[...,0] = gt_2d[...,0]/12
	gt_2d[...,1] = gt_2d[...,1]/16
	# data_batch['joint_img']
	# gt_kp2d = gt_2d[0][keypoints2d_conf[0]!=0]
	visualize_kp2d(
	(gt_2d).reshape(-1,2)[None],
	output_path='./figs/gt2d',
	image_array=img.copy()[None],
	# data_source='smplx_137',
	disable_limbs = True,
	overwrite=True)
	img =(data_batch['img'][0].permute(1,2,0)*255).int().cpu().numpy()
	# pred_smpl_kp2d = project_points_new(
	# points_3d=outputs['pred_smpl_kp3d'][:,:2].reshape(-1,137,3),
	# pred_cam=pred_cam,
	# focal_length=focal_length,
	# camera_center=img_wh/2
	# )

	img_shape = data_batch['img_shape'][0]




	# pred_kp2d = pred_kp2d.cpu().detach().numpy()*img_shape.cpu().numpy()[None,None ::-1]
	# pred_bbox_all = []
	# for i in idx[0]:

	# pred_bbox_body = (box_ops.box_cxcywh_to_xyxy(outputs['pred_boxes'][0,i]).reshape(2,2).detach().cpu().numpy()*data_batch['img_shape'].cpu().numpy()[0, ::-1]).reshape(1,4)
	# pred_bbox_lhand = (box_ops.box_cxcywh_to_xyxy(outputs['pred_lhand_boxes'][0,i]).reshape(2,2).detach().cpu().numpy()*data_batch['img_shape'].cpu().numpy()[0, ::-1]).reshape(1,4)
	# pred_bbox_rhand = (box_ops.box_cxcywh_to_xyxy(outputs['pred_rhand_boxes'][0,i]).reshape(2,2).detach().cpu().numpy()*data_batch['img_shape'].cpu().numpy()[0, ::-1]).reshape(1,4)
	# pred_bbox_face = (box_ops.box_cxcywh_to_xyxy(outputs['pred_face_boxes'][0,i]).reshape(2,2).detach().cpu().numpy()*data_batch['img_shape'].cpu().numpy()[0, ::-1]).reshape(1,4)
	# pred_bbox = np.concatenate([pred_bbox_body,pred_bbox_face,pred_bbox_rhand,pred_bbox_lhand],axis=0)
	# pred_bbox_all.append(pred_bbox)
	# src_body_boxes = outputs['pred_boxes'][idx]
	# pred_bbox_all = np.concatenate(pred_bbox_all,axis=0)

	pred_bbox_body = (box_ops.box_cxcywh_to_xyxy(outputs['pred_boxes'][idx]).reshape(-1,2).detach().cpu().numpy()*data_batch['img_shape'].cpu().numpy()[1, ::-1]).reshape(-1,4)
	pred_bbox_lhand = (box_ops.box_cxcywh_to_xyxy(outputs['pred_lhand_boxes'][idx]).reshape(-1,2).detach().cpu().numpy()*data_batch['img_shape'].cpu().numpy()[1, ::-1]).reshape(-1,4)
	pred_bbox_rhand = (box_ops.box_cxcywh_to_xyxy(outputs['pred_rhand_boxes'][idx]).reshape(-1,2).detach().cpu().numpy()*data_batch['img_shape'].cpu().numpy()[1, ::-1]).reshape(-1,4)
	pred_bbox_face = (box_ops.box_cxcywh_to_xyxy(outputs['pred_face_boxes'][idx]).reshape(-1,2).detach().cpu().numpy()*data_batch['img_shape'].cpu().numpy()[1, ::-1]).reshape(-1,4)
	pred_bbox = np.concatenate([pred_bbox_body,pred_bbox_face,pred_bbox_rhand,pred_bbox_lhand],axis=0)
	# pred_bbox_body = (box_ops.box_cxcywh_to_xyxy(src_body_boxes).reshape(-1,2,2).detach().cpu().numpy()*data_batch['img_shape'].cpu().numpy()[0, ::-1][None,None,:]).reshape(-1,4)
	# import ipdb;ipdb.set_trace()
	img = mmcv.imshow_bboxes(img.copy(), pred_bbox, show=False)
	# cv2.imwrite('test.png',img)

	visualize_kp2d(
	(pred_smpl_kp2d*img_wh[:, None])[None].detach().cpu().numpy(),
	output_path='./figs/pred2d',
	image_array=img.copy()[None],
	data_source='smplx_137',
	overwrite=True)

	# visualize_kp2d(
	# (pred_smpl_kp2d*img_wh[:, None])[None].detach().cpu().numpy(),
	# output_path='./figs/pred2d',
	# image_array=img.copy()[None],
	# data_source='smplx_137',
	# overwrite=True)
	vis_smpl=True
	if vis_smpl:

	gt_output = bm(
	betas=targets_shape.reshape(-1, 10),
	body_pose=targets_smpl_pose[:,3:66].reshape(-1, 21*3),
	global_orient=targets_smpl_pose[:,:3].reshape(-1, 3),
	left_hand_pose=targets_smpl_pose[:,66:111].reshape(-1, 15*3),
	right_hand_pose=targets_smpl_pose[:,111:156].reshape(-1, 15*3),
	leye_pose=torch.zeros_like(targets_smpl_pose[:,:3]).reshape(-1, 3),
	reye_pose=torch.zeros_like(targets_smpl_pose[:,:3]).reshape(-1, 3),
	expression=torch.zeros_like(targets_shape).reshape(-1, 10),
	jaw_pose=targets_smpl_pose[:,156:].reshape(-1, 3))
	verts = gt_output['vertices']
	for i_obj,v in enumerate(verts):
	save_obj('./figs/gt_smpl_%d.obj'%i_obj,verts = v,faces=torch.tensor([]))
	import ipdb;ipdb.set_trace()
	losses = {}

	if valid_num == 0:
	losses['loss_smpl_body_kp2d'] = torch.as_tensor(0., device=device) + pred_smpl_kp2d.sum()*0

	losses['loss_smpl_lhand_kp2d'] = torch.as_tensor(0., device=device) + pred_smpl_kp2d.sum()*0

	losses['loss_smpl_rhand_kp2d'] = torch.as_tensor(0., device=device) + pred_smpl_kp2d.sum()*0

	losses['loss_smpl_face_kp2d'] = torch.as_tensor(0., device=device) + pred_smpl_kp2d.sum()*0
	return losses

	body_idx = smpl_x.joint_part['body']
	face_idx = smpl_x.joint_part['face']
	lhand_idx = smpl_x.joint_part['lhand']
	rhand_idx = smpl_x.joint_part['rhand']

	loss_smpl_kp2d = F.l1_loss(pred_smpl_kp2d,
	targets_kp2d,
	reduction='none')

	# If has_keypoints2d is not None, then computes the losses on the
	# instances that have ground-truth keypoints2d.
	# But the zero confidence keypoints will be included in mean.
	# Otherwise, only compute the keypoints2d
	# which have positive confidence.
	# has_keypoints2d is None when the key has_keypoints2d
	# is not in the datasets
	# import pdb; pdb.set_trace()

	valid_pos = keypoints2d_conf > 0
	if keypoints2d_conf[valid_pos].numel() == 0:
	return {
	'loss_smpl_body_kp2d': torch.as_tensor(0., device=device) + loss_smpl_kp2d.sum()*0,
	'loss_smpl_lhand_kp2d': torch.as_tensor(0., device=device) + loss_smpl_kp2d.sum()*0,
	'loss_smpl_rhand_kp2d': torch.as_tensor(0., device=device) + loss_smpl_kp2d.sum()*0,
	'loss_smpl_face_kp2d': torch.as_tensor(0., device=device) + loss_smpl_kp2d.sum()*0,
	}
	loss_smpl_kp2d = loss_smpl_kp2d * keypoints2d_conf
	# loss /= keypoints2d_conf[valid_pos].numel()


	if face_hand_kpt:
	losses['loss_smpl_body_kp2d'] = torch.sum(loss_smpl_kp2d[:, body_idx, :]) / num_boxes
	if lhand_num_boxes>0:
	losses['loss_smpl_lhand_kp2d'] = torch.sum(loss_smpl_kp2d[:, lhand_idx, :]) / lhand_num_boxes
	else:
	losses['loss_smpl_lhand_kp2d'] =torch.as_tensor(0., device=device) + loss_smpl_kp2d.sum()*0
	if rhand_num_boxes>0:
	losses['loss_smpl_rhand_kp2d'] = torch.sum(loss_smpl_kp2d[:, rhand_idx, :]) / rhand_num_boxes
	else:
	losses['loss_smpl_rhand_kp2d'] = torch.as_tensor(0., device=device) + loss_smpl_kp2d.sum()*0
	if face_num_boxes>0:
	losses['loss_smpl_face_kp2d'] = torch.sum(loss_smpl_kp2d[:, face_idx, :]) / face_num_boxes
	else:
	losses['loss_smpl_face_kp2d'] = torch.as_tensor(0., device=device) + loss_smpl_kp2d.sum()*0
	else:
	losses['loss_smpl_body_kp2d'] = torch.sum(loss_smpl_kp2d[:, body_idx, :]) / num_boxes
	losses['loss_smpl_lhand_kp2d'] = 0*torch.sum(loss_smpl_kp2d[:, lhand_idx, :]) / (keypoints2d_conf[:, lhand_idx].sum() + 1e-6)
	losses['loss_smpl_rhand_kp2d'] = 0*torch.sum(loss_smpl_kp2d[:, rhand_idx, :]) / (keypoints2d_conf[:, rhand_idx].sum() + 1e-6)
	losses['loss_smpl_face_kp2d'] = 0*torch.sum(loss_smpl_kp2d[:, face_idx, :]) / (keypoints2d_conf[:, face_idx].sum() + 1e-6)


	return losses

	def loss_smpl_kp2d_ba(self,
	                      outputs,
	                      targets,
	                      indices,
	                      idx,
	                      num_boxes,
	                      data_batch,
	                      focal_length=5000.,
	                      has_keypoints2d=None,
	                      face_hand_kpt=False):
	    """Compute bbox-aligned L1 losses on the projected SMPL-X 2D keypoints.

	    The matched 3D keypoints are projected with ``project_points_new`` and
	    divided by the image width/height. For the left-hand, right-hand and
	    face joint groups, targets and predictions are re-expressed relative
	    to the ground-truth part box, and each prediction is shifted by the
	    mean offset to the target keypoints that fall inside the box before
	    the L1 loss is taken.

	    Args:
	        outputs: model output dict. Reads 'pred_logits' (device only),
	            'pred_smpl_kp3d' and 'pred_smpl_cam'.
	        targets: list of per-image dicts. Reads 'lhand_boxes',
	            'rhand_boxes' and 'face_boxes', which are passed through
	            ``box_ops.box_cxcywh_to_xyxy``.
	        indices: matcher output; only ``indices[0]`` (one ``(src, tgt)``
	            index pair per image) is used.
	        idx: ``(batch_idx, src_idx)`` selecting the matched predictions.
	        num_boxes: normaliser of the body loss.
	        data_batch: reads 'joint_img', 'img_shape' and the
	            '{lhand,rhand,face}_bbox_valid' entries.
	        focal_length: forwarded to ``project_points_new``.
	        has_keypoints2d: unused; kept for interface compatibility.
	        face_hand_kpt: when False the hand and face losses are zeros.

	    Returns:
	        dict with the keys 'loss_smpl_{body,lhand,rhand,face}_kp2d_ba'.
	    """
	    device = outputs['pred_logits'].device
	    indices = indices[0]
	    pred_smpl_kp3d = outputs['pred_smpl_kp3d'][idx].float()
	    pred_cam = outputs['pred_smpl_cam'][idx].float()

	    # NOTE(review): this only counts the (src, tgt) pair of the FIRST
	    # image, as the code always did -- confirm that is the intended
	    # "nothing matched" test for batches with more than one image.
	    valid_num = sum(len(indice) for indice in indices[0])

	    targets_kp2d = torch.cat(
	        [t[i] for t, (_, i) in zip(data_batch['joint_img'], indices)],
	        dim=0)
	    # Channels 2: of joint_img act as per-keypoint weights; they are tiled
	    # twice on the last axis before multiplying the (x, y) loss
	    # (presumably a single confidence channel -- TODO confirm).
	    keypoints2d_conf = targets_kp2d[:, :, 2:].clone().repeat(1, 1, 2)
	    targets_kp2d = targets_kp2d[:, :, :2].float()
	    targets_kp2d[:, :, 0] = targets_kp2d[:, :, 0] / cfg.output_hm_shape[2]
	    targets_kp2d[:, :, 1] = targets_kp2d[:, :, 1] / cfg.output_hm_shape[1]

	    # img_shape is (h, w); flip it to (w, h) to pair with (x, y).
	    img_wh = torch.cat(
	        [data_batch['img_shape'][i][None] for i in idx[0]],
	        dim=0).flip(-1)

	    pred_smpl_kp2d = project_points_new(
	        points_3d=pred_smpl_kp3d,
	        pred_cam=pred_cam,
	        focal_length=focal_length,
	        camera_center=img_wh / 2)
	    pred_smpl_kp2d = pred_smpl_kp2d / img_wh[:, None]

	    part_names = ('lhand', 'rhand', 'face')

	    if valid_num == 0:
	        # Zero losses that stay connected to the prediction graph.
	        return {
	            f'loss_smpl_{name}_kp2d_ba':
	            torch.as_tensor(0., device=device) + pred_smpl_kp2d.sum() * 0
	            for name in ('body',) + part_names
	        }

	    # Ground-truth part boxes as pixel xyxy, plus their validity flags.
	    part_valid = {}
	    part_boxes = {}
	    for name in ('rhand', 'lhand', 'face'):
	        part_valid[name] = torch.cat(
	            [v[i] for v, (_, i) in
	             zip(data_batch[f'{name}_bbox_valid'], indices)],
	            dim=0)
	        boxes = torch.cat(
	            [t[f'{name}_boxes'][i] for t, (_, i) in zip(targets, indices)],
	            dim=0)
	        part_boxes[name] = (
	            box_ops.box_cxcywh_to_xyxy(boxes).reshape(-1, 2, 2) *
	            img_wh[:, None]).reshape(-1, 4)
	    num_part_bbox = {name: part_valid[name].sum() for name in part_names}

	    img_shape = torch.cat(
	        [t[None].repeat(len(i), 1)
	         for t, (_, i) in zip(data_batch['img_shape'], indices)],
	        dim=0)
	    img_h, img_w = img_shape[:, :1], img_shape[:, 1:]

	    any_part_valid = not (part_valid['lhand'] + part_valid['rhand'] +
	                          part_valid['face'] == 0).all()
	    if any_part_valid:
	        for name in part_names:
	            bbox = part_boxes[name]
	            part_idx = smpl_x.joint_part[name]

	            # Affine map from image-normalised coordinates to the 0-1
	            # space of the ground-truth part box (bbox is pixel xyxy).
	            offset_x = bbox[:, None, 0] / img_w
	            offset_y = bbox[:, None, 1] / img_h
	            scale_x = img_w / (bbox[:, None, 2] - bbox[:, None, 0] + 1e-6)
	            scale_y = img_h / (bbox[:, None, 3] - bbox[:, None, 1] + 1e-6)

	            x = (targets_kp2d[:, part_idx, 0] - offset_x) * scale_x
	            y = (targets_kp2d[:, part_idx, 1] - offset_y) * scale_y
	            # Target keypoints that land inside the part box. The original
	            # expression had lost the operator between the first two terms.
	            in_box = (x >= 0) & (x <= 1) & (y >= 0) & (y <= 1)
	            coord = torch.stack((x, y), 2)

	            # Splice the box-space targets back in; this relies on the
	            # joint indices of a part being one contiguous run.
	            targets_kp2d = torch.cat(
	                (targets_kp2d[:, :part_idx[0], :], coord,
	                 targets_kp2d[:, part_idx[-1] + 1:, :]), 1)

	            x_pred = (pred_smpl_kp2d[:, part_idx, 0] - offset_x) * scale_x
	            y_pred = (pred_smpl_kp2d[:, part_idx, 1] - offset_y) * scale_y
	            coord_pred = torch.stack((x_pred, y_pred), 2)

	            trans = []
	            for bid in range(coord_pred.shape[0]):
	                mask = in_box[bid]
	                if torch.sum(mask) == 0:
	                    # No usable target keypoint: leave the part unshifted.
	                    # Allocated next to the predictions instead of a
	                    # hard-coded .cuda().
	                    trans.append(coord_pred.new_zeros(2))
	                else:
	                    trans.append(
	                        (coord[bid, mask, :2] -
	                         coord_pred[bid, mask, :2]).mean(0))
	            trans = torch.stack(trans)[:, None, :]

	            coord_pred = coord_pred + trans  # global translation alignment
	            pred_smpl_kp2d = torch.cat(
	                (pred_smpl_kp2d[:, :part_idx[0], :], coord_pred,
	                 pred_smpl_kp2d[:, part_idx[-1] + 1:, :]), 1)

	    loss_smpl_kp2d_ba = F.l1_loss(pred_smpl_kp2d,
	                                  targets_kp2d[:, :, :2],
	                                  reduction='none')

	    def zero_loss():
	        # Graph-connected zero, one fresh tensor per loss entry.
	        return (torch.as_tensor(0., device=device) +
	                loss_smpl_kp2d_ba.sum() * 0)

	    valid_pos = keypoints2d_conf > 0
	    if keypoints2d_conf[valid_pos].numel() == 0:
	        return {
	            f'loss_smpl_{name}_kp2d_ba': zero_loss()
	            for name in ('body',) + part_names
	        }

	    # TODO (original author's note, translated): "needs to be changed".
	    loss_smpl_kp2d_ba = loss_smpl_kp2d_ba * keypoints2d_conf

	    losses = {
	        'loss_smpl_body_kp2d_ba':
	        torch.sum(loss_smpl_kp2d_ba[:, smpl_x.joint_part['body'], :]) /
	        num_boxes
	    }
	    for name in part_names:
	        num_part = num_part_bbox[name]
	        if face_hand_kpt and num_part > 0:
	            part_loss = torch.sum(
	                loss_smpl_kp2d_ba[:, smpl_x.joint_part[name], :]) / num_part
	        else:
	            # Also used when face_hand_kpt is False: the old
	            # ``0 * sum / num_part`` became NaN whenever num_part was 0.
	            part_loss = zero_loss()
	        losses[f'loss_smpl_{name}_kp2d_ba'] = part_loss
	    return losses


	def loss_boxes(self, outputs, targets, indices,
	               idx, num_boxes, data_batch,
	               face_hand_box=False):
	    """Compute the losses related to the bounding boxes, the L1 regression
	    loss and the GIoU loss targets dicts must contain the key "boxes"
	    containing a tensor of dim [nb_target_boxes, 4] The target boxes are
	    expected in format (center_x, center_y, w, h), normalized by the image
	    size.

	    Args:
	        outputs: model output dict. 'pred_boxes' is required; the
	            'pred_{lhand,rhand,face}_boxes' entries are used only when
	            present and ``face_hand_box`` is True.
	        targets: list of per-image dicts with 'boxes' and, for the parts,
	            '{lhand,rhand,face}_boxes'.
	        indices: matcher output; only ``indices[0]`` (one ``(src, tgt)``
	            index pair per image) is used.
	        idx: ``(batch_idx, src_idx)`` selecting the matched predictions.
	        num_boxes: normaliser applied to every summed loss.
	        data_batch: reads the '{body,lhand,rhand,face}_bbox_valid' entries,
	            which weight each matched box.
	        face_hand_box: also compute the hand and face box losses.

	    Returns:
	        dict with 'loss_<part>_bbox' and 'loss_<part>_giou' for every part
	        that was computed. When the first image has no matches, all eight
	        keys are returned as zeros.
	    """
	    indices = indices[0]
	    assert 'pred_boxes' in outputs

	    # NOTE(review): this only counts the (src, tgt) pair of the FIRST
	    # image, as the code always did -- confirm that is the intended
	    # "nothing matched" test for batches with more than one image.
	    valid_num = sum(len(indice) for indice in indices[0])

	    def part_losses(pred_key, box_key, valid_key):
	        # Validity-weighted L1 and GIoU terms of one box type.
	        src_boxes = outputs[pred_key][idx]
	        target_boxes = torch.cat(
	            [t[box_key][i] for t, (_, i) in zip(targets, indices)], dim=0)
	        target_conf = torch.cat(
	            [v[i] for v, (_, i) in zip(data_batch[valid_key], indices)],
	            dim=0)
	        loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction='none')
	        loss_bbox = loss_bbox * target_conf[:, None]
	        loss_giou = 1 - torch.diag(
	            box_ops.generalized_box_iou(
	                box_ops.box_cxcywh_to_xyxy(src_boxes),
	                box_ops.box_cxcywh_to_xyxy(target_boxes)))
	        return loss_bbox, loss_giou * target_conf

	    all_parts = ('body', 'lhand', 'rhand', 'face')
	    wanted = [('body', 'pred_boxes', 'boxes', 'body_bbox_valid')]
	    if face_hand_box:
	        wanted += [(name, f'pred_{name}_boxes', f'{name}_boxes',
	                    f'{name}_bbox_valid')
	                   for name in all_parts[1:]
	                   if f'pred_{name}_boxes' in outputs]

	    losses = {}
	    bbox_terms = {}
	    for name, pred_key, box_key, valid_key in wanted:
	        loss_bbox, loss_giou = part_losses(pred_key, box_key, valid_key)
	        bbox_terms[name] = loss_bbox
	        losses[f'loss_{name}_bbox'] = loss_bbox.sum() / num_boxes
	        losses[f'loss_{name}_giou'] = loss_giou.sum() / num_boxes

	    if valid_num == 0:
	        # Replace everything by zeros that stay attached to the graph of
	        # the matching prediction head. A part that was not computed falls
	        # back to the body term; the old code raised on the unbound
	        # ``loss_<part>_bbox`` in that case.
	        zero_losses = {}
	        for name in all_parts:
	            anchor = bbox_terms.get(name, bbox_terms['body'])
	            zero_losses[f'loss_{name}_bbox'] = anchor.sum() * 0
	            zero_losses[f'loss_{name}_giou'] = anchor.sum() * 0
	        return zero_losses

	    return losses

	def loss_dn_boxes(self, outputs, targets, indices, idx, num_boxes,
	                  data_batch):
	    """Compute the box losses of the denoising (dn) queries.

	    Input (read from ``outputs``):
	        - 'dn_bbox_pred': bs, num_dn, 4
	        - 'dn_bbox_input': bs, num_dn, 4
	        - 'num_tgt': normaliser handed to ``tgt_loss_boxes``

	    Returns:
	        dict with 'dn_loss_bbox' and 'dn_loss_giou'. Both are zeros when
	        ``outputs`` has no 'num_tgt' entry or when the first image has no
	        matches.
	    """
	    indices = indices[0]

	    # This guard must run before 'num_tgt' is read; it used to sit after
	    # ``outputs['num_tgt']`` and could therefore never trigger.
	    if 'num_tgt' not in outputs:
	        anchor = (outputs['dn_bbox_pred']
	                  if 'dn_bbox_pred' in outputs else outputs['pred_logits'])
	        return {
	            'dn_loss_bbox': anchor.sum() * 0,
	            'dn_loss_giou': anchor.sum() * 0,
	        }

	    num_tgt = outputs['num_tgt']
	    src_boxes = outputs['dn_bbox_pred']
	    tgt_boxes = outputs['dn_bbox_input']

	    # NOTE(review): this only counts the (src, tgt) pair of the FIRST
	    # image, as the code always did -- confirm that is intended.
	    valid_num = sum(len(indice) for indice in indices[0])
	    if valid_num == 0:
	        return {
	            'dn_loss_bbox': src_boxes.sum() * 0,
	            'dn_loss_giou': src_boxes.sum() * 0,
	        }

	    return self.tgt_loss_boxes(src_boxes, tgt_boxes, num_tgt)

	def loss_dn_labels(self, outputs, targets, indices, idx, num_boxes,
	                   data_batch):
	    """Compute the classification loss of the denoising (dn) queries.

	    Input (read from ``outputs``):
	        - 'dn_class_pred': bs, num_dn, num_classes
	        - 'dn_class_input': bs, num_dn
	        - 'num_tgt': normaliser handed to ``tgt_loss_labels``

	    Returns:
	        dict with the single key 'dn_loss_ce'. It is a zero derived from
	        'pred_logits' when ``outputs`` has no 'num_tgt' entry or when the
	        first image has no matches.
	    """
	    per_image_matches = indices[0]

	    if 'num_tgt' not in outputs:
	        return {'dn_loss_ce': outputs['pred_logits'].sum() * 0}

	    # NOTE(review): only the first image's (src, tgt) pair is counted.
	    matched = sum(map(len, per_image_matches[0]))
	    if matched == 0:
	        return {'dn_loss_ce': outputs['pred_logits'].sum() * 0}

	    return self.tgt_loss_labels(
	        outputs['dn_class_pred'],  # bs, num_dn, text_len
	        outputs['dn_class_input'],
	        outputs['num_tgt'])

	@torch.no_grad()
	def loss_matching_cost(self, outputs, targets, indices, idx, num_boxes,
	                       data_batch):
	    """Expose the matcher's mean costs as loss entries.

	    Only ``indices[1]`` is read: a dict of cost values. Every key is
	    returned with a 'set_' prefix and its value unchanged; all other
	    arguments are ignored.
	    """
	    losses = {}
	    for cost_name, cost_value in indices[1].items():
	        losses[f'set_{cost_name}'] = cost_value
	    return losses

	def _get_src_permutation_idx(self, indices):
	# permute predictions following indices
	batch_idx = torch.cat(
	[torch.full_like(src, i) for i, (src, _) in enumerate(indices)])
	src_idx = torch.cat([src for (src, _) in indices])
	return batch_idx, src_idx

	def _get_tgt_permutation_idx(self, indices):
	# permute targets following indices
	batch_idx = torch.cat(
	[torch.full_like(tgt, i) for i, (_, tgt) in enumerate(indices)])
	tgt_idx = torch.cat([tgt for (_, tgt) in indices])
	return batch_idx, tgt_idx

	def get_loss(self, loss, outputs, targets, data_batch, indices, num_boxes,
	             **kwargs):
	    """Dispatch one named loss to the method that implements it.

	    Args:
	        loss: name of the loss, e.g. 'boxes' or 'smpl_kp2d'.
	        outputs, targets, data_batch, num_boxes: forwarded unchanged.
	        indices: matcher output, forwarded unchanged; ``indices[0]`` is
	            also turned into the source permutation index passed on.
	        **kwargs: extra options forwarded to the loss method.

	    Returns:
	        Whatever the selected loss method returns.

	    Raises:
	        AssertionError: if ``loss`` is not a known loss name.
	    """
	    handlers = dict(
	        smpl_pose=self.loss_smpl_pose,
	        smpl_beta=self.loss_smpl_beta,
	        smpl_expr=self.loss_smpl_expr,
	        smpl_kp2d=self.loss_smpl_kp2d,
	        smpl_kp2d_ba=self.loss_smpl_kp2d_ba,
	        smpl_kp3d_ra=self.loss_smpl_kp3d_ra,
	        smpl_kp3d=self.loss_smpl_kp3d,
	        labels=self.loss_labels,
	        cardinality=self.loss_cardinality,
	        boxes=self.loss_boxes,
	        dn_label=self.loss_dn_labels,
	        dn_bbox=self.loss_dn_boxes,
	        matching=self.loss_matching_cost,
	    )

	    src_permutation = self._get_src_permutation_idx(indices[0])
	    assert loss in handlers, f'do you really want to compute {loss} loss?'
	    handler = handlers[loss]
	    return handler(outputs, targets, indices, src_permutation, num_boxes,
	                   data_batch, **kwargs)

	def prep_for_dn2(self, mask_dict):
	    """Unpack the denoising entries of ``mask_dict`` into a tuple.

	    Returns:
	        ``(known_labels, known_bboxs, output_known_class,
	        output_known_coord, pad_size)`` taken from the equally named
	        keys of ``mask_dict``.
	    """
	    bboxs, labels, coords, classes, pad_size = (
	        mask_dict[key]
	        for key in ('known_bboxs', 'known_labels', 'output_known_coord',
	                    'output_known_class', 'pad_size'))
	    return labels, bboxs, classes, coords, pad_size

	## SMPL losses

	def forward(self, outputs, targets, data_batch, return_indices=False):
	    """Run every configured loss on the final, auxiliary and extra outputs.

	    Parameters:
	        outputs: dict of tensors, see the output specification of the
	            model for the format. The optional entries 'aux_outputs'
	            (one dict per intermediate layer), 'interm_outputs' and
	            'query_expand' each get their own matching pass, and their
	            loss names get the suffix '_<layer>', '_interm' or
	            '_query_expand'.
	        targets: list of dicts, such that len(targets) == batch_size.
	            The expected keys in each dict depend on the losses applied;
	            'boxes' is read here to count the target boxes.
	        data_batch: forwarded unchanged to every loss.
	        return_indices: used for vis. If True, the matcher results of all
	            passes are returned as well.

	    Returns:
	        The dict of losses, or ``(losses, indices_list)`` when
	        ``return_indices`` is True. The final-layer matcher result is the
	        last element of ``indices_list``.
	    """
	    smpl_losses = ('smpl_pose', 'smpl_beta', 'smpl_expr', 'smpl_kp2d',
	                   'smpl_kp2d_ba', 'smpl_kp3d', 'smpl_kp3d_ra')

	    final_outputs = {}
	    for key, value in outputs.items():
	        if key != 'aux_outputs':
	            final_outputs[key] = value
	    device = next(iter(outputs.values())).device

	    # Average number of target boxes across all nodes, used to normalise.
	    box_count = torch.as_tensor([sum(len(t['boxes']) for t in targets)],
	                                dtype=torch.float,
	                                device=device)
	    if is_dist_avail_and_initialized():
	        torch.distributed.all_reduce(box_count)
	    num_boxes = torch.clamp(box_count / get_world_size(), min=1).item()

	    # Losses of the final layer: hand/face terms are always enabled.
	    indices = self.matcher(final_outputs, targets)
	    final_indices = indices
	    indices_list = []
	    losses = {}
	    for loss in self.losses:
	        kwargs = {}
	        if loss == 'keypoints' or loss in smpl_losses:
	            kwargs['face_hand_kpt'] = True
	        if loss == 'boxes':
	            kwargs['face_hand_box'] = True
	        losses.update(
	            self.get_loss(loss, outputs, targets, data_batch, indices,
	                          num_boxes, **kwargs))

	    # Auxiliary losses: repeat the process for each intermediate layer.
	    if 'aux_outputs' in outputs:
	        for layer_no, aux_outputs in enumerate(outputs['aux_outputs']):
	            indices = self.matcher(aux_outputs, targets)
	            if return_indices:
	                indices_list.append(indices)
	            past_box_layers = layer_no >= self.num_box_decoder_layers
	            past_part_layers = (layer_no >=
	                                self.num_hand_face_decoder_layers)
	            for loss in self.losses:
	                if loss == 'masks':
	                    continue
	                if loss == 'boxes':
	                    kwargs = {'face_hand_box': past_box_layers}
	                elif loss == 'keypoints' or loss in smpl_losses:
	                    # Keypoint/SMPL losses are skipped on the box-only
	                    # layers; hand/face terms start later still.
	                    if not past_box_layers:
	                        continue
	                    kwargs = {'face_hand_kpt': past_part_layers}
	                elif loss == 'labels':
	                    # Logging is enabled only for the last layer
	                    kwargs = {'log': False}
	                else:
	                    kwargs = {}
	                layer_losses = self.get_loss(loss, aux_outputs, targets,
	                                             data_batch, indices,
	                                             num_boxes, **kwargs)
	                for name, value in layer_losses.items():
	                    losses[f'{name}_{layer_no}'] = value

	    # interm_outputs loss
	    if 'interm_outputs' in outputs:
	        interm_outputs = outputs['interm_outputs']
	        indices = self.matcher(interm_outputs, targets)
	        if return_indices:
	            indices_list.append(indices)
	        skipped = ('dn_bbox', 'dn_label', 'keypoints') + smpl_losses
	        for loss in self.losses:
	            if loss in skipped:
	                continue
	            kwargs = {'log': False} if loss == 'labels' else {}
	            interm_losses = self.get_loss(loss, interm_outputs, targets,
	                                          data_batch, indices, num_boxes,
	                                          **kwargs)
	            for name, value in interm_losses.items():
	                losses[name + '_interm'] = value

	    # aux_init loss
	    if 'query_expand' in outputs:
	        expand_outputs = outputs['query_expand']
	        indices = self.matcher(expand_outputs, targets)
	        if return_indices:
	            indices_list.append(indices)
	        for loss in self.losses:
	            if loss in ('dn_bbox', 'dn_label'):
	                continue
	            kwargs = {'log': False} if loss == 'labels' else {}
	            expand_losses = self.get_loss(loss, expand_outputs, targets,
	                                          data_batch, indices, num_boxes,
	                                          **kwargs)
	            for name, value in expand_losses.items():
	                losses[name + '_query_expand'] = value

	    if not return_indices:
	        return losses
	    indices_list.append(final_indices)
	    return losses, indices_list

	def tgt_loss_boxes(
	    self,
	    src_boxes,
	    tgt_boxes,
	    num_tgt,
	):
	    """Compute the L1 and GIoU box losses for denoising queries.

	    Args:
	        src_boxes: predicted boxes, documented upstream as
	            (bs, num_dn, 4); converted from cxcywh to xyxy for GIoU.
	        tgt_boxes: target boxes with the same layout as ``src_boxes``.
	        num_tgt: normaliser that both summed losses are divided by.

	    Returns:
	        dict with ``'dn_loss_bbox'`` (summed element-wise L1 / num_tgt)
	        and ``'dn_loss_giou'`` (summed ``1 - GIoU`` / num_tgt).
	    """
	    # Element-wise L1 distance, reduced manually so it can be
	    # normalised by num_tgt instead of the element count.
	    l1_per_coord = F.l1_loss(src_boxes, tgt_boxes, reduction='none')
	    bbox_term = l1_per_coord.sum() / num_tgt

	    # Merge the first two dimensions so every box is one row.
	    pred_xyxy = box_ops.box_cxcywh_to_xyxy(src_boxes.flatten(0, 1))
	    gt_xyxy = box_ops.box_cxcywh_to_xyxy(tgt_boxes.flatten(0, 1))

	    # Only the diagonal (i-th prediction vs. i-th target) is used;
	    # presumably generalized_box_iou returns a pairwise matrix.
	    pairwise_giou = box_ops.generalized_box_iou(pred_xyxy, gt_xyxy)
	    giou_penalty = 1 - torch.diag(pairwise_giou)
	    giou_term = giou_penalty.sum() / num_tgt

	    return {
	        'dn_loss_bbox': bbox_term,
	        'dn_loss_giou': giou_term,
	    }

	def tgt_loss_labels(self,
	                    src_logits: Tensor,
	                    tgt_labels: Tensor,
	                    num_tgt: int,
	                    log: bool = True):
	    """Compute the focal classification loss for denoising queries.

	    Args:
	        src_logits: class logits, documented upstream as
	            (bs, num_dn, num_classes).
	        tgt_labels: integer class indices, documented upstream as
	            (bs, num_dn); used as scatter indices along the class axis.
	        num_tgt: normaliser forwarded to ``sigmoid_focal_loss``.
	        log: accepted for signature compatibility; not read by this
	            method.

	    Returns:
	        dict with the single key ``'dn_loss_ce'``.
	    """
	    num_queries = src_logits.shape[1]

	    # One extra class slot is allocated and dropped again below, so a
	    # label equal to the number of logit classes yields an all-zero row.
	    onehot_shape = (src_logits.shape[0], num_queries,
	                    src_logits.shape[2] + 1)
	    onehot = torch.zeros(onehot_shape,
	                         dtype=src_logits.dtype,
	                         layout=src_logits.layout,
	                         device=src_logits.device)

	    label_index = tgt_labels.unsqueeze(-1)
	    onehot.scatter_(2, label_index, 1)
	    onehot = onehot[..., :-1]

	    focal = sigmoid_focal_loss(src_logits,
	                               onehot,
	                               num_tgt,
	                               alpha=self.focal_alpha,
	                               gamma=2)
	    # Rescale by the size of the query dimension of src_logits.
	    return {'dn_loss_ce': focal * num_queries}