'''
Crop per-hand images in which the other hand has been masked out; no resizing.

example:
    python utils/crop_resize_no_another_hand.py --video_id 20230818_04_old

TODO: find out why the script gets slower the longer it runs. It seems to
stall in load_bg_img; timing that call would confirm it.
re: it appears to be simply slow.
'''
import os
import sys
sys.path.append('.')
import cv2
import numpy as np
import os.path as osp
from tqdm import tqdm
import pickle
import multiprocessing as mlp
from utils.hoi_io2 import get_downsampled_seg_infos_batch, get_seg_infos_batch3, load_bg_img, get_downsampled_seg_infos_batch_v2, read_init_crop, cal_represent_frame_list, get_downsampled_seg_infos_batch_v2_acc_batch
from utils.scandir import scandir
import argparse
from time import time
import json

# Dataset root used by the command-line entry point.
DEFAULT_ROOT = '/share/datasets/HOI-mocap'

# Full-resolution frame size assumed by the functions that do not read the
# size from the image itself.
IMG_HEIGHT = 3000
IMG_WIDTH = 4096

# Label values of the two hands inside the segmentation arrays.
_SEG_LABELS = {'right_hand': 1, 'left_hand': 2}

# (hand, other hand) in the order they are processed: right first, then left.
_HAND_PAIRS = (('right_hand', 'left_hand'), ('left_hand', 'right_hand'))

# Margin (in pixels) added around the mask bbox by crop_from_mask.
_TRACKED_MARGIN_SIZE = 50


def _split_hand_segs(seg):
    '''Return one binary uint8 array per hand from a labelled segmentation.'''
    return {
        hand: np.where(seg == label, 1, 0).astype(np.uint8)
        for hand, label in _SEG_LABELS.items()
    }


def _make_crop_dirs(crop_base, camera_id):
    '''Create `<crop_base>/<hand>/<camera_id>` for both hands; return them by hand.'''
    crop_dirs = {}
    for hand in ('left_hand', 'right_hand'):
        crop_dirs[hand] = os.path.join(crop_base, hand, camera_id)
        os.makedirs(crop_dirs[hand], exist_ok=True)
    return crop_dirs


def _write_crop_info(crop_dir, camera_id, frame_id, info):
    '''Pickle one crop-info list next to the crop image.'''
    info_path = os.path.join(crop_dir, camera_id + '_' + frame_id + '_crop_info.pkl')
    with open(info_path, 'wb') as f:
        pickle.dump(info, f)


def _write_crop_png(crop_dir, camera_id, frame_id, crop_img):
    '''Write one cropped image as `<camera_id>_<frame_id>.png`.'''
    cv2.imwrite(os.path.join(crop_dir, camera_id + '_' + frame_id + '.png'), crop_img)


def _square_bbox_from_mask(mask, crop_factor, margin_size, max_h, max_w):
    '''
    Compute a square crop window around the non-zero pixels of `mask`.

    The window is centred on the middle of the mask's bounding box, its side is
    the longer bbox side scaled by `crop_factor`, `margin_size` pixels are added
    on every side, and the result is clamped to [0, max_h] x [0, max_w].

    Returns [min_h, max_h, min_w, max_w, mean_h, mean_w] (mean_* is the mask
    centroid), or None when the mask is empty.
    '''
    idx = np.nonzero(mask)
    rows, cols = idx[0], idx[1]
    if len(rows) == 0:
        return None
    min_row, max_row = np.min(rows), np.max(rows)
    min_col, max_col = np.min(cols), np.max(cols)
    mid_h = (max_row + min_row) // 2
    mid_w = (max_col + min_col) // 2
    length = int(max(max_row - min_row, max_col - min_col) * crop_factor)
    half_length = length // 2
    top = max(0, mid_h - half_length - margin_size)
    bottom = min(max_h, mid_h + half_length + margin_size)
    left = max(0, mid_w - half_length - margin_size)
    right = min(max_w, mid_w + half_length + margin_size)
    return [top, bottom, left, right, np.mean(rows), np.mean(cols)]


def _mask_out_and_crop(img, other_mask, info):
    '''Paint the pixels of `other_mask` white on a copy of `img`, then crop it to `info`.'''
    top, bottom, left, right = info[:4]
    painted = img.copy()
    painted[other_mask == 1] = 255
    return painted[top:bottom, left:right]


def _tracked_crop_info(mask, last_info, frame_idx, frame_id, crop_dir, camera_id, hand):
    '''
    Crop info for one hand in crop_from_mask, falling back when the mask is empty.

    Fallback order: the previous frame's info (`last_info`); for the first frame
    of a sub list, the crop info pickled for the preceding frame id; otherwise
    the process exits with status 1.
    '''
    info = _square_bbox_from_mask(mask, 1, _TRACKED_MARGIN_SIZE, IMG_HEIGHT - 1, IMG_WIDTH - 1)
    if info is not None:
        return info
    if frame_idx != 0:
        return last_info

    print('no hand mask in the first frame of this sub_frame_list!')
    print(hand, camera_id, frame_id)
    if frame_id == '00001':
        print(frame_idx, frame_id)
        print('GG, no hand mask in the first frame!')
        sys.exit(1)

    prev_frame_id = str(int(frame_id) - 1).zfill(5)
    info_path = osp.join(crop_dir, camera_id + '_' + prev_frame_id + '_crop_info.pkl')
    assert os.path.exists(info_path)
    # NOTE: pickle is only safe here because the file is produced by this script.
    with open(info_path, 'rb') as f:
        top, bottom, left, right, mean_h, mean_w = pickle.load(f)
    return [top, bottom, left, right, mean_h, mean_w]


def crop_from_mask(video_id: str, camera_list: list[str], frame_list: list[str], root: str = None):
    '''
    Crop both hands of every frame from the segmentation masks.

    If a frame has no mask for a hand, the bbox of the previous frame is reused
    for that hand. Writes `<camera>_<frame>.png` and `<camera>_<frame>_crop_info.pkl`
    under `<root>/<video_id>/crop/<hand>/<camera>/`.

    Args:
        video_id: id of the video to process.
        camera_list: camera ids to process.
        frame_list: frame ids, in temporal order.
        root: dataset root. Defaults to a module-level `root` when one has been
            set (the historical behaviour), else DEFAULT_ROOT. Passing it
            explicitly keeps the function usable when imported or when
            multiprocessing uses the "spawn" start method.
    '''
    if root is None:
        root = globals().get('root', DEFAULT_ROOT)

    seg, _ = get_downsampled_seg_infos_batch(video_id, frame_list, camera_list)
    hand_segs = _split_hand_segs(seg)
    # Last known crop info per hand; shared across cameras as before.
    last_info = {hand: None for hand in _SEG_LABELS}

    for camera_idx, camera_id in enumerate(camera_list):
        crop_dirs = _make_crop_dirs(osp.join(root, video_id, 'crop'), camera_id)
        for frame_idx, frame_id in enumerate(tqdm(frame_list)):
            # Upsample the downsampled masks to full resolution.
            masks = {
                hand: cv2.resize(hand_segs[hand][frame_idx, camera_idx],
                                 (IMG_WIDTH, IMG_HEIGHT),
                                 interpolation=cv2.INTER_LINEAR)
                for hand in _SEG_LABELS
            }
            img = load_bg_img(video_id, camera_id, frame_id)

            for hand, other in _HAND_PAIRS:
                info = _tracked_crop_info(masks[hand], last_info[hand], frame_idx,
                                          frame_id, crop_dirs[hand], camera_id, hand)
                last_info[hand] = info
                crop_img = _mask_out_and_crop(img, masks[other], info)
                _write_crop_info(crop_dirs[hand], camera_id, frame_id, info)
                _write_crop_png(crop_dirs[hand], camera_id, frame_id, crop_img)


def crop(img, min_h, max_h, min_w, max_w, black_min_h=None, black_max_h=None, black_min_w=None, black_max_w=None):
    '''
    Return `img[min_h:max_h, min_w:max_w]` taken from a copy of `img`.

    When all four `black_*` bounds are given, that rectangle is filled with 255
    (white) before cropping, which is how the other hand's bbox is blanked out.
    The previous check compared against `[not None] * 4` (i.e. `[True] * 4`) and
    indexed with a comma instead of a slice, so the blanking never took effect.
    '''
    img = img.copy()
    blank_box = (black_min_h, black_max_h, black_min_w, black_max_w)
    if all(bound is not None for bound in blank_box):
        img[black_min_h: black_max_h, black_min_w: black_max_w] = 255
    return img[min_h: max_h, min_w: max_w]


def apply_factor_and_margin(min_h, max_h, min_w, max_w, factor, margin_size, MAX_H, MAX_W):
    '''
    Turn a bbox into a square crop window scaled by `factor` plus a margin.

    Returns (min_h, max_h, min_w, max_w, mid_h, mid_w), with the window clamped
    to [0, MAX_H] x [0, MAX_W]; mid_* is the centre of the input bbox.
    '''
    # Work on Python ints so narrow unsigned numpy integers cannot wrap around
    # in the sums and differences below.
    min_h, max_h, min_w, max_w = int(min_h), int(max_h), int(min_w), int(max_w)
    mid_h = (min_h + max_h) // 2
    mid_w = (min_w + max_w) // 2
    length = int(max(max_h - min_h, max_w - min_w) * factor)
    half_length = length // 2
    min_h = max(0, mid_h - half_length - margin_size)
    max_h = min(MAX_H, mid_h + half_length + margin_size)
    min_w = max(0, mid_w - half_length - margin_size)
    max_w = min(MAX_W, mid_w + half_length + margin_size)
    return min_h, max_h, min_w, max_w, mid_h, mid_w


def _pos_in_bounds(pos):
    '''True when every bbox coordinate in `pos` lies inside the full-size frame.'''
    return bool(
        np.all(pos[...] >= 0)
        and np.all(pos[..., [0, 1]] <= IMG_HEIGHT - 1)
        and np.all(pos[..., [2, 3]] <= IMG_WIDTH - 1)
    )


def _crop_infos_from_pos(pos, crop_factor, margin_size):
    '''Per-hand [min_h, max_h, min_w, max_w, mid_h, mid_w]; pos[0] is right, pos[1] is left.'''
    infos = {}
    for hand, row in (('right_hand', 0), ('left_hand', 1)):
        min_h, max_h, min_w, max_w = pos[row]
        infos[hand] = list(apply_factor_and_margin(
            min_h, max_h, min_w, max_w, crop_factor, margin_size,
            IMG_HEIGHT - 1, IMG_WIDTH - 1))
    return infos


def crop_hand_from_pos(root, video_id: str, camera_id: str, frame_id: str, pos, crop_factor = 1, margin_size = 0):
    '''
    Crop both hands of one frame from annotated bboxes.

    `pos` has shape (2, 4): one (min_h, max_h, min_w, max_w) row per hand, right
    hand first. Nothing is written when any coordinate is outside the frame.
    Output goes to `<root>/<video_id>/crop/<hand>/<camera_id>/`.
    '''
    assert pos.shape == (2, 4)
    if not _pos_in_bounds(pos):
        return

    crop_dirs = _make_crop_dirs(osp.join(root, video_id, 'crop'), camera_id)
    bg = load_bg_img(video_id, camera_id, frame_id)
    infos = _crop_infos_from_pos(pos, crop_factor, margin_size)

    for hand, _ in _HAND_PAIRS:
        _write_crop_info(crop_dirs[hand], camera_id, frame_id, infos[hand])
    for hand, other in _HAND_PAIRS:
        hand_crop = crop(bg, *infos[hand][:4], *infos[other][:4])
        _write_crop_png(crop_dirs[hand], camera_id, frame_id, hand_crop)


def crop_hand_from_pos_acc(root, local_root, date: str, crop_save_exp_name, crop_info_save_exp_name, video_id: str, camera_list: list[str], frame_list: list[str], represent_frame_id, img_batch, pos_batch, crop_factor = 1, margin_size = 0):
    '''
    Batched variant of crop_hand_from_pos working on in-memory images.

    `img_batch[f_idx][c_idx]` is the image of frame `frame_list[f_idx]` and
    camera `camera_list[c_idx]`; `pos_batch[frame_id][camera_id]` is the bbox
    array. Crops are written per frame and camera under
    `<local_root>/<date>/<video_id>/<crop_save_exp_name>/<hand>/<camera>/`; the
    crop infos of the whole batch are pickled once per hand as
    `<represent_frame_id>_crop_info.pkl` under `<crop_info_save_exp_name>/<hand>/`.
    `root` is not used by this function.
    '''
    video_dir = osp.join(local_root, date, video_id)
    info_batch = {hand: {} for hand in _SEG_LABELS}

    for f_idx, frame_id in enumerate(frame_list):
        for hand in _SEG_LABELS:
            info_batch[hand][frame_id] = {}
        for c_idx, camera_id in enumerate(camera_list):
            if camera_id not in pos_batch[frame_id]:
                continue
            pos = pos_batch[frame_id][camera_id]
            if not _pos_in_bounds(pos):
                continue

            crop_dirs = _make_crop_dirs(osp.join(video_dir, crop_save_exp_name), camera_id)
            bg = img_batch[f_idx][c_idx]  # crop() copies before modifying
            infos = _crop_infos_from_pos(pos, crop_factor, margin_size)
            for hand, other in _HAND_PAIRS:
                info_batch[hand][frame_id][camera_id] = infos[hand]
            for hand, other in _HAND_PAIRS:
                hand_crop = crop(bg, *infos[hand][:4], *infos[other][:4])
                _write_crop_png(crop_dirs[hand], camera_id, frame_id, hand_crop)

    for hand, _ in _HAND_PAIRS:
        info_dir = osp.join(video_dir, crop_info_save_exp_name, hand)
        os.makedirs(info_dir, exist_ok=True)
        with open(osp.join(info_dir, represent_frame_id + '_crop_info.pkl'), 'wb') as f:
            pickle.dump(info_batch[hand], f)


def crop_from_init(root, date, video_id: str, camera_list: list[str], crop_factor = 1, margin_size = 0):
    '''
    Crop the first two frames from manually annotated bboxes read via read_init_crop.

    When cropping one hand, the bbox region of the other hand is blanked out.
    TODO: the shape of pos is problematic and needs to be changed.
    '''
    frame_list = [str(i).zfill(5) for i in range(1, 3)]
    # crop_hand_from_pos joins `<root>/<video_id>/crop/...`, so hand it the dated
    # root to land in the directories created below. The previous call omitted
    # the root argument altogether, shifting every argument by one.
    dated_root = osp.join(root, date)

    for camera_id in camera_list:
        _make_crop_dirs(osp.join(dated_root, video_id, 'crop'), camera_id)
        for frame_id in frame_list:
            pos = read_init_crop(video_id, camera_id, frame_id)
            crop_hand_from_pos(dated_root, video_id, camera_id, frame_id, pos, crop_factor, margin_size)


def _iter_hand_crops(seg, camera_list, frame_list, load_img, crop_base, crop_factor, margin_size):
    '''
    Yield (hand, camera_id, frame_id, crop_dir, crop_img, info) for every
    camera/frame/hand whose mask is non-empty (cameras outer, frames inner,
    right hand before left hand).

    `load_img(frame_idx, camera_idx, frame_id, camera_id)` returns the
    full-resolution image; masks are upsampled to its size with nearest-neighbour
    interpolation. Output directories under `crop_base` are created on the way.
    '''
    hand_segs = _split_hand_segs(seg)
    for camera_idx, camera_id in enumerate(camera_list):
        crop_dirs = _make_crop_dirs(crop_base, camera_id)
        for frame_idx, frame_id in enumerate(tqdm(frame_list)):
            img = load_img(frame_idx, camera_idx, frame_id, camera_id)
            height, width = img.shape[:2]
            masks = {
                hand: cv2.resize(hand_segs[hand][frame_idx, camera_idx],
                                 (width, height),
                                 interpolation=cv2.INTER_NEAREST)
                for hand in _SEG_LABELS
            }
            for hand, other in _HAND_PAIRS:
                # crop_factor is applied to both hands (it used to be ignored
                # for the left hand).
                info = _square_bbox_from_mask(masks[hand], crop_factor, margin_size,
                                              height - 1, width - 1)
                if info is None:
                    continue
                crop_img = _mask_out_and_crop(img, masks[other], info)
                yield hand, camera_id, frame_id, crop_dirs[hand], crop_img, info


def _invalid_cameras(valid, camera_list):
    '''Per frame, the cameras of `camera_list` that are not listed in `valid`.'''
    return {
        frame: [camera for camera in camera_list if camera not in cameras]
        for frame, cameras in valid.items()
    }


def _update_crop_log(log_path, entries):
    '''Merge `entries` (name -> per-frame dict) into the JSON log at `log_path`.'''
    if os.path.exists(log_path):
        with open(log_path, 'r') as f:
            log = json.load(f)
        for key, value in entries.items():
            log.setdefault(key, {}).update(value)
    else:
        log = dict(entries)
    with open(log_path, 'w') as f:
        json.dump(log, f)


def _crop_from_seg_per_frame(root, video_id, seg, camera_list, frame_list, load_img, crop_factor, margin_size):
    '''Shared body of crop_from_mask_v2 / crop_from_mask_v3: per-frame files plus log.json.'''
    crop_base = os.path.join(root, video_id, 'crop')
    valid = {hand: {frame: [] for frame in frame_list} for hand in _SEG_LABELS}

    for hand, camera_id, frame_id, crop_dir, crop_img, info in _iter_hand_crops(
            seg, camera_list, frame_list, load_img, crop_base, crop_factor, margin_size):
        valid[hand][frame_id].append(camera_id)
        _write_crop_info(crop_dir, camera_id, frame_id, info)
        _write_crop_png(crop_dir, camera_id, frame_id, crop_img)

    # Save valid and invalid info.
    right_valid, left_valid = valid['right_hand'], valid['left_hand']
    right_invalid = _invalid_cameras(right_valid, camera_list)
    left_invalid = _invalid_cameras(left_valid, camera_list)
    _update_crop_log(os.path.join(crop_base, 'log.json'), {
        'right_hand_valid_camera_dict': right_valid,
        'left_hand_valid_camera_dict': left_valid,
        'right_hand_invalid_camera_dict': right_invalid,
        'left_hand_invalid_camera_dict': left_invalid,
    })
    return right_valid, left_valid, right_invalid, left_invalid


def crop_from_mask_v2(root, video_id: str, from_exp_name, camera_list: list[str], frame_list: list[str], rgb_batch, crop_factor = 1, margin_size = 50):
    '''
    Crop both hands from segmentation masks, reading each image with load_bg_img.

    A hand with an empty mask is skipped for that frame/camera. Writes crops and
    crop infos under `<root>/<video_id>/crop/<hand>/<camera>/` and merges the
    valid/invalid camera lists into `<root>/<video_id>/crop/log.json`.
    `rgb_batch` is accepted for signature parity with crop_from_mask_v3 and is
    not used.

    Returns:
        (right_valid, left_valid, right_invalid, left_invalid): dicts mapping
        frame id to the list of camera ids with / without a mask for that hand.
    '''
    seg, _ = get_downsampled_seg_infos_batch_v2(video_id, from_exp_name, frame_list, camera_list)

    def load_img(frame_idx, camera_idx, frame_id, camera_id):
        return load_bg_img(video_id, camera_id, frame_id)

    return _crop_from_seg_per_frame(root, video_id, seg, camera_list, frame_list,
                                    load_img, crop_factor, margin_size)


def crop_from_mask_v3(root, video_id: str, from_exp_name, camera_list: list[str], frame_list: list[str], rgb_batch, crop_factor = 1, margin_size = 50):
    '''
    Same as crop_from_mask_v2, but takes the images from `rgb_batch` so they do
    not have to be read from disk again.

    `rgb_batch[frame_idx][camera_idx]` is the image of `frame_list[frame_idx]`
    and `camera_list[camera_idx]`.
    '''
    seg, _ = get_downsampled_seg_infos_batch_v2(video_id, from_exp_name, frame_list, camera_list)

    def load_img(frame_idx, camera_idx, frame_id, camera_id):
        return rgb_batch[frame_idx][camera_idx]

    return _crop_from_seg_per_frame(root, video_id, seg, camera_list, frame_list,
                                    load_img, crop_factor, margin_size)


def crop_from_mask_v3_acc_batch(root, local_root, date, video_id: str, from_exp_name, camera_list: list[str], represent_frame_id, frame_list: list[str], BATCH_SIZE, rgb_batch, crop_factor = 1, margin_size = 50):
    '''
    Batched variant of crop_from_mask_v3: images come from `rgb_batch`, and the
    results of the whole batch are written together, one file per hand.

    Per-frame PNGs still go to `<local_root>/<date>/<video_id>/crop/<hand>/<camera>/`.
    Crop infos and crop images of the batch are pickled as
    `<represent_frame_id>_crop_info.pkl` and `<represent_frame_id>_crop.pkl` under
    `<local_root>/<date>/<video_id>/crop_batch/<hand>/`. No log.json is written.
    `BATCH_SIZE` is not used.

    Returns:
        (right_valid, left_valid, right_invalid, left_invalid), as in crop_from_mask_v2.
    '''
    seg, _ = get_downsampled_seg_infos_batch_v2_acc_batch(
        root, date, video_id, from_exp_name, frame_list, camera_list,
        represent_frame_id=represent_frame_id)
    video_dir = os.path.join(local_root, date, video_id)

    valid = {hand: {frame: [] for frame in frame_list} for hand in _SEG_LABELS}
    info_batch = {hand: {} for hand in _SEG_LABELS}
    crop_batch = {hand: {} for hand in _SEG_LABELS}

    def load_img(frame_idx, camera_idx, frame_id, camera_id):
        return rgb_batch[frame_idx][camera_idx]

    for hand, camera_id, frame_id, crop_dir, crop_img, info in _iter_hand_crops(
            seg, camera_list, frame_list, load_img,
            os.path.join(video_dir, 'crop'), crop_factor, margin_size):
        valid[hand][frame_id].append(camera_id)
        info_batch[hand].setdefault(frame_id, {})[camera_id] = info
        crop_batch[hand].setdefault(frame_id, {})[camera_id] = crop_img
        _write_crop_png(crop_dir, camera_id, frame_id, crop_img)

    right_valid, left_valid = valid['right_hand'], valid['left_hand']
    right_invalid = _invalid_cameras(right_valid, camera_list)
    left_invalid = _invalid_cameras(left_valid, camera_list)

    batch_dirs = {}
    for hand in ('left_hand', 'right_hand'):
        batch_dirs[hand] = os.path.join(video_dir, 'crop_batch', hand)
        os.makedirs(batch_dirs[hand], exist_ok=True)
    for hand, _ in _HAND_PAIRS:
        with open(os.path.join(batch_dirs[hand], represent_frame_id + '_crop_info.pkl'), 'wb') as f:
            pickle.dump(info_batch[hand], f)
        with open(os.path.join(batch_dirs[hand], represent_frame_id + '_crop.pkl'), 'wb') as f:
            pickle.dump(crop_batch[hand], f)

    return right_valid, left_valid, right_invalid, left_invalid


def main():
    '''Command-line entry point: crop one video, one worker process per camera.'''
    camera_list = ['22070938', '22139905', '22139909', '22139910', '22139911', '22139913', '22139916', '22139946']

    parser = argparse.ArgumentParser()
    parser.add_argument('--video_id', required=True, type=str)
    # Replaces a leftover second __main__ block that called crop_from_init with
    # arguments that did not match its signature and therefore always raised.
    parser.add_argument('--from_init', action='store_true',
                        help='crop the first two frames from the manual annotations '
                             'instead of cropping every frame from the masks')
    cli_args = parser.parse_args()

    video_id = cli_args.video_id
    root = DEFAULT_ROOT
    date = video_id[:8]

    if cli_args.from_init:
        crop_from_init(root, date, video_id, camera_list)
        return

    # Every view is assumed to have the same number of frames.
    img_dir = osp.join(root, date, video_id, 'imgs', camera_list[0])
    if not os.path.isdir(img_dir):
        raise NotADirectoryError(img_dir)
    # crop_from_mask reuses the previous frame's bbox, so frames must be ordered.
    frame_list = sorted(img_filename[-9:-4] for img_filename in scandir(img_dir, 'png'))

    procs = []
    for camera_id in camera_list:
        proc = mlp.Process(target=crop_from_mask,
                           args=(video_id, [camera_id], frame_list),
                           kwargs={'root': root})
        proc.start()
        procs.append(proc)
    for proc in procs:
        proc.join()


if __name__ == "__main__":
    main()