File size: 9,664 Bytes
d7e58f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
import warnings
from typing import Iterable, List, Optional, Tuple, Union

import numpy as np

import detrsmpl.core.conventions.keypoints_mapping as keypoints_mapping
from detrsmpl.core.renderer.matplotlib3d_renderer import Axes3dJointsRenderer
from detrsmpl.utils.demo_utils import get_different_colors
from detrsmpl.utils.keypoint_utils import search_limbs
from detrsmpl.utils.path_utils import prepare_output_path


def _norm_pose(pose_numpy: np.ndarray, min_value: Union[float, int],
               max_value: Union[float, int], mask: Union[np.ndarray, list]):
    """Normalize the poses and make the center close to axis center."""
    assert max_value > min_value
    pose_np_normed = pose_numpy.copy()
    if not mask:
        mask = list(range(pose_numpy.shape[-2]))
    axis_num = 3
    axis_stat = np.zeros(shape=[axis_num, 4])
    for axis_index in range(axis_num):
        axis_data = pose_np_normed[..., mask, axis_index]
        axis_min = np.min(axis_data)
        axis_max = np.max(axis_data)
        axis_mid = (axis_min + axis_max) / 2.0
        axis_span = axis_max - axis_min
        axis_stat[axis_index] = np.asarray(
            (axis_min, axis_max, axis_mid, axis_span))
    target_mid = (max_value + min_value) / 2.0
    max_span = np.max(axis_stat[:, 3])
    target_span = max_value - min_value
    for axis_index in range(axis_num):
        pose_np_normed[..., axis_index] = \
            pose_np_normed[..., axis_index] - \
            axis_stat[axis_index, 2]
    pose_np_normed = pose_np_normed / max_span * target_span
    pose_np_normed = pose_np_normed + target_mid
    return pose_np_normed


def visualize_kp3d(
    kp3d: np.ndarray,
    output_path: Optional[str] = None,
    limbs: Optional[Union[np.ndarray, List[int]]] = None,
    palette: Optional[Iterable[int]] = None,
    data_source: str = 'coco',
    mask: Optional[Union[list, tuple, np.ndarray]] = None,
    start: int = 0,
    end: Optional[int] = None,
    resolution: Union[list, Tuple[int, int]] = (1024, 1024),
    fps: Union[float, int] = 30,
    frame_names: Optional[Union[List[str], str]] = None,
    orbit_speed: Union[float, int] = 0.5,
    value_range: Union[Tuple[int, int], list] = (-100, 100),
    pop_parts: Iterable[str] = (),
    disable_limbs: bool = False,
    return_array: Optional[bool] = None,
    convention: str = 'opencv',
    keypoints_factory: dict = keypoints_mapping.KEYPOINTS_FACTORY,
) -> Union[None, np.ndarray]:
    """Visualize 3d keypoints to a video with matplotlib. Support multi person
    and specified limb connections.

    Args:
        kp3d (np.ndarray): shape could be (f * J * 4/3/2) or
            (f * num_person * J * 4/3/2). 2D input is padded with a zero
            z-coordinate, input with 4 or more channels keeps only the first
            three. The input array is not modified.
        output_path (str): output video path or image folder.
            Allowed suffixes are '.mp4', '.gif' or none.
            Defaults to None.
        limbs (Optional[Union[np.ndarray, List[int]]], optional):
            if not specified, the limbs will be searched by search_limbs,
            this option is for free skeletons like BVH file.
            Defaults to None.
        palette (Iterable, optional): specified palette, three int represents
            (B, G, R). Should be tuple or list.
            Defaults to None.
        data_source (str, optional): data source type. Defaults to 'coco'.
            choose in ['coco', 'smplx', 'smpl', 'coco_wholebody',
            'mpi_inf_3dhp', 'mpi_inf_3dhp_test', 'h36m', 'pw3d', 'mpii']
        mask (Optional[Union[list, tuple, np.ndarray]], optional):
            mask to mask out the incorrect points, one entry per keypoint
            of `data_source`; entries > 0 are treated as valid.
            Defaults to None.
        start (int, optional): start frame index. Defaults to 0.
        end (int, optional): end frame index.
            Could be positive int or negative int or None.
            None represents include all the frames.
            Defaults to None.
        resolution (Union[list, Tuple[int, int]], optional):
            (width, height) of the output video.
            Defaults to (1024, 1024).
        fps (Union[float, int], optional): fps. Defaults to 30.
        frame_names (Optional[Union[List[str], str]], optional): List(should be
            the same as frame numbers) or single string or string format
            (like 'frame%06d')for frame title, no title if None.
            Defaults to None.
        orbit_speed (Union[float, int], optional): orbit speed of camera.
            Defaults to 0.5.
        value_range (Union[Tuple[int, int], list], optional):
            range of axis value. The poses are normalized into the central
            70% of this range (15% margin on each side). If None, the
            keypoints are not normalized.
            Defaults to (-100, 100).
        pop_parts (Iterable[str], optional): The body part names you do not
            want to visualize. Choose in ['left_eye','right_eye', 'nose',
            'mouth', 'face', 'left_hand', 'right_hand']. Defaults to ().
        disable_limbs (bool, optional): whether need to disable drawing limbs.
            Defaults to False.
        return_array (bool, optional): Whether to return images as opencv
            array. If None, an array will be returned when the input has at
            most 100 frames (counted before `start`/`end` slicing).
            Defaults to None.
        convention (str, optional): convention name forwarded to the
            renderer. Defaults to 'opencv'.
        keypoints_factory (dict, optional): Dict of all the conventions.
            Defaults to KEYPOINTS_FACTORY.
    Raises:
        TypeError: check the type of input keypoints.
        ValueError: `data_source` is not a key of `keypoints_factory`.
        FileNotFoundError: check the output video path (presumably raised
            by prepare_output_path).

    Returns:
        Union[None, np.ndarray]: the value returned by
            `Axes3dJointsRenderer.render_kp3d_to_video`.
    """
    # check input shape
    if not isinstance(kp3d, np.ndarray):
        raise TypeError(
            f'Input type is {type(kp3d)}, which should be numpy.ndarray.')
    kp3d = kp3d.copy()
    if kp3d.shape[-1] == 2:
        # only allocate the single zero channel that is appended
        kp3d = np.concatenate([kp3d, np.zeros_like(kp3d[..., 0:1])], axis=-1)
        warnings.warn(
            'The input array is 2-Dimensional coordinates, will concatenate ' +
            f'zeros to the last axis. The new array shape: {kp3d.shape}')
    elif kp3d.shape[-1] >= 4:
        kp3d = kp3d[..., :3]
        warnings.warn(
            'The input array has more than 3-Dimensional coordinates, will ' +
            'keep only the first 3-Dimensions of the last axis. The new ' +
            f'array shape: {kp3d.shape}')
    if kp3d.ndim == 3:
        # single person input: insert the person axis
        kp3d = np.expand_dims(kp3d, 1)
    num_frames = kp3d.shape[0]
    assert kp3d.ndim == 4
    assert kp3d.shape[-1] == 3

    if return_array is None:
        # avoid returning a huge image array for long sequences
        return_array = num_frames <= 100

    # check data_source & mask
    if data_source not in keypoints_factory:
        raise ValueError('Wrong data_source. Should choose in ' +
                         f'{list(keypoints_factory.keys())}')
    if mask is not None:
        if not isinstance(mask, np.ndarray):
            mask = np.array(mask).reshape(-1)
        num_keypoints = len(keypoints_factory[data_source])
        assert mask.shape == (num_keypoints, ), \
            f'mask length should fit with keypoints number {num_keypoints}'

    # check the output path
    if output_path is not None:
        prepare_output_path(output_path,
                            path_type='auto',
                            tag='output video',
                            allowed_suffix=['.mp4', '.gif', ''])

    # slice the frames
    end = num_frames if end is None else end
    kp3d = kp3d[start:end]
    # norm the coordinates
    if value_range is not None:
        # norm pose location to value_range (70% value range):
        # leave a margin of 15% of the full range on both sides
        mask_index = np.where(np.asarray(mask) > 0) \
            if mask is not None else None
        margin_width = abs(value_range[1] - value_range[0]) * 0.15
        input_pose_np = _norm_pose(kp3d, value_range[0] + margin_width,
                                   value_range[1] - margin_width, mask_index)
    else:
        input_pose_np = kp3d

    # determine the limb connections and palettes
    if limbs is not None:
        limbs_target = {
            'body': limbs.tolist() if isinstance(limbs, np.ndarray) else limbs
        }
        limbs_palette = get_different_colors(len(limbs))
    else:
        limbs_target, limbs_palette = search_limbs(data_source=data_source,
                                                   mask=mask)
    if palette is not None:
        # a user palette overrides the searched/generated one
        limbs_palette = np.array(palette, dtype=np.uint8)[None]

    # check and pop the pop_parts
    valid_parts = set(keypoints_mapping.human_data.HUMAN_DATA_PALETTE.keys())
    assert set(pop_parts).issubset(valid_parts), \
        f'wrong part_names in pop_parts, could only choose in {valid_parts}'

    for part_name in pop_parts:
        if part_name in limbs_target:
            limbs_target.pop(part_name)

    # initialize renderer and start render
    renderer = Axes3dJointsRenderer()
    renderer.init_camera(cam_hori_speed=orbit_speed, cam_elev_speed=0.2)
    renderer.set_connections(limbs_target, limbs_palette)
    if isinstance(frame_names, str):
        num_render_frames = input_pose_np.shape[0]
        if '%' in frame_names:
            # format string such as 'frame%06d': one title per frame index
            frame_names = [
                frame_names % index for index in range(num_render_frames)
            ]
        else:
            # plain string: the same title on every frame
            frame_names = [frame_names] * num_render_frames
    image_array = renderer.render_kp3d_to_video(input_pose_np,
                                                output_path,
                                                convention,
                                                fps=fps,
                                                resolution=resolution,
                                                visual_range=value_range,
                                                frame_names=frame_names,
                                                disable_limbs=disable_limbs,
                                                return_array=return_array)
    return image_array