_base_ = ['../_base_/datasets/human_ml3d_bs128.py']
# checkpoint saving
checkpoint_config = dict(interval=1)
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
# optimizer
optimizer = dict(type='Adam', lr=2e-4)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=[])
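# NOTE: an empty `step` list defines no decay milestones, so the learning rate stays constant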
runner = dict(type='EpochBasedRunner', max_epochs=50)
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
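# data and network hyper-parameters
# (the HumanML3D motion representation is 263-dimensional; sequences have at most 196 frames)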
input_feats = 263
max_seq_len = 196
latent_dim = 512
time_embed_dim = 2048
text_latent_dim = 256
ff_size = 1024
num_heads = 8
dropout = 0
# model settings
model = dict(
    type='MotionDiffusion',
    model=dict(
        type='MotionDiffuseTransformer',
        input_feats=input_feats,
        max_seq_len=max_seq_len,
        latent_dim=latent_dim,
        time_embed_dim=time_embed_dim,
        num_layers=8,
        sa_block_cfg=dict(
            type='EfficientSelfAttention',
            latent_dim=latent_dim,
            num_heads=num_heads,
            dropout=dropout,
            time_embed_dim=time_embed_dim
        ),
        ca_block_cfg=dict(
            type='EfficientCrossAttention',
            latent_dim=latent_dim,
            text_latent_dim=text_latent_dim,
            num_heads=num_heads,
            dropout=dropout,
            time_embed_dim=time_embed_dim
        ),
        ffn_cfg=dict(
            latent_dim=latent_dim,
            ffn_dim=ff_size,
            dropout=dropout,
            time_embed_dim=time_embed_dim
        ),
        text_encoder=dict(
            pretrained_model='clip',
            latent_dim=text_latent_dim,
            num_layers=4,
            num_heads=4,
            ff_size=2048,
            dropout=dropout,
            use_text_proj=True
        )
    ),
    loss_recon=dict(type='MSELoss', loss_weight=1, reduction='none'),
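    # DDPM settings: 'epsilon' means the network is trained to predict the added noise;
    # 'fixed_small' uses the fixed (non-learned) posterior variance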
    diffusion_train=dict(
        beta_scheduler='linear',
        diffusion_steps=1000,
        model_mean_type='epsilon',
        model_var_type='fixed_small',
    ),
    diffusion_test=dict(
        beta_scheduler='linear',
        diffusion_steps=1000,
        model_mean_type='epsilon',
        model_var_type='fixed_small',
    ),
    inference_type='ddpm'
)
data = dict(samples_per_gpu=128)
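As a quick sanity check, a config like this can be loaded and inspected with mmcv's `Config` utility before training. A minimal sketch, assuming the file (together with the `_base_` dataset config it inherits) is saved under a hypothetical path such as `configs/motiondiffuse/motiondiffuse_t2m.py` in an OpenMMLab-style repo:

```python
# Minimal sketch: load and inspect the config with mmcv (the path below is a placeholder).
from mmcv import Config

cfg = Config.fromfile('configs/motiondiffuse/motiondiffuse_t2m.py')  # hypothetical path

print(cfg.model.model.type)                       # 'MotionDiffuseTransformer'
print(cfg.model.diffusion_train.diffusion_steps)  # 1000
print(cfg.data.samples_per_gpu)                   # 128

# Values can be overridden in code without editing the file, e.g. for a short debug run:
cfg.runner.max_epochs = 1
cfg.data.samples_per_gpu = 8
```

Training itself is then typically launched through the repo's standard `tools/train.py` entry point with this config path as the first argument (the exact script name depends on the codebase).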