Spaces:
Runtime error
Runtime error
text_encoder: bert-base-uncased
bert_config: configs/config_bert.json
vit_type: beit  # items in ${vit_zoo}
vit_zoo:  # from huggingface
  beit: microsoft/beit-base-patch16-224-pt22k-ft22k
vit_name_or_pretrained_path: ${vit_zoo[${vit_type}]}
vision_encoder_args:
  token_keep_rate: 0.7
  token_keep_strategy: cls_attn
  token_drop_loc: [3, 6, 9]
  sparse_local_attn: 1
  sparse_random_attn: 5
  attn_block_size: 56
image_res: 224
embed_dim: 256
video_input:
  num_frames: 4
  reader: decord  # one of [decord, av]
  sample_type: rand
  num_frames_test: 16  # num_frames during inference/test
  sample_type_test: middle
max_txt_l:
  image: 32
  video: 32
batch_size:
  image: 8
  video: 8
batch_size_test:
  image: 8
  video: 8
k_test: 128
temp: 0.18
mlm_prob: 0.5