Lyra_Qwen2VL_2B_ViT / config.json
zszhong · First model version · aae5cd7
{
  "architectures": [
    "Qwen2VisionTransformerPretrainedModel"
  ],
  "vision_config": {
    "depth": 32,
    "embed_dim": 1280,
    "hidden_act": "quick_gelu",
    "hidden_size": 1536,
    "in_channels": 3,
    "in_chans": 3,
    "initializer_range": 0.02,
    "mlp_ratio": 4,
    "num_heads": 16,
    "patch_size": 14,
    "spatial_merge_size": 2,
    "spatial_patch_size": 14,
    "temporal_patch_size": 2,
"initializer_range": 0.02,
"_attn_implementation": "flash_attention_2"
},
"model_type": "qwen2_vl",
"torch_dtype": "bfloat16",
"transformers_version": "4.45.0"
}
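For reference, below is a minimal sketch of instantiating the vision tower this config describes with transformers 4.45+. The `Qwen2VLVisionConfig` values mirror the `vision_config` block above; the checkpoint path `"Lyra_Qwen2VL_2B_ViT"` and the assumption that this repo's weight file holds the vision-tower state dict are illustrative, not guaranteed by the config itself. Using `flash_attention_2` also requires the flash-attn package and a compatible GPU.

```python
# Sketch only: build the Qwen2-VL vision transformer described by the config above.
import torch
from transformers.models.qwen2_vl.configuration_qwen2_vl import Qwen2VLVisionConfig
from transformers.models.qwen2_vl.modeling_qwen2_vl import (
    Qwen2VisionTransformerPretrainedModel,
)

# Mirror the "vision_config" fields from config.json.
vision_config = Qwen2VLVisionConfig(
    depth=32,
    embed_dim=1280,
    hidden_act="quick_gelu",
    hidden_size=1536,
    in_channels=3,
    mlp_ratio=4,
    num_heads=16,
    patch_size=14,
    spatial_merge_size=2,
    temporal_patch_size=2,
    initializer_range=0.02,
)

# Load the weights; "Lyra_Qwen2VL_2B_ViT" is assumed to be a local path (or Hub id)
# whose weight file stores the vision-tower state dict directly.
vision_tower = Qwen2VisionTransformerPretrainedModel.from_pretrained(
    "Lyra_Qwen2VL_2B_ViT",
    config=vision_config,
    torch_dtype=torch.bfloat16,               # matches "torch_dtype" above
    attn_implementation="flash_attention_2",  # matches "_attn_implementation" above
)
```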