Vision-CAIR
/

LongVU_Llama3_2_1B

Video-Text-to-Text

Model card Files Files and versions Community

Upload config (2).json

#4

by thanhnhan29 - opened Dec 29, 2024

base: refs/heads/main

←

from: refs/pr/4

Discussion Files changed

Files changed (1) hide show

config (2).json +72 -0

config (2).json ADDED Viewed

	@@ -0,0 +1,72 @@

+{
+  "_name_or_path": "/tmp/iopath_cache/manifold_cache/tree/users/shenx/finetune/10162317-cambrian_llama3_2-1b_t576_ov",
+  "architectures": ["CambrianLlamaForCausalLM"],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 128000,
+  "connect_layer": 2,
+  "connector_depth": 3,
+  "connector_only": true,
+  "dino_threshold": 0.83,
+  "drop_threshold": 0.8,
+  "eos_token_id": [128001, 128008, 128009],
+  "frame_pos": false,
+  "freeze_mm_mlp_adapter": false,
+  "head_dim": 64,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "highres": true,
+  "highres_connect": false,
+  "image_aspect_ratio": "pad",
+  "image_position": 91,
+  "image_token_len": 144,
+  "initializer_range": 0.02,
+  "intermediate_size": 8192,
+  "is_image_newline": true,
+  "is_st_sampler": false,
+  "lowres_token": 8,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "mm_patch_merge_type": "flat",
+  "mm_projector_lr": null,
+  "mm_projector_type": "sva",
+  "mm_use_im_patch_token": false,
+  "mm_use_im_start_end": false,
+  "mm_vision_sampler_lr": null,
+  "mm_vision_select_feature": "patch",
+  "mm_vision_select_layer": -2,
+  "mm_vision_tower_aux_list": [
+    "siglip/CLIP-ViT-SO400M-14-384",
+    "facebook/dinov2-giant-res378"
+  ],
+  "mm_vision_tower_aux_token_len_list": [576, 576],
+  "mm_vision_tower_lr": null,
+  "model_type": "cambrian_llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 16,
+  "num_key_value_heads": 8,
+  "num_of_vision_sampler_layers": 10,
+  "num_query_group": 1,
+  "pretraining_tp": 1,
+  "query_num_list": [144],
+  "rms_norm_eps": 1e-5,
+  "rope_scaling": null,
+  "rope_theta": 500000.0,
+  "spmd_debug": null,
+  "spmd_fsdp_sharding": null,
+  "spmd_mesh": null,
+  "start_of_vision_sampler_layers": 0,
+  "stride_of_vision_sampler_layers": 3,
+  "tie_word_embeddings": false,
+  "tokenizer_model_max_length": 16000,
+  "tokenizer_padding_side": "right",
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.2",
+  "tune_mm_mlp_adapter": false,
+  "unfreeze_mm_vision_tower": false,
+  "use_cache": false,
+  "use_mm_proj": true,
+  "vision_hidden_size": 1024,
+  "vision_tower_aux_token_len_list": [576, 576],
+  "vocab_size": 128256
+}