Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

added_tokens copy.json +16 -0
added_tokens.json +16 -0
chat_template.json +3 -0
config.json +44 -348
model.safetensors +2 -2

added_tokens copy.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

added_tokens.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "<|box_end|>": 151649,
+  "<|box_start|>": 151648,
+  "<|endoftext|>": 151643,
+  "<|im_end|>": 151645,
+  "<|im_start|>": 151644,
+  "<|image_pad|>": 151655,
+  "<|object_ref_end|>": 151647,
+  "<|object_ref_start|>": 151646,
+  "<|quad_end|>": 151651,
+  "<|quad_start|>": 151650,
+  "<|video_pad|>": 151656,
+  "<|vision_end|>": 151653,
+  "<|vision_pad|>": 151654,
+  "<|vision_start|>": 151652
+}

chat_template.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+    "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
+}

config.json CHANGED Viewed

@@ -1,352 +1,48 @@
 {
-    "_attn_implementation_autoset": true,
-    "add_cross_attention": false,
-    "architectures": [
-        "Qwen2VLForConditionalGeneration"
     ],
-    "attention_dropout": 0.0,
-    "bad_words_ids": null,
-    "begin_suppress_tokens": null,
-    "bos_token_id": 151643,
-    "chunk_size_feed_forward": 0,
-    "cross_attention_hidden_size": null,
-    "decoder_start_token_id": null,
-    "diversity_penalty": 0.0,
-    "do_sample": false,
-    "early_stopping": false,
-    "encoder_no_repeat_ngram_size": 0,
-    "eos_token_id": 151645,
-    "exponential_decay_length_penalty": null,
-    "finetuning_task": null,
-    "forced_bos_token_id": null,
-    "forced_eos_token_id": null,
-    "hidden_act": "silu",
     "hidden_size": 1536,
-    "id2label": {
-        "0": "LABEL_0",
-        "1": "LABEL_1"
-    },
-    "image_token_id": 151655,
-    "initializer_range": 0.02,
-    "intermediate_size": 8960,
-    "is_decoder": false,
-    "is_encoder_decoder": false,
-    "label2id": {
-        "LABEL_0": 0,
-        "LABEL_1": 1
-    },
-    "length_penalty": 1.0,
-    "max_length": 20,
-    "max_position_embeddings": 32768,
-    "max_window_layers": 28,
-    "min_length": 0,
     "model_type": "qwen2_vl",
-    "no_repeat_ngram_size": 0,
-    "num_attention_heads": 12,
-    "num_beam_groups": 1,
-    "num_beams": 1,
-    "num_hidden_layers": 28,
-    "num_key_value_heads": 2,
-    "num_return_sequences": 1,
-    "output_attentions": false,
-    "output_hidden_states": false,
-    "output_scores": false,
-    "pad_token_id": null,
-    "prefix": null,
-    "problem_type": null,
-    "pruned_heads": {},
-    "quantization": {
-        "group_size": 64,
-        "bits": 4
-    },
-    "remove_invalid_values": false,
-    "repetition_penalty": 1.0,
-    "return_dict": true,
-    "return_dict_in_generate": false,
-    "rms_norm_eps": 1e-06,
-    "rope_scaling": {
-        "mrope_section": [
-            16,
-            24,
-            24
-        ],
-        "type": "default",
-        "rope_type": "default"
-    },
-    "rope_theta": 1000000.0,
-    "rope_traditional": true,
-    "sep_token_id": null,
-    "sliding_window": 32768,
-    "suppress_tokens": null,
-    "task_specific_params": null,
-    "temperature": 1.0,
-    "text_config": {
-        "vision_config": {
-            "return_dict": true,
-            "output_hidden_states": false,
-            "output_attentions": false,
-            "torchscript": false,
-            "torch_dtype": null,
-            "use_bfloat16": false,
-            "tf_legacy_loss": false,
-            "pruned_heads": {},
-            "tie_word_embeddings": true,
-            "chunk_size_feed_forward": 0,
-            "is_encoder_decoder": false,
-            "is_decoder": false,
-            "cross_attention_hidden_size": null,
-            "add_cross_attention": false,
-            "tie_encoder_decoder": false,
-            "max_length": 20,
-            "min_length": 0,
-            "do_sample": false,
-            "early_stopping": false,
-            "num_beams": 1,
-            "num_beam_groups": 1,
-            "diversity_penalty": 0.0,
-            "temperature": 1.0,
-            "top_k": 50,
-            "top_p": 1.0,
-            "typical_p": 1.0,
-            "repetition_penalty": 1.0,
-            "length_penalty": 1.0,
-            "no_repeat_ngram_size": 0,
-            "encoder_no_repeat_ngram_size": 0,
-            "bad_words_ids": null,
-            "num_return_sequences": 1,
-            "output_scores": false,
-            "return_dict_in_generate": false,
-            "forced_bos_token_id": null,
-            "forced_eos_token_id": null,
-            "remove_invalid_values": false,
-            "exponential_decay_length_penalty": null,
-            "suppress_tokens": null,
-            "begin_suppress_tokens": null,
-            "architectures": null,
-            "finetuning_task": null,
-            "id2label": {
-                "0": "LABEL_0",
-                "1": "LABEL_1"
-            },
-            "label2id": {
-                "LABEL_0": 0,
-                "LABEL_1": 1
-            },
-            "tokenizer_class": null,
-            "prefix": null,
-            "bos_token_id": null,
-            "pad_token_id": null,
-            "eos_token_id": null,
-            "sep_token_id": null,
-            "decoder_start_token_id": null,
-            "task_specific_params": null,
-            "problem_type": null,
-            "_name_or_path": "",
-            "_attn_implementation_autoset": false,
-            "in_chans": 3,
-            "model_type": "qwen2_vl",
-            "spatial_patch_size": 14,
-            "depth": 32,
-            "embed_dim": 1280,
-            "hidden_size": 1536,
-            "hidden_act": "quick_gelu",
-            "mlp_ratio": 4,
-            "num_heads": 16,
-            "in_channels": 3,
-            "patch_size": 14,
-            "spatial_merge_size": 2,
-            "temporal_patch_size": 2
-        },
-        "vocab_size": 151936,
-        "max_position_embeddings": 32768,
-        "hidden_size": 1536,
-        "intermediate_size": 8960,
-        "num_hidden_layers": 28,
-        "num_attention_heads": 12,
-        "use_sliding_window": false,
-        "sliding_window": 32768,
-        "max_window_layers": 28,
-        "num_key_value_heads": 2,
-        "hidden_act": "silu",
-        "initializer_range": 0.02,
-        "rms_norm_eps": 1e-06,
-        "use_cache": false,
-        "rope_theta": 1000000.0,
-        "attention_dropout": 0.0,
-        "rope_scaling": {
-            "mrope_section": [
-                16,
-                24,
-                24
-            ],
-            "type": "default",
-            "rope_type": "default"
-        },
-        "return_dict": true,
-        "output_hidden_states": false,
-        "output_attentions": false,
-        "torchscript": false,
-        "torch_dtype": "bfloat16",
-        "use_bfloat16": false,
-        "tf_legacy_loss": false,
-        "pruned_heads": {},
-        "tie_word_embeddings": true,
-        "chunk_size_feed_forward": 0,
-        "is_encoder_decoder": false,
-        "is_decoder": false,
-        "cross_attention_hidden_size": null,
-        "add_cross_attention": false,
-        "tie_encoder_decoder": false,
-        "max_length": 20,
-        "min_length": 0,
-        "do_sample": false,
-        "early_stopping": false,
-        "num_beams": 1,
-        "num_beam_groups": 1,
-        "diversity_penalty": 0.0,
-        "temperature": 1.0,
-        "top_k": 50,
-        "top_p": 1.0,
-        "typical_p": 1.0,
-        "repetition_penalty": 1.0,
-        "length_penalty": 1.0,
-        "no_repeat_ngram_size": 0,
-        "encoder_no_repeat_ngram_size": 0,
-        "bad_words_ids": null,
-        "num_return_sequences": 1,
-        "output_scores": false,
-        "return_dict_in_generate": false,
-        "forced_bos_token_id": null,
-        "forced_eos_token_id": null,
-        "remove_invalid_values": false,
-        "exponential_decay_length_penalty": null,
-        "suppress_tokens": null,
-        "begin_suppress_tokens": null,
-        "architectures": [
-            "Qwen2VLForConditionalGeneration"
-        ],
-        "finetuning_task": null,
-        "id2label": {
-            "0": "LABEL_0",
-            "1": "LABEL_1"
-        },
-        "label2id": {
-            "LABEL_0": 0,
-            "LABEL_1": 1
-        },
-        "tokenizer_class": null,
-        "prefix": null,
-        "bos_token_id": 151643,
-        "pad_token_id": null,
-        "eos_token_id": 151645,
-        "sep_token_id": null,
-        "decoder_start_token_id": null,
-        "task_specific_params": null,
-        "problem_type": null,
-        "_name_or_path": "ShiniShiho/ShowUI-2B",
-        "_attn_implementation_autoset": true,
-        "image_token_id": 151655,
-        "model_type": "qwen2_vl",
-        "tokenizer_model_max_length": 4096,
-        "video_token_id": 151656,
-        "vision_end_token_id": 151653,
-        "vision_start_token_id": 151652,
-        "vision_token_id": 151654
-    },
-    "tf_legacy_loss": false,
-    "tie_encoder_decoder": false,
-    "tie_word_embeddings": true,
-    "tokenizer_class": null,
-    "tokenizer_model_max_length": 4096,
-    "top_k": 50,
-    "top_p": 1.0,
-    "torch_dtype": "bfloat16",
-    "torchscript": false,
-    "transformers_version": "4.48.3",
-    "typical_p": 1.0,
-    "use_bfloat16": false,
-    "use_cache": false,
-    "use_sliding_window": false,
-    "video_token_id": 151656,
-    "vision_config": {
-        "return_dict": true,
-        "output_hidden_states": false,
-        "output_attentions": false,
-        "torchscript": false,
-        "torch_dtype": null,
-        "use_bfloat16": false,
-        "tf_legacy_loss": false,
-        "pruned_heads": {},
-        "tie_word_embeddings": true,
-        "chunk_size_feed_forward": 0,
-        "is_encoder_decoder": false,
-        "is_decoder": false,
-        "cross_attention_hidden_size": null,
-        "add_cross_attention": false,
-        "tie_encoder_decoder": false,
-        "max_length": 20,
-        "min_length": 0,
-        "do_sample": false,
-        "early_stopping": false,
-        "num_beams": 1,
-        "num_beam_groups": 1,
-        "diversity_penalty": 0.0,
-        "temperature": 1.0,
-        "top_k": 50,
-        "top_p": 1.0,
-        "typical_p": 1.0,
-        "repetition_penalty": 1.0,
-        "length_penalty": 1.0,
-        "no_repeat_ngram_size": 0,
-        "encoder_no_repeat_ngram_size": 0,
-        "bad_words_ids": null,
-        "num_return_sequences": 1,
-        "output_scores": false,
-        "return_dict_in_generate": false,
-        "forced_bos_token_id": null,
-        "forced_eos_token_id": null,
-        "remove_invalid_values": false,
-        "exponential_decay_length_penalty": null,
-        "suppress_tokens": null,
-        "begin_suppress_tokens": null,
-        "architectures": null,
-        "finetuning_task": null,
-        "id2label": {
-            "0": "LABEL_0",
-            "1": "LABEL_1"
-        },
-        "label2id": {
-            "LABEL_0": 0,
-            "LABEL_1": 1
-        },
-        "tokenizer_class": null,
-        "prefix": null,
-        "bos_token_id": null,
-        "pad_token_id": null,
-        "eos_token_id": null,
-        "sep_token_id": null,
-        "decoder_start_token_id": null,
-        "task_specific_params": null,
-        "problem_type": null,
-        "_name_or_path": "",
-        "_attn_implementation_autoset": false,
-        "transformers_version": "4.48.3",
-        "in_chans": 3,
-        "model_type": "qwen2_vl",
-        "spatial_patch_size": 14,
-        "depth": 32,
-        "embed_dim": 1280,
-        "hidden_size": 1536,
-        "hidden_act": "quick_gelu",
-        "mlp_ratio": 4,
-        "num_heads": 16,
-        "in_channels": 3,
-        "patch_size": 14,
-        "spatial_merge_size": 2,
-        "temporal_patch_size": 2
-    },
-    "vision_end_token_id": 151653,
-    "vision_start_token_id": 151652,
-    "vision_token_id": 151654,
-    "vocab_size": 151936
-}

 {
+  "_name_or_path": "Qwen/Qwen2-VL-2B-Instruct",
+  "architectures": [
+    "Qwen2VLForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 1536,
+  "image_token_id": 151655,
+  "initializer_range": 0.02,
+  "intermediate_size": 8960,
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen2_vl",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": {
+    "mrope_section": [
+      16,
+      24,
+      24
     ],
+    "type": "mrope"
+  },
+  "rope_theta": 1000000.0,
+  "sliding_window": 32768,
+  "tie_word_embeddings": true,
+  "tokenizer_model_max_length": 4096,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.45.0.dev0",
+  "use_cache": false,
+  "use_sliding_window": false,
+  "video_token_id": 151656,
+  "vision_config": {
     "hidden_size": 1536,
+    "in_chans": 3,
     "model_type": "qwen2_vl",
+    "spatial_patch_size": 14
+  },
+  "vision_end_token_id": 151653,
+  "vision_start_token_id": 151652,
+  "vision_token_id": 151654,
+  "vocab_size": 151936
+}

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0715fbe4bee2611c2625e07c32e14c228bf81b416ee04c92df1ee2c0e4946253
-size 1388178008

 version https://git-lfs.github.com/spec/v1
+oid sha256:d5ac71ee7fda50033f79a375f26bd39f6e12f4595df830c33182658c78a5ed6f
+size 1543855784