danielhanchen committed
Commit c69d3e5 · verified · Parent: d304f1b

Add files using upload-large-folder tool
config.json CHANGED
@@ -35,16 +35,14 @@
  "model.layers.0.mlp",
  "model.layers.1.self_attn",
  "model.layers.1.mlp",
- "model.layers.2.self_attn",
  "model.layers.2.mlp",
+ "model.layers.3.self_attn",
  "model.layers.3.mlp",
  "model.layers.4.self_attn",
  "model.layers.4.mlp",
  "model.layers.5.mlp",
  "model.layers.6.mlp",
  "model.layers.26.mlp",
- "model.layers.41.mlp",
- "model.layers.43.mlp",
  "model.layers.44.mlp",
  "model.layers.45.mlp",
  "model.layers.46.mlp",
@@ -62,7 +60,7 @@
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
- "transformers_version": "4.48.0",
+ "transformers_version": "4.48.1",
  "unsloth_fixed": true,
  "use_cache": true,
  "use_sliding_window": false,
 
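The module list edited above pairs with the index.json changes further down: layer 2's attention and the layer 41/43 MLPs leave the list and gain NF4 quantization state, while layer 3's attention joins it and stays in bfloat16. The key enclosing the list sits outside the visible hunk; in Unsloth's bnb-4bit uploads it is typically quantization_config.llm_int8_skip_modules, which the following minimal sketch assumes:

```python
import json

# Minimal sketch: list the modules this checkpoint keeps un-quantized.
# Assumption (the enclosing key is outside the visible hunk): the array
# above is config["quantization_config"]["llm_int8_skip_modules"].
with open("config.json") as f:
    cfg = json.load(f)

skip = cfg.get("quantization_config", {}).get("llm_int8_skip_modules", [])
print(f"{len(skip)} modules kept in 16-bit")

# After this commit, layer 3's attention is skipped and layer 2's is not:
assert "model.layers.3.self_attn" in skip
assert "model.layers.2.self_attn" not in skip
```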
generation_config.json CHANGED
@@ -1,8 +1,11 @@
  {
  "_from_model_config": true,
- "bos_token_id": 151643,
+ "bos_token_id": 151646,
+ "do_sample": true,
  "eos_token_id": 151643,
  "max_length": 131072,
  "pad_token_id": 151654,
- "transformers_version": "4.48.0"
+ "temperature": 0.6,
+ "top_p": 0.95,
+ "transformers_version": "4.48.1"
  }
 
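generation_config.json now carries sampling defaults (do_sample with temperature 0.6 and top_p 0.95, the settings DeepSeek recommends for its R1 models), so a bare generate() call samples instead of decoding greedily. A minimal sketch, assuming the repo has been cloned into the working directory:

```python
from transformers import GenerationConfig

# Minimal sketch: the defaults added above travel with the checkpoint,
# so they apply whenever generate() runs with no sampling arguments.
gen = GenerationConfig.from_pretrained(".")  # local clone of this repo
assert gen.do_sample is True
assert gen.temperature == 0.6 and gen.top_p == 0.95
```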
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:09173480700dd1ae108c7fea3915d13e9c239fc8afd04b4a956a8306deb5479d
- size 4989228974
+ oid sha256:d7928b4f63506ae91d3d10750acb49dece5a864ca84d9bf482a0c2c5d0e3e277
+ size 4989228977
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cb148bcca6503a7371d35aeff8c737b45db7ec968ca8757cf92faab32ec76e50
- size 4523133895
+ oid sha256:5bf2cfb0ab37e7fc7eec8c38fc55b61fd26134e989298788e01d111d6098f51f
+ size 3892874285
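The .safetensors entries above are Git LFS pointer files: each records only the sha256 oid and byte size of the actual shard. Shard 3 shrinks by roughly 630 MB, which lines up with the drop in total_size in the index below. A downloaded shard can be verified against its pointer; a minimal sketch:

```python
import hashlib

# Minimal sketch: recompute a shard's sha256 and compare it with the
# oid recorded in the Git LFS pointer above.
def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()

expected = "d7928b4f63506ae91d3d10750acb49dece5a864ca84d9bf482a0c2c5d0e3e277"
actual = sha256_of("model-00001-of-00003.safetensors")
assert actual == expected, f"corrupt download: {actual}"
```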
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
  {
  "metadata": {
- "total_size": 14480061251
+ "total_size": 13849798100
  },
  "weight_map": {
  "lm_head.weight": "model-00003-of-00003.safetensors",
@@ -506,11 +506,31 @@
  "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
  "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
  "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.k_proj.weight.absmax": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.k_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.k_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.k_proj.weight.quant_map": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.k_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
  "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.o_proj.weight.absmax": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.o_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.o_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.o_proj.weight.quant_map": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.o_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
  "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
  "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.q_proj.weight.absmax": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.q_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.q_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.q_proj.weight.quant_map": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.q_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
  "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
  "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.v_proj.weight.absmax": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.v_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.v_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.v_proj.weight.quant_map": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.v_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
  "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
  "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
  "model.layers.20.mlp.down_proj.weight.absmax": "model-00002-of-00003.safetensors",
@@ -968,31 +988,11 @@
  "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
  "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
  "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.k_proj.weight.absmax": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.k_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.k_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.k_proj.weight.quant_map": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.k_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
  "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.o_proj.weight.absmax": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.o_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.o_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.o_proj.weight.quant_map": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.o_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
  "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
  "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.q_proj.weight.absmax": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.q_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.q_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.q_proj.weight.quant_map": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.q_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
  "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
  "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.v_proj.weight.absmax": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.v_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.v_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.v_proj.weight.quant_map": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.v_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
  "model.layers.30.input_layernorm.weight": "model-00002-of-00003.safetensors",
  "model.layers.30.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
  "model.layers.30.mlp.down_proj.weight.absmax": "model-00002-of-00003.safetensors",
@@ -1524,8 +1524,23 @@
  "model.layers.40.self_attn.v_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
  "model.layers.41.input_layernorm.weight": "model-00003-of-00003.safetensors",
  "model.layers.41.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.down_proj.weight.absmax": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.down_proj.weight.nested_absmax": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.down_proj.weight.nested_quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.down_proj.weight.quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.down_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
  "model.layers.41.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.gate_proj.weight.absmax": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.gate_proj.weight.nested_absmax": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.gate_proj.weight.nested_quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.gate_proj.weight.quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
  "model.layers.41.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.up_proj.weight.absmax": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.up_proj.weight.nested_absmax": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.up_proj.weight.nested_quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.up_proj.weight.quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
  "model.layers.41.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
  "model.layers.41.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
  "model.layers.41.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
@@ -1603,8 +1618,23 @@
  "model.layers.42.self_attn.v_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
  "model.layers.43.input_layernorm.weight": "model-00003-of-00003.safetensors",
  "model.layers.43.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.down_proj.weight.absmax": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.down_proj.weight.nested_absmax": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.down_proj.weight.nested_quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.down_proj.weight.quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.down_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
  "model.layers.43.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.gate_proj.weight.absmax": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.gate_proj.weight.nested_absmax": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.gate_proj.weight.nested_quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.gate_proj.weight.quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
  "model.layers.43.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.up_proj.weight.absmax": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.up_proj.weight.nested_absmax": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.up_proj.weight.nested_quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.up_proj.weight.quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
  "model.layers.43.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
  "model.layers.43.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
  "model.layers.43.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
 
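Every NF4-quantized weight stores its bitsandbytes double-quantization state as five companion tensors (.absmax, .nested_absmax, .nested_quant_map, .quant_map, .quant_state.bitsandbytes__nf4), so quantizing a module adds five index entries per projection and skipping one removes them; the smaller total_size reflects the layer 41/43 MLPs moving from bfloat16 to 4-bit. A minimal consistency check over the updated index:

```python
import json

# Minimal sketch: confirm every NF4 weight in the index carries its full
# set of bitsandbytes companion tensors.
SUFFIXES = (
    ".absmax",
    ".nested_absmax",
    ".nested_quant_map",
    ".quant_map",
    ".quant_state.bitsandbytes__nf4",
)

with open("model.safetensors.index.json") as f:
    index = json.load(f)

keys = set(index["weight_map"])
bases = [k[: -len(SUFFIXES[-1])] for k in keys if k.endswith(SUFFIXES[-1])]
for base in bases:
    missing = [s for s in SUFFIXES if base + s not in keys]
    assert not missing, f"{base} is missing {missing}"

print(f"{len(bases)} NF4 weights, total_size={index['metadata']['total_size']}")
```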
tokenizer_config.json CHANGED
@@ -181,12 +181,12 @@
  }
  },
  "bos_token": "<|begin▁of▁sentence|>",
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|end▁of▁sentence|>",
  "extra_special_tokens": {},
  "legacy": true,
- "model_max_length": 16384,
+ "model_max_length": 131072,
  "pad_token": "<|vision_pad|>",
  "padding_side": "left",
  "sp_model_kwargs": {},
 
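Two changes here: model_max_length rises from 16384 to 131072, matching the max_length already set in generation_config.json, and the chat template no longer emits {{bos_token}} itself, presumably so BOS is not duplicated when the tokenizer prepends it during encoding. A minimal sketch, assuming a local clone of the repo:

```python
from transformers import AutoTokenizer

# Minimal sketch: render the updated chat template. The template itself
# no longer prepends the BOS token.
tok = AutoTokenizer.from_pretrained(".")  # local clone of this repo
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
text = tok.apply_chat_template(messages, tokenize=False,
                               add_generation_prompt=True)
assert not text.startswith(tok.bos_token)  # BOS is left to the tokenizer
assert tok.model_max_length == 131072
print(text)  # system prompt, then "<|User|>Hello!<|Assistant|>"
```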