danielhanchen committed
Commit c69d3e5 · verified · Parent: d304f1b

Add files using upload-large-folder tool
config.json CHANGED
@@ -35,16 +35,14 @@
  "model.layers.0.mlp",
  "model.layers.1.self_attn",
  "model.layers.1.mlp",
- "model.layers.2.self_attn",
  "model.layers.2.mlp",
+ "model.layers.3.self_attn",
  "model.layers.3.mlp",
  "model.layers.4.self_attn",
  "model.layers.4.mlp",
  "model.layers.5.mlp",
  "model.layers.6.mlp",
  "model.layers.26.mlp",
- "model.layers.41.mlp",
- "model.layers.43.mlp",
  "model.layers.44.mlp",
  "model.layers.45.mlp",
  "model.layers.46.mlp",
@@ -62,7 +60,7 @@
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
- "transformers_version": "4.48.0",
+ "transformers_version": "4.48.1",
  "unsloth_fixed": true,
  "use_cache": true,
  "use_sliding_window": false,
 
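The module list edited above pairs with the index.json changes further down: layer 2's attention and the layer 41/43 MLPs leave the list and gain NF4 quantization state, while layer 3's attention joins it and stays in bfloat16. The key enclosing the list sits outside the visible hunk; in Unsloth's bnb-4bit uploads it is typically quantization_config.llm_int8_skip_modules, which the following minimal sketch assumes:

```python
import json

# Minimal sketch: list the modules this checkpoint keeps un-quantized.
# Assumption (the enclosing key is outside the visible hunk): the array
# above is config["quantization_config"]["llm_int8_skip_modules"].
with open("config.json") as f:
    cfg = json.load(f)

skip = cfg.get("quantization_config", {}).get("llm_int8_skip_modules", [])
print(f"{len(skip)} modules kept in 16-bit")

# After this commit, layer 3's attention is skipped and layer 2's is not:
assert "model.layers.3.self_attn" in skip
assert "model.layers.2.self_attn" not in skip
```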
generation_config.json CHANGED
@@ -1,8 +1,11 @@
  {
  "_from_model_config": true,
- "bos_token_id": 151643,
+ "bos_token_id": 151646,
+ "do_sample": true,
  "eos_token_id": 151643,
  "max_length": 131072,
  "pad_token_id": 151654,
- "transformers_version": "4.48.0"
+ "temperature": 0.6,
+ "top_p": 0.95,
+ "transformers_version": "4.48.1"
  }
 
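generation_config.json now carries sampling defaults (do_sample with temperature 0.6 and top_p 0.95, the settings DeepSeek recommends for its R1 models), so a bare generate() call samples instead of decoding greedily. A minimal sketch, assuming the repo has been cloned into the working directory:

```python
from transformers import GenerationConfig

# Minimal sketch: the defaults added above travel with the checkpoint,
# so they apply whenever generate() runs with no sampling arguments.
gen = GenerationConfig.from_pretrained(".")  # local clone of this repo
assert gen.do_sample is True
assert gen.temperature == 0.6 and gen.top_p == 0.95
```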
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:09173480700dd1ae108c7fea3915d13e9c239fc8afd04b4a956a8306deb5479d
- size 4989228974
+ oid sha256:d7928b4f63506ae91d3d10750acb49dece5a864ca84d9bf482a0c2c5d0e3e277
+ size 4989228977
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cb148bcca6503a7371d35aeff8c737b45db7ec968ca8757cf92faab32ec76e50
- size 4523133895
+ oid sha256:5bf2cfb0ab37e7fc7eec8c38fc55b61fd26134e989298788e01d111d6098f51f
+ size 3892874285
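The .safetensors entries above are Git LFS pointer files: each records only the sha256 oid and byte size of the actual shard. Shard 3 shrinks by roughly 630 MB, which lines up with the drop in total_size in the index below. A downloaded shard can be verified against its pointer; a minimal sketch:

```python
import hashlib

# Minimal sketch: recompute a shard's sha256 and compare it with the
# oid recorded in the Git LFS pointer above.
def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()

expected = "d7928b4f63506ae91d3d10750acb49dece5a864ca84d9bf482a0c2c5d0e3e277"
actual = sha256_of("model-00001-of-00003.safetensors")
assert actual == expected, f"corrupt download: {actual}"
```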
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
  {
  "metadata": {
- "total_size": 14480061251
+ "total_size": 13849798100
  },
  "weight_map": {
  "lm_head.weight": "model-00003-of-00003.safetensors",
@@ -506,11 +506,31 @@
  "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
  "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
  "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.k_proj.weight.absmax": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.k_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.k_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.k_proj.weight.quant_map": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.k_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
  "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.o_proj.weight.absmax": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.o_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.o_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.o_proj.weight.quant_map": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.o_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
  "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
  "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.q_proj.weight.absmax": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.q_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.q_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.q_proj.weight.quant_map": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.q_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
  "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
  "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.v_proj.weight.absmax": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.v_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.v_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.v_proj.weight.quant_map": "model-00001-of-00003.safetensors",
+ "model.layers.2.self_attn.v_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
  "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
  "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
  "model.layers.20.mlp.down_proj.weight.absmax": "model-00002-of-00003.safetensors",
@@ -968,31 +988,11 @@
  "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
  "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
  "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.k_proj.weight.absmax": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.k_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.k_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.k_proj.weight.quant_map": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.k_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
  "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.o_proj.weight.absmax": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.o_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.o_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.o_proj.weight.quant_map": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.o_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
  "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
  "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.q_proj.weight.absmax": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.q_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.q_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.q_proj.weight.quant_map": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.q_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
  "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
  "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.v_proj.weight.absmax": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.v_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.v_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.v_proj.weight.quant_map": "model-00001-of-00003.safetensors",
- "model.layers.3.self_attn.v_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
  "model.layers.30.input_layernorm.weight": "model-00002-of-00003.safetensors",
  "model.layers.30.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
  "model.layers.30.mlp.down_proj.weight.absmax": "model-00002-of-00003.safetensors",
@@ -1524,8 +1524,23 @@
  "model.layers.40.self_attn.v_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
  "model.layers.41.input_layernorm.weight": "model-00003-of-00003.safetensors",
  "model.layers.41.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.down_proj.weight.absmax": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.down_proj.weight.nested_absmax": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.down_proj.weight.nested_quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.down_proj.weight.quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.down_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
  "model.layers.41.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.gate_proj.weight.absmax": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.gate_proj.weight.nested_absmax": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.gate_proj.weight.nested_quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.gate_proj.weight.quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
  "model.layers.41.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.up_proj.weight.absmax": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.up_proj.weight.nested_absmax": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.up_proj.weight.nested_quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.up_proj.weight.quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.41.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
  "model.layers.41.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
  "model.layers.41.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
  "model.layers.41.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
@@ -1603,8 +1618,23 @@
  "model.layers.42.self_attn.v_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
  "model.layers.43.input_layernorm.weight": "model-00003-of-00003.safetensors",
  "model.layers.43.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.down_proj.weight.absmax": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.down_proj.weight.nested_absmax": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.down_proj.weight.nested_quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.down_proj.weight.quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.down_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
  "model.layers.43.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.gate_proj.weight.absmax": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.gate_proj.weight.nested_absmax": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.gate_proj.weight.nested_quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.gate_proj.weight.quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
  "model.layers.43.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.up_proj.weight.absmax": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.up_proj.weight.nested_absmax": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.up_proj.weight.nested_quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.up_proj.weight.quant_map": "model-00003-of-00003.safetensors",
+ "model.layers.43.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
  "model.layers.43.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
  "model.layers.43.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
  "model.layers.43.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
 
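Every NF4-quantized weight stores its bitsandbytes double-quantization state as five companion tensors (.absmax, .nested_absmax, .nested_quant_map, .quant_map, .quant_state.bitsandbytes__nf4), so quantizing a module adds five index entries per projection and skipping one removes them; the smaller total_size reflects the layer 41/43 MLPs moving from bfloat16 to 4-bit. A minimal consistency check over the updated index:

```python
import json

# Minimal sketch: confirm every NF4 weight in the index carries its full
# set of bitsandbytes companion tensors.
SUFFIXES = (
    ".absmax",
    ".nested_absmax",
    ".nested_quant_map",
    ".quant_map",
    ".quant_state.bitsandbytes__nf4",
)

with open("model.safetensors.index.json") as f:
    index = json.load(f)

keys = set(index["weight_map"])
bases = [k[: -len(SUFFIXES[-1])] for k in keys if k.endswith(SUFFIXES[-1])]
for base in bases:
    missing = [s for s in SUFFIXES if base + s not in keys]
    assert not missing, f"{base} is missing {missing}"

print(f"{len(bases)} NF4 weights, total_size={index['metadata']['total_size']}")
```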
tokenizer_config.json CHANGED
@@ -181,12 +181,12 @@
  }
  },
  "bos_token": "<|begin▁of▁sentence|>",
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|end▁of▁sentence|>",
  "extra_special_tokens": {},
  "legacy": true,
- "model_max_length": 16384,
+ "model_max_length": 131072,
  "pad_token": "<|vision_pad|>",
  "padding_side": "left",
  "sp_model_kwargs": {},
 
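Two changes here: model_max_length rises from 16384 to 131072, matching the max_length already set in generation_config.json, and the chat template no longer emits {{bos_token}} itself, presumably so BOS is not duplicated when the tokenizer prepends it during encoding. A minimal sketch, assuming a local clone of the repo:

```python
from transformers import AutoTokenizer

# Minimal sketch: render the updated chat template. The template itself
# no longer prepends the BOS token.
tok = AutoTokenizer.from_pretrained(".")  # local clone of this repo
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
text = tok.apply_chat_template(messages, tokenize=False,
                               add_generation_prompt=True)
assert not text.startswith(tok.bos_token)  # BOS is left to the tokenizer
assert tok.model_max_length == 131072
print(text)  # system prompt, then "<|User|>Hello!<|Assistant|>"
```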