danielhanchen
committed on
Add files using upload-large-folder tool
Browse files
- config.json +2 -4
- generation_config.json +5 -2
- model-00001-of-00003.safetensors +2 -2
- model-00003-of-00003.safetensors +2 -2
- model.safetensors.index.json +51 -21
- tokenizer_config.json +2 -2
config.json
CHANGED
@@ -35,16 +35,14 @@
       "model.layers.0.mlp",
       "model.layers.1.self_attn",
       "model.layers.1.mlp",
-      "model.layers.2.self_attn",
       "model.layers.2.mlp",
+      "model.layers.3.self_attn",
       "model.layers.3.mlp",
       "model.layers.4.self_attn",
       "model.layers.4.mlp",
       "model.layers.5.mlp",
       "model.layers.6.mlp",
       "model.layers.26.mlp",
-      "model.layers.41.mlp",
-      "model.layers.43.mlp",
       "model.layers.44.mlp",
       "model.layers.45.mlp",
       "model.layers.46.mlp",
@@ -62,7 +60,7 @@
   "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.48.
+  "transformers_version": "4.48.1",
   "unsloth_fixed": true,
   "use_cache": true,
   "use_sliding_window": false,
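The edited list above is the set of modules skipped during 4-bit quantization. In Unsloth's dynamic-quant checkpoints that list normally sits at quantization_config.llm_int8_skip_modules in config.json; the diff context omits the surrounding key, so that location is an assumption here. A minimal sketch of checking the new state:

import json

# Minimal sketch; assumes the edited list lives under
# quantization_config["llm_int8_skip_modules"], as in Unsloth bnb-4bit configs.
with open("config.json") as f:
    cfg = json.load(f)

skipped = set(cfg["quantization_config"]["llm_int8_skip_modules"])

# After this commit, layer 3's attention is skipped (kept in 16-bit), while
# layer 2's attention and the layer 41/43 MLPs are quantized again.
print("model.layers.3.self_attn" in skipped)  # True
print("model.layers.2.self_attn" in skipped)  # False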
generation_config.json
CHANGED
@@ -1,8 +1,11 @@
 {
   "_from_model_config": true,
-  "bos_token_id":
+  "bos_token_id": 151646,
+  "do_sample": true,
   "eos_token_id": 151643,
   "max_length": 131072,
   "pad_token_id": 151654,
-  "
+  "temperature": 0.6,
+  "top_p": 0.95,
+  "transformers_version": "4.48.1"
 }
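The new defaults turn sampling on at temperature 0.6 and top-p 0.95, so a bare model.generate() call picks them up with no extra arguments. A minimal sketch reading the defaults back from a local copy of this file (the "." path is illustrative):

from transformers import GenerationConfig

# Loads generation_config.json from the given directory.
gen = GenerationConfig.from_pretrained(".")
print(gen.do_sample, gen.temperature, gen.top_p)  # True 0.6 0.95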
model-00001-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:d7928b4f63506ae91d3d10750acb49dece5a864ca84d9bf482a0c2c5d0e3e277
+size 4989228977
model-00003-of-00003.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:5bf2cfb0ab37e7fc7eec8c38fc55b61fd26134e989298788e01d111d6098f51f
+size 3892874285
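These are Git LFS pointer files: the oid is the SHA-256 of the actual shard, so a download can be verified locally. A minimal sketch checking the first shard against its new hash:

import hashlib

def sha256_of(path, chunk_size=1 << 20):
    # Stream the ~5 GB shard in 1 MiB chunks instead of loading it whole.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk_size):
            h.update(block)
    return h.hexdigest()

expected = "d7928b4f63506ae91d3d10750acb49dece5a864ca84d9bf482a0c2c5d0e3e277"
assert sha256_of("model-00001-of-00003.safetensors") == expected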
model.safetensors.index.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size":
+    "total_size": 13849798100
   },
   "weight_map": {
     "lm_head.weight": "model-00003-of-00003.safetensors",
@@ -506,11 +506,31 @@
     "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
     "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
     "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.k_proj.weight.absmax": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.k_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.k_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.k_proj.weight.quant_map": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.k_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
     "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.o_proj.weight.absmax": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.o_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.o_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.o_proj.weight.quant_map": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.o_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
     "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
     "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.q_proj.weight.absmax": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.q_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.q_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.q_proj.weight.quant_map": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.q_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
     "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
     "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.v_proj.weight.absmax": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.v_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.v_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.v_proj.weight.quant_map": "model-00001-of-00003.safetensors",
+    "model.layers.2.self_attn.v_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
     "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
     "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
     "model.layers.20.mlp.down_proj.weight.absmax": "model-00002-of-00003.safetensors",
@@ -968,31 +988,11 @@
     "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
     "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00003.safetensors",
     "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
-    "model.layers.3.self_attn.k_proj.weight.absmax": "model-00001-of-00003.safetensors",
-    "model.layers.3.self_attn.k_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
-    "model.layers.3.self_attn.k_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
-    "model.layers.3.self_attn.k_proj.weight.quant_map": "model-00001-of-00003.safetensors",
-    "model.layers.3.self_attn.k_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
     "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
-    "model.layers.3.self_attn.o_proj.weight.absmax": "model-00001-of-00003.safetensors",
-    "model.layers.3.self_attn.o_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
-    "model.layers.3.self_attn.o_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
-    "model.layers.3.self_attn.o_proj.weight.quant_map": "model-00001-of-00003.safetensors",
-    "model.layers.3.self_attn.o_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
     "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00003.safetensors",
     "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
-    "model.layers.3.self_attn.q_proj.weight.absmax": "model-00001-of-00003.safetensors",
-    "model.layers.3.self_attn.q_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
-    "model.layers.3.self_attn.q_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
-    "model.layers.3.self_attn.q_proj.weight.quant_map": "model-00001-of-00003.safetensors",
-    "model.layers.3.self_attn.q_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
     "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00003.safetensors",
     "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
-    "model.layers.3.self_attn.v_proj.weight.absmax": "model-00001-of-00003.safetensors",
-    "model.layers.3.self_attn.v_proj.weight.nested_absmax": "model-00001-of-00003.safetensors",
-    "model.layers.3.self_attn.v_proj.weight.nested_quant_map": "model-00001-of-00003.safetensors",
-    "model.layers.3.self_attn.v_proj.weight.quant_map": "model-00001-of-00003.safetensors",
-    "model.layers.3.self_attn.v_proj.weight.quant_state.bitsandbytes__nf4": "model-00001-of-00003.safetensors",
     "model.layers.30.input_layernorm.weight": "model-00002-of-00003.safetensors",
     "model.layers.30.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
     "model.layers.30.mlp.down_proj.weight.absmax": "model-00002-of-00003.safetensors",
@@ -1524,8 +1524,23 @@
     "model.layers.40.self_attn.v_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
     "model.layers.41.input_layernorm.weight": "model-00003-of-00003.safetensors",
     "model.layers.41.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.41.mlp.down_proj.weight.absmax": "model-00003-of-00003.safetensors",
+    "model.layers.41.mlp.down_proj.weight.nested_absmax": "model-00003-of-00003.safetensors",
+    "model.layers.41.mlp.down_proj.weight.nested_quant_map": "model-00003-of-00003.safetensors",
+    "model.layers.41.mlp.down_proj.weight.quant_map": "model-00003-of-00003.safetensors",
+    "model.layers.41.mlp.down_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
     "model.layers.41.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.41.mlp.gate_proj.weight.absmax": "model-00003-of-00003.safetensors",
+    "model.layers.41.mlp.gate_proj.weight.nested_absmax": "model-00003-of-00003.safetensors",
+    "model.layers.41.mlp.gate_proj.weight.nested_quant_map": "model-00003-of-00003.safetensors",
+    "model.layers.41.mlp.gate_proj.weight.quant_map": "model-00003-of-00003.safetensors",
+    "model.layers.41.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
     "model.layers.41.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.41.mlp.up_proj.weight.absmax": "model-00003-of-00003.safetensors",
+    "model.layers.41.mlp.up_proj.weight.nested_absmax": "model-00003-of-00003.safetensors",
+    "model.layers.41.mlp.up_proj.weight.nested_quant_map": "model-00003-of-00003.safetensors",
+    "model.layers.41.mlp.up_proj.weight.quant_map": "model-00003-of-00003.safetensors",
+    "model.layers.41.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
     "model.layers.41.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
     "model.layers.41.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
     "model.layers.41.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
@@ -1603,8 +1618,23 @@
     "model.layers.42.self_attn.v_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
     "model.layers.43.input_layernorm.weight": "model-00003-of-00003.safetensors",
     "model.layers.43.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.43.mlp.down_proj.weight.absmax": "model-00003-of-00003.safetensors",
+    "model.layers.43.mlp.down_proj.weight.nested_absmax": "model-00003-of-00003.safetensors",
+    "model.layers.43.mlp.down_proj.weight.nested_quant_map": "model-00003-of-00003.safetensors",
+    "model.layers.43.mlp.down_proj.weight.quant_map": "model-00003-of-00003.safetensors",
+    "model.layers.43.mlp.down_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
     "model.layers.43.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.43.mlp.gate_proj.weight.absmax": "model-00003-of-00003.safetensors",
+    "model.layers.43.mlp.gate_proj.weight.nested_absmax": "model-00003-of-00003.safetensors",
+    "model.layers.43.mlp.gate_proj.weight.nested_quant_map": "model-00003-of-00003.safetensors",
+    "model.layers.43.mlp.gate_proj.weight.quant_map": "model-00003-of-00003.safetensors",
+    "model.layers.43.mlp.gate_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
     "model.layers.43.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+    "model.layers.43.mlp.up_proj.weight.absmax": "model-00003-of-00003.safetensors",
+    "model.layers.43.mlp.up_proj.weight.nested_absmax": "model-00003-of-00003.safetensors",
+    "model.layers.43.mlp.up_proj.weight.nested_quant_map": "model-00003-of-00003.safetensors",
+    "model.layers.43.mlp.up_proj.weight.quant_map": "model-00003-of-00003.safetensors",
+    "model.layers.43.mlp.up_proj.weight.quant_state.bitsandbytes__nf4": "model-00003-of-00003.safetensors",
     "model.layers.43.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
     "model.layers.43.self_attn.k_proj.bias": "model-00003-of-00003.safetensors",
     "model.layers.43.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
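The index edits mirror the config.json change: layer 2's attention projections and the layer 41/43 MLPs gain bitsandbytes NF4 quant-state tensors, while layer 3's attention loses them. A minimal sketch inspecting the updated index:

import json

with open("model.safetensors.index.json") as f:
    index = json.load(f)

print(index["metadata"]["total_size"])  # 13849798100

# Layer 2's k_proj now ships NF4 metadata (absmax, quant maps, quant state)...
print([k for k in index["weight_map"]
       if k.startswith("model.layers.2.self_attn.k_proj.weight.")])

# ...while layer 3's attention carries no quant-state entries anymore.
assert not any(k.startswith("model.layers.3.self_attn.q_proj.weight.")
               for k in index["weight_map"])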
tokenizer_config.json
CHANGED
@@ -181,12 +181,12 @@
     }
   },
   "bos_token": "<|begin▁of▁sentence|>",
-  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{
+  "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|end▁of▁sentence|>",
   "extra_special_tokens": {},
   "legacy": true,
-  "model_max_length":
+  "model_max_length": 131072,
   "pad_token": "<|vision_pad|>",
   "padding_side": "left",
   "sp_model_kwargs": {},
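With the full template restored, the tokenizer renders conversations with the <|User|>/<|Assistant|> markers again (the template also strips anything before a closing </think> from prior assistant turns). A minimal sketch, assuming a local checkout of this repo:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(".")  # directory holding this tokenizer_config.json
text = tok.apply_chat_template(
    [{"role": "user", "content": "Hi"}],
    tokenize=False,
    add_generation_prompt=True,
)
print(text)  # ends with <|User|>Hi<|Assistant|>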