jianguozhang
committed on
Update xLAM tokenizer chat templates and examples output
Browse files
Summary of changes:
- fc mode:
- changed `<s>[INST] user[/INST]` to `<s> [INST] user [/INST]`
- fixed `\n\n` issues
Check the [Jupyter notebook](https://huggingface.co/Salesforce/xLAM-8x22b-r/blob/main/example/xlam_chat_template_examples_11_21.ipynb) for the template and example outputs.
- tokenizer_config.json +1 -1
tokenizer_config.json
CHANGED
@@ -32,7 +32,7 @@
|
|
32 |
"bos_token": "<s>",
|
33 |
"chat_template": {
|
34 |
"default": "{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif %}\n {%- if message['role'] == 'user' %}\n {%- if loop.first and system_message is defined %}\n {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n {%- else %}\n {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n {%- endif %}\n {%- elif message['role'] == 'assistant' %}\n {{- ' ' + message['content'] + eos_token}}\n {%- else %}\n {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n {%- endif %}\n{%- endfor %}\n",
|
35 |
-
"tool_use": "{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- if messages[1][\"role\"] == \"system\" %}\n {%- set format_message = messages[1][\"content\"] %}\n {%- set loop_messages = messages[2:] %}\n {%- else %}\n {%- set loop_messages = messages[1:] %}\n {%- endif %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n {%- if ((message[\"role\"] == \"user\") and (ns.index % 2 != 0)) or (message[\"role\"] not in [\"user\", \"assistant\", \"tool\", \"tool_results\"]) %}\n {{- raise_exception(\"After the optional system message, conversation roles can only be from user/assistant/tool; After each tool message, the next message must be from the assistant\") }}\n {%- endif %}\n {%- set ns.index = ns.index + 1 %}\n{%- endfor %}\n\n{{- bos_token }}\n{{- \"[INST]\" }}\n{%- if system_message is not defined %}\n {% set system_message %}\nYou are an expert in composing functions. You are given a question and a set of possible functions. \nBased on the question, you will need to make one or more function/tool calls to achieve the purpose. \nIf none of the functions can be used, point it out and refuse to answer. \nIf the given question lacks the parameters required by the function, also point it out.{% endset %}\n{%- endif %}\n{{- \"\n[BEGIN OF TASK INSTRUCTION]\n\" + system_message + \"\n[END OF TASK INSTRUCTION]\n\n\" }}\n\n{%- if tools is not none %}\n {{- \"[BEGIN OF AVAILABLE_TOOLS]\n\" }}\n {{- tools|string }}\n {{- \"\n[END OF AVAILABLE_TOOLS]\n\n\" }}\n{%- endif %}\n \n{%- if format_message is not defined %}\n {% set format_message %}\nYour output should be in the JSON format, which specifies a list of function calls. 
The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make tool_calls an empty list '[]'.\n```{\"thought\": \"the thought process, or an empty string\", \"tool_calls\": [{\"name\": \"api_name1\", \"arguments\": {\"argument1\": \"value1\", \"argument2\": \"value2\"}}]}```\n {% endset %}\n{%- endif %}\n{{- \"[BEGIN OF FORMAT INSTRUCTION]\n\" + format_message + \"[END OF FORMAT INSTRUCTION]\n\n\" }}\n\n{%- if loop_messages[0][\"role\"] == \"user\" %}\n {%- set query = loop_messages[0][\"content\"] %}\n {{- \"[BEGIN OF QUERY]\n\" + query + \"\n[END OF QUERY]\n\" }}\n {%- set loop_messages =
|
36 |
},
|
37 |
"clean_up_tokenization_spaces": false,
|
38 |
"eos_token": "</s>",
|
|
|
32 |
"bos_token": "<s>",
|
33 |
"chat_template": {
|
34 |
"default": "{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n\n{{- bos_token }}\n{%- for message in loop_messages %}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif %}\n {%- if message['role'] == 'user' %}\n {%- if loop.first and system_message is defined %}\n {{- ' [INST] ' + system_message + '\\n\\n' + message['content'] + ' [/INST]' }}\n {%- else %}\n {{- ' [INST] ' + message['content'] + ' [/INST]' }}\n {%- endif %}\n {%- elif message['role'] == 'assistant' %}\n {{- ' ' + message['content'] + eos_token}}\n {%- else %}\n {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}\n {%- endif %}\n{%- endfor %}\n",
|
35 |
+
"tool_use": "{%- if messages[0][\"role\"] == \"system\" %}\n {%- set system_message = messages[0][\"content\"] %}\n {%- if messages[1][\"role\"] == \"system\" %}\n {%- set format_message = messages[1][\"content\"] %}\n {%- set loop_messages = messages[2:] %}\n {%- else %}\n {%- set loop_messages = messages[1:] %}\n {%- endif %}\n{%- else %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block checks for alternating user/assistant messages, skipping tool calling messages #}\n{%- set ns = namespace() %}\n{%- set ns.index = 0 %}\n{%- for message in loop_messages %}\n {%- if ((message[\"role\"] == \"user\") and (ns.index % 2 != 0)) or (message[\"role\"] not in [\"user\", \"assistant\", \"tool\", \"tool_results\"]) %}\n {{- raise_exception(\"After the optional system message, conversation roles can only be from user/assistant/tool; After each tool message, the next message must be from the assistant\") }}\n {%- endif %}\n {%- set ns.index = ns.index + 1 %}\n{%- endfor %}\n\n{{- bos_token }}\n{{- \" [INST]\" }}\n{%- if system_message is not defined %}\n {% set system_message %}\nYou are an expert in composing functions. You are given a question and a set of possible functions. \nBased on the question, you will need to make one or more function/tool calls to achieve the purpose. \nIf none of the functions can be used, point it out and refuse to answer. \nIf the given question lacks the parameters required by the function, also point it out.{% endset %}\n{%- endif %}\n{{- \"\n[BEGIN OF TASK INSTRUCTION]\n\" + system_message + \"\n[END OF TASK INSTRUCTION]\n\n\" }}\n\n{%- if tools is not none %}\n {{- \"[BEGIN OF AVAILABLE_TOOLS]\n\" }}\n {{- tools|string }}\n {{- \"\n[END OF AVAILABLE_TOOLS]\n\n\" }}\n{%- endif %}\n \n{%- if format_message is not defined %}\n {% set format_message %}\nYour output should be in the JSON format, which specifies a list of function calls. 
The example format is as follows. Please make sure the parameter type is correct. If no function call is needed, please make tool_calls an empty list '[]'.\n```{\"thought\": \"the thought process, or an empty string\", \"tool_calls\": [{\"name\": \"api_name1\", \"arguments\": {\"argument1\": \"value1\", \"argument2\": \"value2\"}}]}```\n {% endset %}\n{%- endif %}\n{{- \"[BEGIN OF FORMAT INSTRUCTION]\n\" + format_message + \"[END OF FORMAT INSTRUCTION]\n\n\" }}\n\n{%- if loop_messages[0][\"role\"] == \"user\" %}\n {%- set query = loop_messages[0][\"content\"] %}\n {%- set remaining_messages = loop_messages[1:] %}\n {{- \"[BEGIN OF QUERY]\n\" + query + \"\n[END OF QUERY]\" }}\n {%- if remaining_messages|length == 0 %}\n {{- \"\n\" }}\n {%- endif %}\n {%- set loop_messages = remaining_messages %}\n{%- endif %}\n\n{% if loop_messages %}\n {{- \"\n[BEGIN OF HISTORY STEPS]\n[\" }}\n {%- set step_id = namespace(value=1) %}\n {%- for message in loop_messages %}\n {%- if message[\"role\"] == \"assistant\" %}\n {%- if message.tool_calls is defined and message.tool_calls is not none %}\n {% if message.tool_calls is iterable and (message.tool_calls is not string and message.tool_calls is not mapping) %}\n {{- \"{'thought':\" + message.content|tojson + \", 'tool_calls':\" + message.tool_calls|tojson + \", 'step_id':\" + step_id.value|string + \",\" }}\n {%- else %}\n {{- raise_exception(\"The tool_calls must be a list!\") }}\n {%- endif %}\n {%- else %}\n {{- \"{'thought':\" + message.content|tojson + \", 'tool_calls':[]\" + \", 'step_id':\" + step_id.value|string + \",\" }}\n {%- endif %}\n {%- if loop.nextitem is not defined %}\n {{- \" 'next_observation':''}\" }}\n {%- elif loop.nextitem[\"role\"] == \"user\" %}\n {{- \" 'next_observation':''\" }}\n {%- elif loop.nextitem[\"role\"] != \"tool_results\" and loop.nextitem[\"role\"] != \"tool\" %}\n {{- \" 'next_observation':''},\" }}\n {%- endif %}\n {%- elif message[\"role\"] == \"tool_results\" or message[\"role\"] == 
\"tool\" %}\n {{- \" 'next_observation':\" + message.content|tojson }}\n {%- if loop.nextitem is defined and loop.nextitem[\"role\"] != \"user\" %}\n {{- \"}, \" }}\n {%- elif not loop.nextitem is defined %}\n {{- \"} \" }}\n {%- endif %}\n {%- set step_id.value = step_id.value + 1 %}\n {%- elif message[\"role\"] == \"user\" %}\n {{- \", 'user_input':\" + message.content|tojson }}\n {%- if loop.nextitem is defined %}\n {{- \"}, \" }}\n {%- else %}\n {{- \"} \" }}\n {%- endif %}\n {%- set step_id.value = step_id.value + 1 %}\n {%- endif %}\n {%- endfor %}\n {{- \"]\n[END OF HISTORY STEPS]\n\n\"}}\n{%- endif %}\n{{- \" [/INST]\" }}"
|
36 |
},
|
37 |
"clean_up_tokenization_spaces": false,
|
38 |
"eos_token": "</s>",
|