Struggling to get the weather example working
When I run the example from the README, the output is just random text that changes with each run. Here are a couple of example scripts and the responses they produce:
from transformers import AutoModelForCausalLM, AutoTokenizer
import json
FUNCTION_METADATA = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "This function gets the current weather in a given city",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {
                        "type": "string",
                        "description": "The city, e.g., San Francisco"
                    },
                    "format": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "description": "The temperature unit to use."
                    }
                },
                "required": ["city"]
            }
        }
    },
    {
        "type": "function",
        "function": {
            "name": "get_clothes",
            "description": "This function provides a suggestion of clothes to wear based on the current weather",
            "parameters": {
                "type": "object",
                "properties": {
                    "temperature": {
                        "type": "string",
                        "description": "The temperature, e.g., 15 C or 59 F"
                    },
                    "condition": {
                        "type": "string",
                        "description": "The weather condition, e.g., 'Cloudy', 'Sunny', 'Rainy'"
                    }
                },
                "required": ["temperature", "condition"]
            }
        }
    }
]
device = "cuda"
# Load up the model and the tokenizer
model = AutoModelForCausalLM.from_pretrained('Trelis/Mistral-7B-Instruct-v0.2-function-calling-v3', trust_remote_code=True)#, torch_dtype=torch.float16)
model.half()
model.to(device)
tokenizer = AutoTokenizer.from_pretrained("Trelis/Mistral-7B-Instruct-v0.2-function-calling-v3", trust_remote_code=True)
B_FUNC, E_FUNC = "You have access to the following functions. Use them if required:\n\n", "\n\n"
B_INST, E_INST = "[INST] ", " [/INST]" #Llama / Mistral style
prompt = f"{B_INST}{B_FUNC}{json.dumps(FUNCTION_METADATA).strip()}{E_FUNC}What is the current weather in London?{E_INST}\n\n"
print()
print("Raw prompt")
print(prompt)
# Tokenize, run the model, and decode
model_inputs = tokenizer.apply_chat_template(prompt, return_tensors="pt").to(device)
generated_ids = model.generate(model_inputs, max_new_tokens=100, do_sample=True)
decoded = tokenizer.batch_decode(generated_ids)
print()
print("Generated")
print(decoded[0])
Running this gives:
Loading checkpoint shards: 100% [00:03<00:00, 1.66s/it]
Raw prompt
[INST] You have access to the following functions. Use them if required:
[{"type": "function", "function": {"name": "get_current_weather", "description": "This function gets the current weather in a given city", "parameters": {"type": "object", "properties": {"city": {"type": "string", "description": "The city, e.g., San Francisco"}, "format": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The temperature unit to use."}}, "required": ["city"]}}}, {"type": "function", "function": {"name": "get_clothes", "description": "This function provides a suggestion of clothes to wear based on the current weather", "parameters": {"type": "object", "properties": {"temperature": {"type": "string", "description": "The temperature, e.g., 15 C or 59 F"}, "condition": {"type": "string", "description": "The weather condition, e.g., 'Cloudy', 'Sunny', 'Rainy'"}}, "required": ["temperature", "condition"]}}}]
What is the current weather in London? [/INST]
Generated
<s> [INST] 1. Which programming language is most suitable for microservices architecture? The language must have good interprocess communication capabilities, strong ecosystem, and high-performance I/O
2. Which cloud platform is most suitable for Kubernetes deployment because microservices in Kubernetes scale very well
3. Kubernetes has what it takes to be a mature container orchestration engine (such as good scaling capabilities, a good logging API, and multiple methods to deploy applications)?
4. Which
python normal_token.py 46.64s user 45.23s system 749% cpu 12.252 total
Rerunning the same code above with no changes:
Generated
<s> [INST] 2023-02-12
3:29 PM -5:29 AM, 2023-02-12 UTC
5:30 AM -7:30 PM, 2023-02-13 UTC
7:31 PM -9:31 AM, 2023-02-14 UTC
9:32 AM -11:3
Okay, that's a little off topic. Now let's try the apply_chat_template packing. Here is the updated code:
from transformers import AutoModelForCausalLM, AutoTokenizer
import json
FUNCTION_METADATA = [ ... ]  # same as above, omitted for brevity
messages = [
    {
        "role": "function_metadata",
        "content": json.dumps(FUNCTION_METADATA)
    },
    {
        "role": "user",
        "content": "What is the current weather in London?"
    },
]
device = "cuda"
# Load up the model and the tokenizer
model = AutoModelForCausalLM.from_pretrained('Trelis/Mistral-7B-Instruct-v0.2-function-calling-v3', trust_remote_code=True)#, torch_dtype=torch.float16)
model.half()
model.to(device)
tokenizer = AutoTokenizer.from_pretrained("Trelis/Mistral-7B-Instruct-v0.2-function-calling-v3", trust_remote_code=True)
prompt = tokenizer.apply_chat_template(messages, tokenize=False)
print()
print("Raw prompt")
print(prompt)
# Tokenize, run the model, and decode
model_inputs = tokenizer.apply_chat_template(prompt, return_tensors="pt").to(device)
generated_ids = model.generate(model_inputs, max_new_tokens=100, do_sample=True)
decoded = tokenizer.batch_decode(generated_ids)
print()
print("Generated")
print(decoded[0])
What does this return?
Loading checkpoint shards: 100%
Raw prompt
<s> [INST] You have access to the following functions. Use them if required:
[{"type": "function", "function": {"name": "get_current_weather", "description": "This function gets the current weather in a given city", "parameters": {"type": "object", "properties": {"city": {"type": "string", "description": "The city, e.g., San Francisco"}, "format": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The temperature unit to use."}}, "required": ["city"]}}}, {"type": "function", "function": {"name": "get_clothes", "description": "This function provides a suggestion of clothes to wear based on the current weather", "parameters": {"type": "object", "properties": {"temperature": {"type": "string", "description": "The temperature, e.g., 15 C or 59 F"}, "condition": {"type": "string", "description": "The weather condition, e.g., 'Cloudy', 'Sunny', 'Rainy'"}}, "required": ["temperature", "condition"]}}}]
What is the current weather in London? [/INST]
Generated
<s> [INST] (1) The average person is 30 inches tall
(2) The person is five feet ten inches tall
(3) The person is five inches tall</s>
I'm at a loss for what's wrong. I've tried everything I can think of and spent hours watching the how-to videos. I'm fully stuck. It might be helpful to know that if I switch back to the v0.1 Mistral tokenizer with either model, I can generate normal chatty LLM responses, just without the capacity for functions (rough sketch of what I mean below).
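For reference, this is roughly what I mean by falling back to the stock tokenizer, just plain user/assistant roles with no function metadata (a sketch from memory, not my exact script):
# Rough sketch: plain chat with the stock Mistral instruct tokenizer,
# standard user/assistant roles only, no function-calling metadata.
plain_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
plain_messages = [{"role": "user", "content": "What is the current weather in London?"}]
model_inputs = plain_tokenizer.apply_chat_template(plain_messages, return_tensors="pt").to(device)
generated_ids = model.generate(model_inputs, max_new_tokens=100, do_sample=True)
print(plain_tokenizer.batch_decode(generated_ids)[0])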
Thanks in advance.
I'm running the RunPod one-click template and getting:
[INST] You have access to the following functions. Use them if required:
[
{
"type": "function",
"function": {
"name": "get_current_weather",
"description": "This function gets the current weather in a given city",
"parameters": {
"type": "object",
"properties": {
"city": {
"type": "string",
"description": "The city, e.g., San Francisco"
},
"format": {
"type": "string",
"enum": [
"celsius",
"fahrenheit"
],
"description": "The temperature unit to use."
}
},
"required": [
"city"
]
}
}
},
{
"type": "function",
"function": {
"name": "get_clothes",
"description": "This function provides a suggestion of clothes to wear based on the current weather",
"parameters": {
"type": "object",
"properties": {
"temperature": {
"type": "string",
"description": "The temperature, e.g., 15 C or 59 F"
},
"condition": {
"type": "string",
"description": "The weather condition, e.g., 'Cloudy', 'Sunny', 'Rainy'"
}
},
"required": [
"temperature",
"condition"
]
}
}
}
]
What is the current weather in London? [/INST]
Function call: {
"name": "get_current_weather",
"arguments": {
"city": "London"
}
}</s><s>[INST] Here is the response to the function call. If helpful, use it to respond to my question:
{
"temperature": "15 C",
"condition": "Cloudy"
} [/INST]
user: What is the current weather in London?
function_call: {
"name": "get_current_weather",
"arguments": {
"city": "London"
}
}
function_response: {
"temperature": "15 C",
"condition": "Cloudy"
}
assistant:The temperature is 15 degrees Celsius and it is cloudy in London
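For reference, that whole second round is just another messages list using the role names printed above, passed through the chat template once. This is a sketch rather than a verbatim excerpt from my script:
# Sketch of the follow-up turn, using the role names printed above:
# feed the function result back so the model answers in plain language.
second_round = [
    {"role": "function_metadata", "content": json.dumps(FUNCTION_METADATA)},
    {"role": "user", "content": "What is the current weather in London?"},
    {"role": "function_call", "content": json.dumps({
        "name": "get_current_weather",
        "arguments": {"city": "London"}
    })},
    {"role": "function_response", "content": json.dumps({
        "temperature": "15 C",
        "condition": "Cloudy"
    })},
]
model_inputs = tokenizer.apply_chat_template(second_round, return_tensors="pt").to(device)
generated_ids = model.generate(model_inputs, max_new_tokens=100, do_sample=False)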
My code is the same as yours except:
- I'm using do_sample=False
- I'm passing indent=4 to json.dumps for the function metadata (see the sketch below)
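Concretely, those two tweaks look something like this (a sketch of just the relevant lines):
messages = [
    {
        # indent=4 pretty-prints the metadata inside the prompt
        "role": "function_metadata",
        "content": json.dumps(FUNCTION_METADATA, indent=4)
    },
    {"role": "user", "content": "What is the current weather in London?"},
]

# Greedy decoding instead of sampling, so runs are reproducible
generated_ids = model.generate(model_inputs, max_new_tokens=100, do_sample=False)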
Ahh, I see the issues:
In both cases you are applying the prompt template twice. In the first script you build the prompt manually AND then pass it through apply_chat_template, which wraps it in the template a second time.
In the second script you literally apply the chat template twice: once with tokenize=False to build the prompt string, and then again on that string.
I recommend your second approach, but apply the chat template only once, directly to the messages list:
model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
OMG, this works like a charm. Thank you so much!
For anyone following along, here is the resulting, fully working code:
from transformers import AutoModelForCausalLM, AutoTokenizer
import json
FUNCTION_METADATA = [ ... ] # Same as above
messages = [
    {
        "role": "function_metadata",
        "content": json.dumps(FUNCTION_METADATA)
    },
    {
        "role": "user",
        "content": "What's the weather like in Seattle, WA this weekend?"
    },
]
device = "cuda"
# Load up the model and the tokenizer
model = AutoModelForCausalLM.from_pretrained('Trelis/Mistral-7B-Instruct-v0.2-function-calling-v3', trust_remote_code=True)#, torch_dtype=torch.float16)
model.half()
model.to(device)
tokenizer = AutoTokenizer.from_pretrained("Trelis/Mistral-7B-Instruct-v0.2-function-calling-v3", trust_remote_code=True)
# Tokenize, run the model, and decode
model_inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(device)
generated_ids = model.generate(model_inputs, max_new_tokens=100, do_sample=True)
decoded = tokenizer.batch_decode(generated_ids)
print()
print("Generated")
print(decoded[0])
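And if it helps anyone, continuing straight on from that script (so json and decoded are already in scope), here's a rough sketch of how I pull the call out of the decoded text and dispatch it. The "Function call:" prefix follows the output format shown further up the thread, and get_current_weather is just a placeholder stub:
def get_current_weather(city, format="celsius"):
    # Placeholder stub: swap in a real weather lookup here
    return {"temperature": "15 C", "condition": "Cloudy"}

# Take everything after "Function call:"; raw_decode stops at the end of the
# first JSON object, so trailing tokens like </s> are ignored.
prefix, sep, tail = decoded[0].partition("Function call:")
if sep:
    call, _ = json.JSONDecoder().raw_decode(tail.strip())
    if call["name"] == "get_current_weather":
        print(get_current_weather(**call["arguments"]))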
Thanks! I'll put your example on the model card.