Error when accelerating the qwen2.5-7B model with vllm 0.6.6
#15 opened by Moses25
vllm 0.6.6
transformers 4.47.0
Model: Qwen/Qwen2.5-7B-Instruct

Launch script:

```bash
model_path=/workspace/models/Qwen2.5-3B-Instruct
CUDA_VISIBLE_DEVICES=4,5 python -m vllm.entrypoints.openai.api_server --model=$model_path \
    --trust-remote-code --host 0.0.0.0 --port 7777 \
    --gpu-memory-utilization 0.99 \
    --enforce-eager \
    --chat-template-content-format openai \
    --tensor-parallel-size 2 --served-model-name chatbot
```
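If I understand the flag correctly, `--chat-template-content-format` accepts `auto`, `string`, and `openai`: `string` renders each message's `content` into the chat template as a plain string, while `openai` renders it as a list of typed parts (`[{"type": "text", "text": ...}]`). For comparison, a sketch of the same launch command with the `string` format, in case the template expects plain strings:

```bash
# Same launch command, but rendering message content as a plain string;
# "openai" instead wraps content into a list of {"type": "text", ...} parts
# before the chat template sees it.
CUDA_VISIBLE_DEVICES=4,5 python -m vllm.entrypoints.openai.api_server --model=$model_path \
    --trust-remote-code --host 0.0.0.0 --port 7777 \
    --gpu-memory-utilization 0.99 \
    --enforce-eager \
    --chat-template-content-format string \
    --tensor-parallel-size 2 --served-model-name chatbot
```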
Client code:

```python
from openai import OpenAI

# Set OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:7777/v1"

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)

def chatbot(msgs: list[dict]) -> str:
    # Sampling parameters; top_k and repetition_penalty are vLLM extensions
    # passed through extra_body.
    call_args = {
        "temperature": 0.7,
        "top_p": 0.9,
        "top_k": 40,
        "max_tokens": 2048,  # output-len
        "presence_penalty": 1.0,
        "frequency_penalty": 0.0,
        "repetition_penalty": 1.0,
        # "stop": ["</s>"],
    }
    chat_response = client.chat.completions.create(
        model="chatbot",
        messages=msgs,
        extra_body=call_args,
    )
    # print("Chat response:", chat_response)
    return chat_response.choices[0].message.content

def single_chatbot(text: str, system_prompt: str = "") -> str:
    system_prompt = system_prompt or "You are a helpful assistant; help humans as much as you can."
    users = [{"role": "system", "content": system_prompt},
             {"role": "user", "content": str(text)}]
    # print(users)
    return chatbot(users)

single_chatbot("你是谁")
```
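As a quick isolation test (a hypothetical step, not part of the original post): vLLM's OpenAI-compatible server also exposes `/v1/completions`, which skips chat-template rendering entirely, so if the call below succeeds while `chat.completions` fails, the problem lies in how the template renders message content rather than in the model or server:

```python
# Hypothetical sanity check: /v1/completions bypasses the chat template,
# so a plain completion should work even while chat requests raise the
# TypeError shown below.
resp = client.completions.create(model="chatbot", prompt="你是谁", max_tokens=32)
print(resp.choices[0].text)
```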
The error is as follows:
```
rendered_chat = compiled_template.render(
  File "/usr/local/lib/python3.10/dist-packages/jinja2/environment.py", line 1295, in render
    self.environment.handle_exception()
  File "/usr/local/lib/python3.10/dist-packages/jinja2/environment.py", line 942, in handle_exception
    raise rewrite_traceback_stack(source=source)
  File "<template>", line 16, in top-level template code
TypeError: can only concatenate str (not "list") to str
```
Is the chat-template format the problem? How do I fix it?
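My reading of the error (an assumption, not confirmed against the vLLM source): Qwen2.5's chat template builds the prompt by string concatenation, roughly `'<|im_start|>' + message['role'] + '\n' + message['content']`, so it expects `content` to be a string; with `--chat-template-content-format openai`, vLLM hands the template each `content` as a list of typed parts, and line 16 of the template then tries to concatenate a `str` with a `list`. A minimal sketch that should reproduce the same failure directly with transformers, assuming the stock Qwen2.5 template and that `apply_chat_template` passes message content through unchanged:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")

# Plain-string content renders fine:
print(tok.apply_chat_template([{"role": "user", "content": "你是谁"}], tokenize=False))

# List-of-parts content (what the "openai" content format produces) hits
# the same string concatenation in the template and should raise:
# TypeError: can only concatenate str (not "list") to str
tok.apply_chat_template(
    [{"role": "user", "content": [{"type": "text", "text": "你是谁"}]}],
    tokenize=False,
)
```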