vllm 0.6.6加速qwen2.5-7B模型出错

#15
by Moses25 - opened

vllm 0.6.6
transformers 4.47.0
模型:Qwen/Qwen2.5-7B-Instruct
启动脚本

model_path=/workspace/models/Qwen2.5-3B-Instruct
CUDA_VISIBLE_DEVICES=4,5 python  -m vllm.entrypoints.openai.api_server --model=$model_path \
        --trust-remote-code --host 0.0.0.0  --port 7777 \
        --gpu-memory-utilization 0.99 \
        --enforce-eager \
        --chat-template-content-format openai \
        --tensor-parallel-size 2 --served-model-name chatbot
from openai import OpenAI
# Set OpenAI's API key and API base to use vLLM's API server.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:7777/v1"

client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)
def chatbot(msgs:list[dict]) -> str:
    call_args = {
            'temperature': 0.7,
            'top_p': 0.9,
            'top_k': 40,
            'max_tokens': 2048, # output-len
            'presence_penalty': 1.0,
            'frequency_penalty': 0.0,
            "repetition_penalty":1.0,
#             "stop":["</s>"],
        }
    
    chat_response = client.chat.completions.create(
        model="chatbot",
        messages=msgs,
        extra_body=call_args
    )
#     print("Chat response:", chat_response)
    return chat_response.choices[0].message.content

def single_chatbot(text:str,system_prompt="") -> str:
    system_prompt = system_prompt if system_prompt else "You are helpfull assitant,help humman as much as you can."
    users = [{"role":"system","content":system_prompt},
            {"role":"user","content":str(text)}]
#     print(users)
    return chatbot(users)
single_chatbot("你是谁")

报错如下:

rendered_chat = compiled_template.render(
  File "/usr/local/lib/python3.10/dist-packages/jinja2/environment.py", line 1295, in render
    self.environment.handle_exception()
  File "/usr/local/lib/python3.10/dist-packages/jinja2/environment.py", line 942, in handle_exception
  raise rewrite_traceback_stack(source=source)
  File "<template>", line 16, in top-level template code
TypeError: can only concatenate str (not "list") to str

是chat-template的格式有问题么?如何解决?

Sign up or log in to comment