from vllm import LLM, SamplingParams
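
# Path to the local model weights; vLLM loads the tokenizer from the
# same directory.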
model_path = "../model_weights"
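
# Load the model in bfloat16 to reduce memory use, with a fixed seed
# so the sampled outputs are reproducible across runs.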
model = LLM(model=model_path, tokenizer=model_path, dtype="bfloat16", seed=1234)
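
# Conservative sampling settings: a mild repetition penalty plus
# temperature, top-k, and top-p truncation, capped at 512 new tokens
# per reply.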
sampling_params = SamplingParams(
    repetition_penalty=1.05,
    temperature=0.7,
    top_k=20,
    top_p=0.8,
    max_tokens=512,
)
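
# Seed the conversation with the receptionist persona. The greeting
# reads: "Good afternoon! This is the 曜影医疗 appointment center, I'm
# Lisa, how can I help you?"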
messages = [
    {"role": "system", "content": "下午好!这里是曜影医疗预约中心,我是Lisa,请问有什么可以帮您?"}
]
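
# Seven pricing questions: GP consultation, the infection and
# persistent-cough diagnostic package (first purchase), specialist
# consultation, emergency care, psychology & psychiatry, rehabilitation
# therapy, and chiropractic care.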
prompts = [
    "能告诉我你们全科问诊收费价格吗?",
    "那首次购买感染及持续咳嗽诊断套餐的价格呢?",
    "能告诉我你们专科问诊收费价格吗?",
    "能告诉我你们急诊收费价格吗?",
    "能告诉我你们心理科&精神科问诊收费价格吗?",
    "能告诉我你们康复理疗收费价格吗?",
    "能告诉我你们整脊收费价格吗?",
]
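
# Multi-turn loop: append each question to the history, generate a
# reply against the full context, then append the reply so subsequent
# turns see it.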
for count, question in enumerate(prompts, start=1):
    messages.append({"role": "user", "content": question})
    outputs = model.chat(messages, add_generation_prompt=True, sampling_params=sampling_params)
    response = outputs[0].outputs[0].text
    # Record the reply under the "assistant" role so the chat template
    # renders the conversation history correctly on the next turn.
    messages.append({"role": "assistant", "content": response})
    print("## Round", count, "##")
    for message in messages:
        print(message)