import gradio as gr
from llama_cpp import Llama

# Load a GGML-format Llama 2 13B chat model from the local path
model_path = "llama-2-13b-chat.ggmlv3.q5_K_S.bin"
llama = Llama(model_path=model_path)
def predict(message, history):
    messages = []
    # Rebuild the conversation: Gradio passes history as
    # [user_message, assistant_reply] pairs
    for user_content, assistant_content in history:
        messages.append({
            "role": "user",
            "content": user_content + "\n",
        })
        messages.append({
            "role": "assistant",
            "content": assistant_content + "\n",
        })
    # Append the latest user message
    messages.append({
        "role": "user",
        "content": message + "\n",
    })
    # Get the answer from Llama (streaming off)
    response = llama.create_chat_completion(messages, stream=False)
    print(f"response: {response}")
    # The completion comes back with role "assistant"; return its content
    reply = response['choices'][0]['message']
    print(f"reply content: {reply['content']}")
    if reply['role'] == 'assistant':
        return reply['content']
    else:
        return "Error: did not get the expected response."
gr.ChatInterface(predict).launch()
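
# --- Optional: streaming variant (a sketch; predict_stream is an illustrative
# name, not part of the original code). With stream=True, create_chat_completion
# yields OpenAI-style chunks whose "delta" field carries the newly generated
# tokens, and ChatInterface streams the reply when the handler is a generator.
# Pass predict_stream to gr.ChatInterface instead of predict to use it.
def predict_stream(message, history):
    messages = []
    for user_content, assistant_content in history:
        messages.append({"role": "user", "content": user_content})
        messages.append({"role": "assistant", "content": assistant_content})
    messages.append({"role": "user", "content": message})
    partial = ""
    for chunk in llama.create_chat_completion(messages, stream=True):
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            partial += delta["content"]
            yield partial  # Gradio re-renders the reply with each yield

# gr.ChatInterface(predict_stream).launch()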