import gradio as gr
from llama_cpp import Llama

model_path = "llama-2-13b-chat.ggmlv3.q5_K_S.bin"
llama = Llama(model_path=model_path)


def predict(message, history):
    # Rebuild the conversation as chat-completion messages.
    # Gradio passes history as a list of [user, assistant] pairs.
    messages = []
    for user_content, assistant_content in history:
        messages.append({"role": "user", "content": user_content})
        messages.append({"role": "assistant", "content": assistant_content})
    messages.append({"role": "user", "content": message})

    # Get the answer from Llama (streaming off).
    response = llama.create_chat_completion(messages, stream=False)
    print(f"response: {response}")

    # Return the model's reply; chat completions come back with the
    # 'assistant' role.
    reply = response["choices"][0]["message"]
    if reply["role"] == "assistant":
        return reply["content"]
    return "Error: did not receive the expected response."


gr.ChatInterface(predict).launch()
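Because predict() uses stream=False, the chat window stays empty until the whole answer has been generated. Below is a minimal sketch of a streaming variant; it assumes llama-cpp-python's OpenAI-style streaming chunks (each chunk's "delta" may carry an incremental "content" piece) and Gradio's support for generator functions in ChatInterface, where each yielded string replaces the pending bot message. predict_streaming is a hypothetical name introduced here for illustration.

def predict_streaming(message, history):
    # Same message construction as predict() above.
    messages = []
    for user_content, assistant_content in history:
        messages.append({"role": "user", "content": user_content})
        messages.append({"role": "assistant", "content": assistant_content})
    messages.append({"role": "user", "content": message})

    # With stream=True, create_chat_completion yields chunks whose
    # 'delta' holds the next piece of the reply (assumed chunk shape).
    partial = ""
    for chunk in llama.create_chat_completion(messages, stream=True):
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            partial += delta["content"]
            yield partial  # Gradio redraws the bot message on each yield

Passing predict_streaming to gr.ChatInterface in place of predict should then display the answer token by token as it is generated.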