import gradio as gr
from llama_cpp import Llama

model_path = "llama-2-13b-chat.ggmlv3.q5_K_S.bin"
llama = Llama(model_path=model_path)


def predict(message, history):
    # Rebuild the conversation as chat-completion messages.
    # Gradio passes history as a list of [user, assistant] pairs.
    messages = []
    for user_content, assistant_content in history:
        messages.append({"role": "user", "content": user_content})
        messages.append({"role": "assistant", "content": assistant_content})
    messages.append({"role": "user", "content": message})

    # Get the answer from Llama (streaming off).
    response = llama.create_chat_completion(messages, stream=False)
    print(f"response: {response}")

    # Return the model's reply; chat completions come back with the
    # 'assistant' role.
    reply = response["choices"][0]["message"]
    if reply["role"] == "assistant":
        return reply["content"]
    return "Error: did not receive the expected response."


gr.ChatInterface(predict).launch()
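Because predict() uses stream=False, the chat window stays empty until the whole answer has been generated. Below is a minimal sketch of a streaming variant; it assumes llama-cpp-python's OpenAI-style streaming chunks (each chunk's "delta" may carry an incremental "content" piece) and Gradio's support for generator functions in ChatInterface, where each yielded string replaces the pending bot message. predict_streaming is a hypothetical name introduced here for illustration.

def predict_streaming(message, history):
    # Same message construction as predict() above.
    messages = []
    for user_content, assistant_content in history:
        messages.append({"role": "user", "content": user_content})
        messages.append({"role": "assistant", "content": assistant_content})
    messages.append({"role": "user", "content": message})

    # With stream=True, create_chat_completion yields chunks whose
    # 'delta' holds the next piece of the reply (assumed chunk shape).
    partial = ""
    for chunk in llama.create_chat_completion(messages, stream=True):
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            partial += delta["content"]
            yield partial  # Gradio redraws the bot message on each yield

Passing predict_streaming to gr.ChatInterface in place of predict should then display the answer token by token as it is generated.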