import os

import gradio as gr
import requests
from openai import OpenAI

# Runtime configuration is taken from environment variables so that no
# credentials or endpoints are hard-coded in the source.
openai_api_key = os.getenv('api_key')
openai_api_base = os.getenv('url')
db_url = os.getenv('db_url')
db_api_key = os.getenv('db_api_key')
model_name = "weblab-GENIAC/Tanuki-8x8B-dpo-v1.0"

# The model is served through an OpenAI-compatible endpoint (base_url override).
client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)


def save_conversation(history, system_message):
    """Persist a finished exchange (user turn + assistant reply) to the logging API.

    Best-effort: any network failure is printed and swallowed so that a logging
    outage can never break the chat itself.

    Args:
        history: list of (user_message, assistant_message) tuples, newest last.
        system_message: the system prompt that was in effect for this exchange.
    """
    conversation_data = {
        "conversation": history,
        "index": (len(history) - 1, 1),  # points at the latest assistant reply
        "liked": None,                   # no rating at save time; votes arrive later
        "system_message": system_message,
    }
    headers = {
        "X-API-Key": db_api_key
    }
    try:
        # timeout keeps a hung logging server from stalling the response stream
        response = requests.post(db_url, json=conversation_data, headers=headers, timeout=10)
    except requests.RequestException as exc:
        print(f"Failed to save conversation: {exc}")
        return
    if response.status_code == 200:
        print("Conversation saved successfully")
    else:
        print(f"Failed to save conversation: {response.status_code}")


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a model reply for *message* given the prior *history*.

    Yields the accumulated assistant text after each streamed token (the shape
    gr.ChatInterface expects), then logs the completed exchange.

    Args:
        message: latest user input.
        history: prior turns as (user, assistant) tuples.
        system_message: system prompt inserted at the head of the context.
        max_tokens / temperature / top_p: sampling parameters forwarded verbatim.
    """
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat.completions.create(
        model=model_name,
        messages=messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # The final stream chunk carries content=None; skip it.
        if token is not None:
            response += token
            yield response

    # Log the finished exchange; save_conversation never raises.
    save_conversation(history + [(message, response)], system_message)


# For information on how to customize the ChatInterface, peruse the gradio docs:
# https://www.gradio.app/docs/chatinterface
description = """
### 
[Tanuki-8x8B-dpo-v1.0](https://huggingface.co./weblab-GENIAC/Tanuki-8x8B-dpo-v1.0)との会話(期間限定での公開)
- 人工知能開発のため、原則として**このChatBotの入出力データは全て著作権フリー(CC0)で公開予定です**ので、ご注意ください。著作物、個人情報、機密情報、誹謗中傷などのデータを入力しないでください。
- **上記の条件に同意する場合のみ**、以下のChatbotを利用してください。
"""
HEADER = description
FOOTER = """### 注意
- コンテクスト長が4096までなので、あまり会話が長くなると、エラーで停止します。ページを再読み込みしてください。
- GPUサーバーが不安定なので、応答しないことがあるかもしれません。"""


def vote(data: gr.LikeData, history):
    """Record a like/dislike on a specific message via the logging API.

    Best-effort like save_conversation: failures are printed, never raised, so
    a vote can never crash the UI callback.

    Args:
        data: Gradio like event; carries the message index and liked flag.
        history: current chatbot value at the time of the vote.
    """
    vote_data = {
        "conversation": history,
        "index": data.index,
        "liked": data.liked,
        "system_message": None,  # not known in the like callback
    }
    headers = {
        "X-API-Key": db_api_key  # set the API key
    }
    try:
        # timeout so a slow logging server cannot hang the like callback
        response = requests.post(db_url, json=vote_data, headers=headers, timeout=10)
    except requests.RequestException as exc:
        print(f"Failed to record vote: {exc}")
        return
    if response.status_code == 200:
        print("Vote recorded successfully")
    else:
        print(f"Failed to record vote: {response.status_code}")


def run():
    """Build the Gradio UI and launch the app (blocking)."""
    chatbot = gr.Chatbot(
        elem_id="chatbot",
        scale=1,
        show_copy_button=True,
        height="70%",
        layout="panel",
    )
    with gr.Blocks(fill_height=True) as demo:
        gr.Markdown(HEADER)
        gr.ChatInterface(
            fn=respond,
            stop_btn="Stop Generation",
            cache_examples=False,
            multimodal=False,
            chatbot=chatbot,
            additional_inputs_accordion=gr.Accordion(
                label="Parameters", open=False, render=False
            ),
            additional_inputs=[
                gr.Textbox(
                    value="以下は、タスクを説明する指示です。要求を適切に満たす応答を書きなさい。",
                    label="System message(試験用: 変えると性能が低下する可能性があります。)",
                    render=False,
                ),
                gr.Slider(
                    minimum=1,
                    maximum=4096,
                    step=1,
                    value=1024,
                    label="Max tokens",
                    visible=True,
                    render=False,
                ),
                gr.Slider(
                    minimum=0,
                    maximum=1,
                    step=0.1,
                    value=0.3,
                    label="Temperature",
                    visible=True,
                    render=False,
                ),
                gr.Slider(
                    minimum=0,
                    maximum=1,
                    step=0.1,
                    value=1.0,
                    label="Top-p",
                    visible=True,
                    render=False,
                ),
            ],
            analytics_enabled=False,
        )
        # Wire like/dislike events on the chatbot to the vote logger.
        chatbot.like(vote, chatbot, None)
        gr.Markdown(FOOTER)

    demo.queue(max_size=256, api_open=False)
    demo.launch(share=False, quiet=True)


if __name__ == "__main__":
    run()