"""Multi-backend streaming chatbot with a Gradio UI.

Supports Hugging Face Inference (Mixtral, Qwen), OpenAI (GPT-3.5/4), and
Anthropic (Claude) models. Expects OPENAI_API_KEY and ANTHROPIC_API_KEY in
the environment; HF_TOKEN is optional for the Hugging Face client.
"""

import os

import anthropic
import gradio as gr
from huggingface_hub import InferenceClient
from openai import OpenAI

hf_client = InferenceClient()
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
anthropic_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))

# Display name -> model identifier for each backend.
MODEL_OPTIONS = {
    "Mixtral-8x7B-Instruct-v0.1": "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "Qwen2-72B-Instruct": "Qwen/Qwen2-72B-Instruct",
    "GPT-3.5-turbo": "gpt-3.5-turbo",
    "GPT-4": "gpt-4",
    "claude-3-5-sonnet": "claude-3-5-sonnet-20240620",
}


def generate_text(model_choice, messages):
    """Stream a reply for the latest user turn, yielding the accumulated text.

    `messages` is Gradio chat history: a list of [user, assistant] pairs,
    where the last pair's assistant slot is None (not yet answered).
    """
    full_response = ""

    if "Mixtral" in model_choice or "Qwen" in model_choice:
        # Flatten earlier turns into a plain-text prompt, then append the
        # pending user message exactly once (the original code appended the
        # last turn twice).
        history_text = "\n".join(
            f"Human: {user_msg}\nAI: {ai_msg or ''}"
            for user_msg, ai_msg in messages[:-1]
        )
        prompt = history_text + "\n" if history_text else ""
        prompt += f"Human: {messages[-1][0]}\nAI:"
        stream = hf_client.text_generation(
            prompt,
            model=MODEL_OPTIONS[model_choice],
            max_new_tokens=1000,
            temperature=0.7,
            do_sample=True,
            repetition_penalty=1.1,
            stream=True,
        )
        for token in stream:
            full_response += token
            yield full_response

    elif "GPT" in model_choice:
        # Convert chat history to the OpenAI messages format.
        openai_messages = [{"role": "system", "content": "You are a helpful assistant."}]
        for user_msg, ai_msg in messages:
            openai_messages.append({"role": "user", "content": user_msg})
            if ai_msg:
                openai_messages.append({"role": "assistant", "content": ai_msg})
        stream = openai_client.chat.completions.create(
            model=MODEL_OPTIONS[model_choice],
            messages=openai_messages,
            max_tokens=1000,
            temperature=0.7,
            stream=True,
        )
        for chunk in stream:
            # Guard against chunks with no choices or empty deltas.
            if chunk.choices and chunk.choices[0].delta.content:
                full_response += chunk.choices[0].delta.content
                yield full_response

    elif "claude" in model_choice:
        # Convert chat history to the Anthropic messages format.
        claude_messages = []
        for user_msg, ai_msg in messages:
            if user_msg:
                claude_messages.append({"role": "user", "content": user_msg})
            if ai_msg:
                claude_messages.append({"role": "assistant", "content": ai_msg})
        if not claude_messages:
            # The API requires at least one user message.
            claude_messages = [{"role": "user", "content": "Hello"}]
        with anthropic_client.messages.stream(
            model=MODEL_OPTIONS[model_choice],
            max_tokens=1024,
            messages=claude_messages,
        ) as stream:
            for text in stream.text_stream:
                full_response += text
                yield full_response

    else:
        yield "Unsupported model"


def user(user_message, history):
    # Append the new user turn with an empty assistant slot and clear the textbox.
    history = history or []
    return "", history + [[user_message, None]]


def bot(history, model_choice):
    if not history:
        yield []
        return
    history[-1][1] = ""
    # Re-yield the whole history on each streamed chunk so the UI updates live.
    for chunk in generate_text(model_choice, history):
        history[-1][1] = chunk
        yield history


with gr.Blocks() as iface:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")
    model_dropdown = gr.Dropdown(
        choices=list(MODEL_OPTIONS.keys()),
        value="Mixtral-8x7B-Instruct-v0.1",
        label="Select Model",
    )

    # Submitting adds the user turn immediately, then streams the bot reply.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, [chatbot, model_dropdown], chatbot
    )
    clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    iface.launch(debug=True)