#!/usr/bin/env python
"""Gradio chat demo that streams replies from a hosted chat model.

The model is reached through ``huggingface_hub.InferenceClient`` and the
conversation UI is a ``gradio.ChatInterface`` using the "messages" format.
"""

import os
from collections.abc import Iterator

import gradio as gr
from huggingface_hub import InferenceClient

# The API key is taken from the HF_TOKEN environment variable; os.getenv
# returns None when the variable is unset, and that None is passed through.
client = InferenceClient(
    model="deepseek-ai/DeepSeek-R1",
    provider="together",
    api_key=os.getenv("HF_TOKEN"),
)


def fn(message: str, history: list[dict]) -> Iterator[str]:
    """Stream the model's reply to ``message``, given the prior ``history``.

    Args:
        message: The new user message.
        history: Earlier conversation messages; they are forwarded unchanged,
            followed by ``message`` wrapped as a ``{"role": "user", ...}``
            entry.

    Yields:
        The reply text accumulated so far, once per streamed chunk that
        carries a choice (each yield is the full text to date, not a delta).
    """
    messages = [*history, {"role": "user", "content": message}]
    out = ""
    for chunk in client.chat_completion(messages=messages, max_tokens=2000, stream=True):
        # A streamed chunk may carry an empty ``choices`` list (e.g. a
        # metadata-only chunk from some backends -- TODO confirm for this
        # provider); indexing [0] on it would raise IndexError mid-stream.
        if not chunk.choices:
            continue
        # ``content`` can be None on a delta, so fall back to "".
        out += chunk.choices[0].delta.content or ""
        yield out


# NOTE(review): allow_tags=["think"] presumably keeps the model's <think>
# reasoning tags visible in the chatbot -- confirm against the Gradio docs.
demo = gr.ChatInterface(
    fn=fn,
    type="messages",
    chatbot=gr.Chatbot(type="messages", allow_tags=["think"], scale=1),
)

if __name__ == "__main__":
    demo.launch()