import gradio as gr
from huggingface_hub import InferenceClient

# Initialize the client with the fine-tuned model
client = InferenceClient("Qwen/Qwen2.5-Coder-7B-Instruct")  # Update if using another model
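# Note: depending on the model and where this app runs, the Inference API may
# require authentication. InferenceClient accepts a `token` argument, e.g.
# InferenceClient("Qwen/Qwen2.5-Coder-7B-Instruct", token=os.environ.get("HF_TOKEN"))
# (requires `import os`; HF_TOKEN is an assumed env var name, not part of this app).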
# Model's token limit (prompt + completion combined)
MODEL_TOKEN_LIMIT = 16384

# Function to validate inputs
def validate_inputs(max_tokens, temperature, top_p, input_tokens):
    if max_tokens + input_tokens > MODEL_TOKEN_LIMIT:
        raise ValueError(f"Max tokens + input tokens must not exceed {MODEL_TOKEN_LIMIT}. Adjust the max tokens.")
    if not (0.1 <= temperature <= 4.0):
        raise ValueError("Temperature must be between 0.1 and 4.0.")
    if not (0.1 <= top_p <= 1.0):
        raise ValueError("Top-p must be between 0.1 and 1.0.")
# Rough input token count: counts whitespace-separated words, which undercounts
# real tokens; see the tokenizer-based sketch below for a more faithful option
def count_tokens(messages):
    return sum(len(m["content"].split()) for m in messages)
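# A more faithful count would use the model's own tokenizer. A minimal sketch,
# assuming the `transformers` package is installed and the tokenizer files can
# be downloaded (count_tokens_exact is a hypothetical helper, not used above):
#
#     from transformers import AutoTokenizer
#     tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-7B-Instruct")
#
#     def count_tokens_exact(messages):
#         # Render the conversation with the model's chat template and count ids
#         ids = tokenizer.apply_chat_template(messages, tokenize=True)
#         return len(ids)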
# Response generation (streams partial completions back to the UI)
def respond(message, history, system_message, max_tokens, temperature, top_p):
    # Prepare messages for the model
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:  # User's message
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:  # Assistant's response
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Estimate the input token count and cap max_tokens so the request
    # stays within the model's context window
    input_tokens = count_tokens(messages)
    max_allowed_tokens = MODEL_TOKEN_LIMIT - input_tokens
    if max_allowed_tokens <= 0:
        raise ValueError("The conversation already fills the context window; please shorten it.")
    if max_tokens > max_allowed_tokens:
        max_tokens = max_allowed_tokens

    validate_inputs(max_tokens, temperature, top_p, input_tokens)

    response = ""
    # Generate the response with streaming; `chunk` avoids shadowing the
    # `message` parameter, and `delta.content` can be None on some chunks
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content or ""
        response += token
        yield response
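# Note: a ValueError raised by validate_inputs inside this generator surfaces as
# a generic server error in the chat UI. To show the message to the user instead,
# one option (an assumption about desired behavior, not the original design) is
# to catch it and re-raise as gr.Error(str(exc)), Gradio's user-visible exception.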
# Default system message (editable in the UI)
system_message = """
You are an advanced AI assistant specialized in coding tasks.
- You deliver precise, error-free code in multiple programming languages.
- Analyze queries for logical accuracy and provide optimized solutions.
- Ensure clarity, brevity, and adherence to programming standards.
Guidelines:
1. Prioritize accurate, functional code.
2. Provide explanations only when necessary for understanding.
3. Handle tasks ethically, respecting user intent and legal constraints.
Thank you for using this system. Please proceed with your query.
"""
# Gradio chat interface with adjustable generation settings
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value=system_message, label="System message", lines=10),
        gr.Slider(minimum=1, maximum=16384, value=1000, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)
if __name__ == "__main__":
    demo.launch()
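# To run locally: `python app.py`, then open http://localhost:7860 (Gradio's
# default port). On a Hugging Face Space, app.py is executed directly, so the
# __main__ guard above launches the app when the Space wakes.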