"""AstroAlien 3000 — Gradio chat UI backed by the Hugging Face Inference API.

Reads the system prompt from ``prompt.md`` next to this file and serves a
streaming-free chat interface styled with ``styles.css``.
"""

from typing import List, Tuple

import os

import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Load environment variables (HUGGING_FACE_HUB_TOKEN) from a .env file.
load_dotenv()

# Read system prompt from prompt.md located alongside this script.
prompt_path = os.path.join(os.path.dirname(__file__), "prompt.md")
with open(prompt_path, "r", encoding="utf-8") as f:
    SYSTEM_PROMPT = f.read()

# Initialize HuggingFace client with token from env.
client = InferenceClient(token=os.getenv("HUGGING_FACE_HUB_TOKEN"))


def format_prompt(message: str, history: List) -> str:
    """Format the conversation prompt for the model.

    Args:
        message: The current user message.
        history: Past interactions, either as ``(user, assistant)`` tuples or
            as OpenAI-style ``{"role": ..., "content": ...}`` dicts — the
            latter is what Gradio supplies when the chatbot is created with
            ``type="messages"`` (as this app does), so both are accepted.

    Returns:
        str: The formatted prompt for the model.
    """
    prompt = f"<|system|>\n{SYSTEM_PROMPT}"
    if history and isinstance(history[0], dict):
        # Messages format: one dict per turn; tag each by its role.
        for msg in history:
            tag = "<|user|>" if msg.get("role") == "user" else "<|assistant|>"
            prompt += f"{tag}\n{msg.get('content', '')}"
    else:
        # Legacy tuple format: (user, assistant) pairs.
        for user, assistant in history:
            prompt += f"<|user|>\n{user}<|assistant|>\n{assistant}"
    prompt += f"<|user|>\n{message}<|assistant|>\n"
    return prompt


def predict(
    message: str,
    history: List[Tuple[str, str]],
    temperature: float = 0.7,
    max_new_tokens: int = 1024,
    top_p: float = 0.9
) -> str:
    """Generate a complete response to the user message.

    Args:
        message: The current user message.
        history: List of past interactions.
        temperature: Sampling temperature.
        max_new_tokens: Maximum number of tokens to generate.
        top_p: Top-p sampling parameter.

    Returns:
        str: Complete generated response.
    """
    response = client.text_generation(
        format_prompt(message, history),
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=1.2,
        do_sample=True,
        # Fixed seed keeps responses reproducible across identical requests.
        seed=42,
    )
    return response


# Main Gradio interface.
with gr.Blocks(css="styles.css", title="AstroAlien 3000") as demo:
    gr.Markdown(
        "# 👽🛸 Bem-vindo ao AstroAlien 3000! \n🌌✨",
        elem_classes="glass-card",
    )
    chatbot = gr.Chatbot(type="messages", elem_classes="glass-card")
    with gr.Row():
        temperature = gr.Slider(
            0, 1, value=0.7, label="Temperatura", elem_classes="glass-card"
        )
        max_tokens = gr.Slider(
            1, 2048, value=1024, step=1,
            label="Máximos Tokens", elem_classes="glass-card"
        )
        top_p = gr.Slider(0, 1, value=0.9, label="Top-p", elem_classes="glass-card")
    gr.ChatInterface(
        predict,
        chatbot=chatbot,
        additional_inputs=[temperature, max_tokens, top_p],
        type="messages",
    )

# Guard the launch so importing this module does not start a server.
if __name__ == "__main__":
    demo.launch()