from dotenv import load_dotenv
import os

import gradio as gr
from openai import OpenAI

from prompt_template import PromptTemplate, PromptLoader
from assistant import AIAssistant

# Load environment variables from a local .env file
load_dotenv()
API_KEY = os.getenv("API_KEY")

# Load prompt templates from YAML
prompts = PromptLoader.load_prompts("prompts.yaml")
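# For reference, a minimal sketch of what prompts.yaml is assumed to look like.
# The exact schema is defined by PromptLoader, so treat the layout below as
# illustrative; the only requirements visible in this script are that each entry
# exposes .template and .parameters, and that system_context's template contains
# a {prompt_strategy} placeholder for format() to fill:
#
#   system_context:
#     template: |
#       You are a helpful assistant.
#       {prompt_strategy}
#     parameters:
#       max_tokens: 512
#       temperature: 0.7
#       top_p: 0.95
#   cot_prompt:
#     template: "Reason step by step before giving your final answer."
#     parameters:
#       temperature: 0.2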
# Available models and their configurations
MODELS = {
    "Llama 3.3 70B Instruct": {"name": "meta/llama-3.3-70b-instruct"},
    "Llama 3.1 405B Instruct": {"name": "meta/llama-3.1-405b-instruct"},
    "Llama 3.2 3B Instruct": {"name": "meta/llama-3.2-3b-instruct"},
    "Falcon 3 7B Instruct": {"name": "tiiuae/falcon3-7b-instruct"},
    "Granite 3.0 8B Instruct": {"name": "ibm/granite-3.0-8b-instruct"},
}

# Available prompt strategies, mapped to their template keys in prompts.yaml
PROMPT_STRATEGIES = {
    "Default": "system_context",
    "Chain of Thought": "cot_prompt",
    "Knowledge-based": "knowledge_prompt",
    "Few-shot Learning": "few_shot_prompt",
    "Meta-prompting": "meta_prompt",
}


def create_assistant(model_name):
    """Build an AIAssistant backed by NVIDIA's OpenAI-compatible endpoint."""
    client = OpenAI(
        base_url="https://integrate.api.nvidia.com/v1",
        api_key=API_KEY,
    )
    # Resolve the human-readable dropdown label to the actual model identifier
    model_name = MODELS[model_name]["name"]
    return AIAssistant(client=client, model=model_name)


def respond(
    message,
    history: list[tuple[str, str]],
    model_name,
    prompt_strategy,
    override_params: bool,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for the Gradio ChatInterface."""
    assistant = create_assistant(model_name)

    # Get the selected prompt template and the system context template
    prompt_template: PromptTemplate = prompts[PROMPT_STRATEGIES[prompt_strategy]]
    system_context: PromptTemplate = prompts["system_context"]

    # Embed the selected prompt strategy into the system context
    formatted_system_message = system_context.format(prompt_strategy=prompt_template.template)

    # Build the message list in OpenAI chat format, starting with the system prompt
    messages = [{"role": "system", "content": formatted_system_message}]

    # Add the conversation history
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": str(user_msg)})
        if assistant_msg:
            messages.append({"role": "assistant", "content": str(assistant_msg)})

    # Add the current message
    messages.append({"role": "user", "content": str(message)})

    # Use the template's own parameters unless the user overrides them via the sliders
    generation_params = prompt_template.parameters if not override_params else {
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }

    try:
        # Stream partial responses back to the chat UI as they arrive
        for response in assistant.generate_response(
            prompt_template=prompt_template,
            generation_params=generation_params,
            stream=True,
            messages=messages,
        ):
            yield response
    except Exception as e:
        yield f"Error: {e}"


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            model_dropdown = gr.Dropdown(
                choices=list(MODELS.keys()),
                value=list(MODELS.keys())[0],
                label="Select Model",
            )
            prompt_strategy_dropdown = gr.Dropdown(
                choices=list(PROMPT_STRATEGIES.keys()),
                value=list(PROMPT_STRATEGIES.keys())[0],
                label="Select Prompt Strategy",
            )

    with gr.Row():
        override_params = gr.Checkbox(
            label="Override Template Parameters",
            value=False,
        )

    # Sliders stay hidden until "Override Template Parameters" is checked
    with gr.Row():
        with gr.Column(visible=False) as param_controls:
            max_tokens = gr.Slider(
                minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
            )
            temperature = gr.Slider(
                minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
            )
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.95, step=0.05,
                label="Top-p (nucleus sampling)",
            )

    chatbot = gr.ChatInterface(
        fn=respond,
        additional_inputs=[
            model_dropdown,
            prompt_strategy_dropdown,
            override_params,
            max_tokens,
            temperature,
            top_p,
        ],
    )

    # Parameters and prompt details section below the chat
    with gr.Row(equal_height=True):
        with gr.Column(scale=1, min_width=300):
            with gr.Accordion("Current Prompt Details", open=False):
                system_prompt_display = gr.TextArea(
                    label="System Prompt",
                    interactive=False,
                    lines=20,
                )
                current_messages_display = gr.JSON(
                    label="Full Conversation Context",
                )

    def toggle_param_controls(override):
        # Show or hide the parameter sliders based on the checkbox state
        return gr.Column(visible=override)

    def update_prompt_display(prompt_strategy):
        # Rebuild the system prompt preview for the newly selected strategy
        prompt_template = prompts[PROMPT_STRATEGIES[prompt_strategy]]
        system_context = prompts["system_context"]
        formatted_system_message = system_context.format(prompt_strategy=prompt_template.template)
        return (
            formatted_system_message,
            {
                "Template Parameters": prompt_template.parameters,
                "Prompt Strategy": prompt_template.template,
            },
        )

    # Refresh the prompt details panel when the strategy changes
    prompt_strategy_dropdown.change(
        update_prompt_display,
        inputs=[prompt_strategy_dropdown],
        outputs=[system_prompt_display, current_messages_display],
    )

    override_params.change(
        toggle_param_controls,
        inputs=[override_params],
        outputs=[param_controls],
    )

if __name__ == "__main__":
    demo.launch()
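# Example usage, assuming this file is saved as app.py alongside prompts.yaml
# and that .env contains a valid NVIDIA API key:
#
#   echo 'API_KEY=nvapi-...' > .env
#   python app.py
#
# Gradio then serves the demo locally (http://127.0.0.1:7860 by default).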