import os
from typing import Literal, Optional

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Application configuration loaded from the environment and .env files.

    Values are resolved by pydantic-settings: constructor kwargs take the
    highest priority, then environment variables, then the ``_env_file``
    list passed at construction time (see the bottom of this module).
    """

    # pydantic v2 configuration — replaces the deprecated v1-style
    # ``class Config`` inner class.
    model_config = SettingsConfigDict(
        case_sensitive=True,
        env_file_encoding="utf-8",
    )

    # Required: name of the active environment (e.g. "local"); supplied
    # explicitly when ``settings`` is constructed below.
    ENVIRONMENT: str
    PORT: int = 8000

    # Vector store backend; only "weaviate" is supported.
    VECTOR_DATABASE: Literal["weaviate"] = "weaviate"

    # OpenAI credentials/model — only needed when the openai modes are used.
    OPENAI_API_KEY: Optional[str] = None
    OPENAI_MODEL: str = "gpt-3.5-turbo"

    WEAVIATE_CLIENT_URL: str = "http://localhost:8080"

    # Backend selection for the LLM and the embedding model.
    LLM_MODE: Literal["openai", "mock", "local"] = "mock"
    EMBEDDING_MODE: Literal["openai", "mock", "local"] = "mock"

    # Folder scanned for documents to ingest.
    LOCAL_DATA_FOLDER: str = "local_data/test"

    DEFAULT_QUERY_SYSTEM_PROMPT: str = "You can only answer questions about the provided context. If you know the answer but it is not based in the provided context, don't provide the answer, just state the answer is not in the context provided."

    # HuggingFace model identifiers used when *_MODE == "local".
    LOCAL_HF_EMBEDDING_MODEL_NAME: str = "BAAI/bge-small-en-v1.5"
    LOCAL_HF_LLM_REPO_ID: str = "TheBloke/Llama-2-7B-Chat-GGUF"
    LOCAL_HF_LLM_MODEL_FILE: str = "llama-2-7b-chat.Q4_K_M.gguf"

    # LLM config
    LLM_TEMPERATURE: float = Field(
        default=0.1, description="The temperature to use for sampling."
    )
    LLM_MAX_NEW_TOKENS: int = Field(
        default=256,
        description="The maximum number of tokens to generate.",
    )
    LLM_CONTEXT_WINDOW: int = Field(
        default=3900,
        description="The maximum number of context tokens for the model.",
    )

    # UI
    IS_UI_ENABLED: bool = True
    UI_PATH: str = "/"

    # Rerank
    IS_RERANK_ENABLED: bool = True
    RERANK_TOP_N: int = 3
    RERANK_MODEL_NAME: str = "cross-encoder/ms-marco-MiniLM-L-2-v2"


# Which environment we are running in; defaults to "local" when unset.
environment = os.environ.get("ENVIRONMENT", "local")

settings = Settings(
    ENVIRONMENT=environment,
    # ".env.{environment}" takes priority over ".env"
    # (later entries in the env_file list win in pydantic-settings).
    _env_file=[".env", f".env.{environment}"],
)