import os
from typing import Literal, Optional

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Application configuration loaded from the environment and .env files.

    Values are resolved by pydantic-settings: constructor kwargs take the
    highest priority, then environment variables, then the ``_env_file``
    list passed at construction time (see the bottom of this module).
    """

    # pydantic v2 configuration — replaces the deprecated v1-style
    # ``class Config`` inner class.
    model_config = SettingsConfigDict(
        case_sensitive=True,
        env_file_encoding="utf-8",
    )

    # Required: name of the active environment (e.g. "local"); supplied
    # explicitly when ``settings`` is constructed below.
    ENVIRONMENT: str
    PORT: int = 8000

    # Vector store backend; only "weaviate" is supported.
    VECTOR_DATABASE: Literal["weaviate"] = "weaviate"

    # OpenAI credentials/model — only needed when the openai modes are used.
    OPENAI_API_KEY: Optional[str] = None
    OPENAI_MODEL: str = "gpt-3.5-turbo"

    WEAVIATE_CLIENT_URL: str = "http://localhost:8080"

    # Backend selection for the LLM and the embedding model.
    LLM_MODE: Literal["openai", "mock", "local"] = "mock"
    EMBEDDING_MODE: Literal["openai", "mock", "local"] = "mock"

    # Folder scanned for documents to ingest.
    LOCAL_DATA_FOLDER: str = "local_data/test"

    DEFAULT_QUERY_SYSTEM_PROMPT: str = "You can only answer questions about the provided context. If you know the answer but it is not based in the provided context, don't provide the answer, just state the answer is not in the context provided."

    # HuggingFace model identifiers used when *_MODE == "local".
    LOCAL_HF_EMBEDDING_MODEL_NAME: str = "BAAI/bge-small-en-v1.5"
    LOCAL_HF_LLM_REPO_ID: str = "TheBloke/Llama-2-7B-Chat-GGUF"
    LOCAL_HF_LLM_MODEL_FILE: str = "llama-2-7b-chat.Q4_K_M.gguf"

    # LLM config
    LLM_TEMPERATURE: float = Field(
        default=0.1, description="The temperature to use for sampling."
    )
    LLM_MAX_NEW_TOKENS: int = Field(
        default=256,
        description="The maximum number of tokens to generate.",
    )
    LLM_CONTEXT_WINDOW: int = Field(
        default=3900,
        description="The maximum number of context tokens for the model.",
    )

    # UI
    IS_UI_ENABLED: bool = True
    UI_PATH: str = "/"

    # Rerank
    IS_RERANK_ENABLED: bool = True
    RERANK_TOP_N: int = 3
    RERANK_MODEL_NAME: str = "cross-encoder/ms-marco-MiniLM-L-2-v2"


# Which environment we are running in; defaults to "local" when unset.
environment = os.environ.get("ENVIRONMENT", "local")

settings = Settings(
    ENVIRONMENT=environment,
    # ".env.{environment}" takes priority over ".env"
    # (later entries in the env_file list win in pydantic-settings).
    _env_file=[".env", f".env.{environment}"],
)