File size: 2,356 Bytes
11f324c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import os

import torch

# Model file name and its direct-download URL on the Hugging Face Hub.
MODEL_NAME = "saiga_mistral_7b.Q4_K_M.gguf"
# Use the canonical host (no trailing dot) and the "/resolve/" endpoint:
# "/blob/" serves the HTML file-viewer page, while "/resolve/" serves the
# raw file, which is what a downloader needs.
MODEL_URL = f"https://huggingface.co/TheBloke/saiga_mistral_7b-GGUF/resolve/main/{MODEL_NAME}"

# FOR PRODUCTION
# Every path below is anchored at the directory that contains this file
# (symlinks resolved), so the layout does not depend on the process's
# current working directory.
CWD = os.path.split(os.path.realpath(__file__))[0]

# <CWD>/data and <CWD>/data/docs
DATA_PATH = os.path.join(CWD, "data")
DOCS_PATH = os.path.join(CWD, "data", "docs")

# <CWD>/model and the model file inside it
MODEL_PATH = os.path.join(CWD, "model")
MODEL_SAVE_PATH = os.path.join(CWD, "model", MODEL_NAME)

# RAG params
# The number of layers to put on the GPU. The rest will be on the CPU
# (0 means all layers on the CPU). -1 is the value chosen when CUDA is available.
# NOTE(review): llama.cpp treats -1 as "offload every layer" - confirm against
# the loader that consumes this constant.
N_GPU_LAYERS = -1 if torch.cuda.is_available() else 0
N_BATCH = 1024  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU

TEMPERATURE = 0.1  # The temperature of the sampling. 0.1 is a good value for most cases
MAX_TOKENS = 1024  # The maximum number of tokens to generate
# Nucleus-sampling probability mass; the valid range is (0, 1]. The previous
# value of 2 was out of range. 1.0 keeps the whole distribution.
# NOTE(review): llama.cpp disables top-p filtering for values >= 1, so 1.0
# should behave like the old setting - confirm, or lower it (e.g. 0.95) if
# filtering is actually wanted.
TOP_P = 1.0
N_CTX = 2048  # context len, up to a maximum of 32k
CHUNK_SIZE = 750  # max number of characters for each chunk during splitting
CHUNK_OVERLAP = 200  # overlap between chunks
SEARCH_TYPE = "mmr"
LAST_MESSAGES = 3  # The number of last messages in conversation history to include in the context
REPEAT_PENALTY = 1.1  # The penalty for repeating tokens in the output
# N_GPU_LAYERS is -1 (never positive) when CUDA is available, so test for
# "non-zero"; the former "> 0" check made DEVICE always resolve to "cpu".
DEVICE = "cuda" if N_GPU_LAYERS != 0 else "cpu"

# Directory under DATA_PATH where the vector store is kept
# (presumably a persisted Chroma database, judging by the name - verify against the caller).
VECTOR_STORE_PATH = os.path.join(DATA_PATH, "chroma_db")

# Identifier of the sentence-transformers model used for embeddings.
EMBED_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"

# retriever config
# NOTE(review): presumably forwarded to the vector-store retriever as its
# search kwargs; whether "score_threshold" is honoured depends on the search
# type in use - confirm against the code that builds the retriever.
SEARCH_KWARGS = {
    "k": 3,
    "score_threshold": 0.6,
}

# Prompt templates.
# One chat message: "{role}" and "{content}" are filled in via str.format.
DEFAULT_MESSAGE_TEMPLATE = "<s>{role}\n{content}</s>"
# Opening of the bot's turn, left unterminated.
DEFAULT_RESPONSE_TEMPLATE = "<s>bot\n"
# System prompt (Russian): instructs the assistant to answer strictly from the
# provided context and to reply "not enough data" when it cannot.
# Built with implicit string concatenation rather than backslash continuations
# inside one literal: the latter embedded the source indentation (runs of
# spaces) into the prompt between sentences.
DEFAULT_SYSTEM_PROMPT = (
    "Ты ассистент помощник, который отвечает на вопросы используя предоставленный контекст. "
    "В качестве контекста используются тексты из различных источников. "
    "Постарайся ответить на вопрос максимально точно. "
    "Для ответа используй только информацию из контекста и вопроса. Ничего не выдумывай. "
    "Если не можешь ответить на вопрос, напиши - 'Не хватает данных для ответа.' "
)