talk2docs / build_model.py
olegperegudov's picture
wip
11f324c
raw
history blame
841 Bytes
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import LlamaCpp
import constants
# Shared callback manager that would stream generated tokens to stdout.
# NOTE(review): currently unused — the `callback_manager=` kwarg inside
# load_model() is commented out; kept here so streaming can be re-enabled
# by uncommenting that line.
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
def load_model():
    """Build and return a LlamaCpp LLM configured from the `constants` module.

    Every sampling/runtime setting (model path, temperature, context window,
    GPU layers, batch size, repeat penalty, ...) is read from project-level
    `constants`. Streaming is disabled, so a call returns the completed text
    as a value rather than emitting tokens incrementally.
    """
    # Gather the configuration in one mapping so it reads as a single table
    # of settings before the model object is constructed.
    llm_config = {
        "model_path": constants.MODEL_SAVE_PATH,
        "temperature": constants.TEMPERATURE,
        "max_tokens": constants.MAX_TOKENS,
        "top_p": constants.TOP_P,
        # "callback_manager": callback_manager,  # will stream to stdout, but wont attach to variable
        "verbose": False,  # Verbose is required to pass to the callback manager
        "n_gpu_layers": constants.N_GPU_LAYERS,
        "n_batch": constants.N_BATCH,
        "n_ctx": constants.N_CTX,
        "repeat_penalty": constants.REPEAT_PENALTY,
        "streaming": False,
    }
    return LlamaCpp(**llm_config)