# ggml-runpod-ui / config.yml
# Last updated by Noname010101 (commit b2c72d0), 413 bytes
---
# GGML model the UI serves.
model_url: https://huggingface.co./TheBloke/llama2_7b_chat_uncensored-GGML

# Typewriter-style output streaming.
typer:
  delay: 0.1  # NOTE(review): presumably seconds between emitted chunks — confirm

# RunPod serverless endpoint settings.
runpod:
  endpoint_id: vxi8yc4mxz187n
  prefer_async: true

# LLM sampling / generation parameters.
llm:
  max_tokens: 600
  # Explicit nulls (originally bare keys, which YAML also reads as null).
  # NOTE(review): presumably the consumer substitutes its own defaults for
  # null values — verify against the loading code.
  top_k: null
  top_p: null
  temperature: null
  repetition_penalty: null
  last_n_tokens: null
  seed: -1        # NOTE(review): -1 likely means "random seed" — confirm
  batch_size: 8
  threads: -1     # NOTE(review): -1 likely means "auto-detect" — confirm
  stop:
    - "</s>"

# Request queue settings.
queue:
  max_size: 16
  # NOTE(review): the original inline comment was truncated mid-sentence
  # ("recommend setting this no larger than your current"); it likely
  # referred to the available worker/CPU count — confirm and complete.
  concurrency_count: 1