---
# Chatbot service configuration.
# NOTE(review): this file had been collapsed onto a single line (invalid YAML);
# structure below is reconstructed from the key hierarchy — verify against the consumer.

# GGML model repository to download/load.
model_url: https://huggingface.co./TheBloke/llama2_7b_chat_uncensored-GGML

# Typing/streaming simulation settings.
typer:
  delay: 0.1  # seconds between emitted chunks

# RunPod serverless endpoint settings.
runpod:
  endpoint_id: vxi8yc4mxz187n
  prefer_async: true

# LLM generation parameters. Explicit `null` means "use the library default"
# (the originals were bare empty values, which parse to null implicitly).
llm:
  max_tokens: 600
  top_k: null
  top_p: null
  temperature: null
  repetition_penalty: null
  last_n_tokens: null
  seed: -1       # -1 → random seed
  batch_size: 8
  threads: -1    # -1 → auto-detect thread count
  stop:
    - "<|prompter|>"

# Request queue settings.
queue:
  max_size: 16
  concurrency_count: 1  # recommend setting this no larger than your current