# Model descriptor for Mistral-Nemo-Instruct-2407, served by the llama-cpp engine.
# Sections: general GGUF metadata, per-request inference defaults, and
# model load parameters. Each section is delimited by BEGIN/END markers.

# BEGIN GENERAL GGUF METADATA
id: Mistral-Nemo-Instruct-2407  # Model ID, unique between models (author / quantization)
model: mistral-nemo  # Model ID used to construct requests - should be unique between models (author / quantization)
name: Mistral-Nemo-Instruct-2407  # metadata.general.name
version: 2  # metadata.version
# END GENERAL GGUF METADATA

# BEGIN INFERENCE PARAMETERS
# BEGIN REQUIRED
stop:  # tokenizer.ggml.eos_token_id
  - </s>
# END REQUIRED
# BEGIN OPTIONAL
stream: true  # Default true?
top_p: 0.949999988  # Ranges: 0 to 1
temperature: 0.699999988  # Ranges: 0 to 1
frequency_penalty: 0  # Ranges: 0 to 1
presence_penalty: 0  # Ranges: 0 to 1
max_tokens: 4096  # Should default to the context length (matches ctx_len below)
# NOTE(review): -1 presumably asks the engine for a random seed - confirm.
seed: -1
dynatemp_range: 0
dynatemp_exponent: 1
top_k: 40
min_p: 0.0500000007
tfs_z: 1
typ_p: 1
repeat_last_n: 64
repeat_penalty: 1
# NOTE(review): stored as a boolean here; confirm the engine does not expect
# an integer mirostat mode instead.
mirostat: false
mirostat_tau: 5
mirostat_eta: 0.100000001
penalize_nl: false
ignore_eos: false
n_probs: 0
min_keep: 0
# END OPTIONAL
# END INFERENCE PARAMETERS

# BEGIN MODEL LOAD PARAMETERS
# BEGIN REQUIRED
engine: llama-cpp  # Engine used to run the model
# Double-quoted so that \n is an actual newline between the system message and
# the user turn. {system_message} and {prompt} are placeholders.
# NOTE(review): the first [INST] is never closed before the second one opens -
# verify this against the model's published chat template.
prompt_template: "[INST] {system_message}\n[INST] {prompt} [/INST]"
# END REQUIRED
# BEGIN OPTIONAL
ctx_len: 4096  # llama.context_length | 0 or undefined = loaded from model
ngl: 41  # Undefined = loaded from model
# END OPTIONAL
# END MODEL LOAD PARAMETERS