File size: 1,512 Bytes
c79726f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# BEGIN GENERAL GGUF METADATA
# Model ID; must be unique between models (author / quantization).
id: 'Llama-3.2-3B-Instruct'
# Model ID used to construct requests; must also be unique between models
# (author / quantization).
model: 'Llama-3.2-3B-Instruct'
# Taken from metadata.general.name
name: 'Llama-3.2-3B-Instruct'
# Taken from metadata.version
version: 2

# END GENERAL GGUF METADATA

# BEGIN INFERENCE PARAMETERS
# BEGIN REQUIRED
# Stop sequences (tokenizer.ggml.eos_token_id). The token is quoted so that
# the `|` and `>` characters are always read as part of a literal string.
stop:
  - '<|eot_id|>'
# END REQUIRED

# BEGIN OPTIONAL
# NOTE(review): the original comment asked whether `true` is the default for
# `stream` — confirm against the engine before relying on it.
stream: true
top_p: 0.9  # Ranges: 0 to 1
temperature: 0.7  # Ranges: 0 to 1
frequency_penalty: 0  # Ranges: 0 to 1
presence_penalty: 0  # Ranges: 0 to 1
max_tokens: 4096  # Should default to the context length (see ctx_len)
seed: -1
dynatemp_range: 0
dynatemp_exponent: 1
top_k: 40
min_p: 0.05
tfs_z: 1
typ_p: 1
repeat_last_n: 64
repeat_penalty: 1
mirostat: false
mirostat_tau: 5
# Written as 0.1 rather than 0.100000001: the longer literal is just the
# 9-significant-digit print of single-precision 0.1, so both spellings convert
# to the same 32-bit float.
mirostat_eta: 0.1
penalize_nl: false
ignore_eos: false
n_probs: 0
min_keep: 0
# END OPTIONAL
# END INFERENCE PARAMETERS

# BEGIN MODEL LOAD PARAMETERS
# BEGIN REQUIRED
# Engine used to run the model.
engine: 'cortex.llamacpp'
# Chat template with {system_message} and {prompt} placeholders. It stays
# double-quoted on one line because the value relies on \n escape sequences,
# including the two trailing newlines after the assistant header.
prompt_template: "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
# END REQUIRED

# BEGIN OPTIONAL
# llama.context_length; 0 or undefined means the value is loaded from the model.
ctx_len: 4096
# Undefined means the value is loaded from the model.
ngl: 29
# END OPTIONAL
# END MODEL LOAD PARAMETERS