cortexso
/

mistral-nemo

Text Generation

Inference Endpoints

Model card Files Files and versions Community

mistral-nemo / model.yml

thuannh's picture

Update model.yml

1f2d658 verified 5 months ago

1.37 kB

	# BEGIN GENERAL GGUF METADATA
	# Model ID; must be unique between models (author / quantization).
	id: "Mistral-Nemo-Instruct-2407"
	# Model ID used when constructing requests; should also be unique between
	# models (author / quantization).
	model: "mistral-nemo"
	# Mirrors metadata.general.name.
	name: "Mistral-Nemo-Instruct-2407"
	# Mirrors metadata.version.
	version: 2
	# END GENERAL GGUF METADATA

	# BEGIN INFERENCE PARAMETERS
	# BEGIN REQUIRED
	# Stop sequences; corresponds to tokenizer.ggml.eos_token_id.
	stop:
	- </s>
	# END REQUIRED

	# BEGIN OPTIONAL
	# Fractional values in this section are written as plain decimals. The
	# earlier literals (e.g. 0.949999988) were float32 round-trip renderings of
	# the same numbers and map to identical float32 values.
	stream: true # Default true?
	top_p: 0.95 # Ranges: 0 to 1
	# NOTE(review): the three entries below were documented as "0 to 1";
	# confirm the accepted ranges against the engine before relying on them.
	temperature: 0.7
	frequency_penalty: 0
	presence_penalty: 0
	max_tokens: 4096 # Should default to the context length (see ctx_len)
	seed: -1
	dynatemp_range: 0
	dynatemp_exponent: 1
	top_k: 40
	min_p: 0.05
	tfs_z: 1
	typ_p: 1
	repeat_last_n: 64
	repeat_penalty: 1
	mirostat: false
	mirostat_tau: 5
	mirostat_eta: 0.1
	penalize_nl: false
	ignore_eos: false
	n_probs: 0
	min_keep: 0
	# END OPTIONAL
	# END INFERENCE PARAMETERS

	# BEGIN MODEL LOAD PARAMETERS
	# BEGIN REQUIRED
	# Engine used to run the model.
	engine: "llama-cpp"
	# Prompt layout with {system_message} and {prompt} placeholders.
	# NOTE(review): "[INST]" is opened twice but "[/INST]" appears only once;
	# confirm this matches what the engine's template parser expects.
	prompt_template: "[INST] {system_message}\n[INST] {prompt} [/INST]"
	# END REQUIRED

	# BEGIN OPTIONAL
	# Corresponds to llama.context_length; 0 or undefined = loaded from model.
	ctx_len: 4096
	# Undefined = loaded from model.
	ngl: 41
	# END OPTIONAL
	# END MODEL LOAD PARAMETERS