{
  "attention_layers": [
    12,
    13
  ],
  "auto_mapping": null,
  "base_model_name_or_path": "meta-llama/Llama-3.2-1B",
  "cache_dtype": "float16",
  "cache_size": 32768,
  "cache_type": "FIFO",
  "compression_factor": 4,
  "context_size": 1,
  "global_cache": false,
  "inference_mode": true,
  "neighborhood_size": 2,
  "neurocache_type": "ONDEVICE",
  "pooling_window": 8,
  "retrieval_map": {
    "12": 12
  },
  "retrieval_stride": 2,
  "similarity_fn": "l2",
  "task_type": "CAUSAL_LM",
  "topk": 8
}