{ "attention_layers": [ 12, 13 ], "auto_mapping": null, "base_model_name_or_path": "meta-llama/Llama-3.2-1B", "cache_dtype": "float16", "cache_size": 32768, "cache_type": "FIFO", "compression_factor": 4, "context_size": 1, "global_cache": false, "inference_mode": true, "neighborhood_size": 2, "neurocache_type": "ONDEVICE", "pooling_window": 8, "retrieval_map": { "12": 12 }, "retrieval_stride": 2, "similarity_fn": "l2", "task_type": "CAUSAL_LM", "topk": 8 }