aws-neuron
/

optimum-neuron-cache

dacorvo HF staff committed on May 15

Commit

17e7257

•

1 Parent(s): 28f3bad

Remove llama2 7B config for 24 cores

Files changed (1) hide show

inference-cache-config/llama2.json CHANGED Viewed

@@ -6,18 +6,6 @@
       "num_cores": 2,
       "auto_cast_type": "fp16"
     },
-    {
-      "batch_size": 1,
-      "sequence_length": 4096,
-      "num_cores": 8,
-      "auto_cast_type": "fp16"
-    },
-    {
-      "batch_size": 1,
-      "sequence_length": 4096,
-      "num_cores": 24,
-      "auto_cast_type": "fp16"
-    },
     {
       "batch_size": 4,
       "sequence_length": 4096,
@@ -30,47 +18,23 @@
       "num_cores": 8,
       "auto_cast_type": "fp16"
     },
-    {
-      "batch_size": 4,
-      "sequence_length": 4096,
-      "num_cores": 24,
-      "auto_cast_type": "fp16"
-    },
     {
       "batch_size": 8,
       "sequence_length": 4096,
       "num_cores": 8,
       "auto_cast_type": "fp16"
     },
-    {
-      "batch_size": 8,
-      "sequence_length": 4096,
-      "num_cores": 24,
-      "auto_cast_type": "fp16"
-    },
     {
       "batch_size": 16,
       "sequence_length": 4096,
       "num_cores": 8,
       "auto_cast_type": "fp16"
     },
-    {
-      "batch_size": 16,
-      "sequence_length": 4096,
-      "num_cores": 24,
-      "auto_cast_type": "fp16"
-    },
     {
       "batch_size": 32,
       "sequence_length": 4096,
       "num_cores": 8,
       "auto_cast_type": "fp16"
-    },
-    {
-      "batch_size": 32,
-      "sequence_length": 4096,
-      "num_cores": 24,
-      "auto_cast_type": "fp16"
     }
   ],
   "meta-llama/Llama-2-13b-chat-hf": [

       "num_cores": 2,
       "auto_cast_type": "fp16"
     },
     {
       "batch_size": 4,
       "sequence_length": 4096,
       "num_cores": 8,
       "auto_cast_type": "fp16"
     },
     {
       "batch_size": 8,
       "sequence_length": 4096,
       "num_cores": 8,
       "auto_cast_type": "fp16"
     },
     {
       "batch_size": 16,
       "sequence_length": 4096,
       "num_cores": 8,
       "auto_cast_type": "fp16"
     },
     {
       "batch_size": 32,
       "sequence_length": 4096,
       "num_cores": 8,
       "auto_cast_type": "fp16"
     }
   ],
   "meta-llama/Llama-2-13b-chat-hf": [