Rename inference-cache-config/llama2.json to inference-cache-config/llama2-7b-13b.json
Browse files
inference-cache-config/{llama2.json → llama2-7b-13b.json}
RENAMED
@@ -98,19 +98,5 @@
|
|
98 |
"num_cores": 24,
|
99 |
"auto_cast_type": "fp16"
|
100 |
}
|
101 |
-
],
|
102 |
-
"meta-llama/Llama-2-70b-chat-hf": [
|
103 |
-
{
|
104 |
-
"batch_size": 1,
|
105 |
-
"sequence_length": 4096,
|
106 |
-
"num_cores": 24,
|
107 |
-
"auto_cast_type": "fp16"
|
108 |
-
},
|
109 |
-
{
|
110 |
-
"batch_size": 4,
|
111 |
-
"sequence_length": 4096,
|
112 |
-
"num_cores": 24,
|
113 |
-
"auto_cast_type": "fp16"
|
114 |
-
}
|
115 |
]
|
116 |
}
|
|
|
98 |
"num_cores": 24,
|
99 |
"auto_cast_type": "fp16"
|
100 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
]
|
102 |
}
|