Update README.md
Browse files
README.md
CHANGED
@@ -238,44 +238,4 @@ You can either specify a new local-dir (Meta-Llama-3.1-8B-Q8_0) or download them
|
|
238 |
|
239 |
## Reproducibility
|
240 |
|
241 |
-
|
242 |
-
|
243 |
-
```
|
244 |
-
git lfs install
|
245 |
-
git clone https://huggingface.co/meta-llama/Meta-Llama-3.1-8B
|
246 |
-
```
|
247 |
-
|
248 |
-
### Quantization
|
249 |
-
|
250 |
-
```
|
251 |
-
cd llama.cpp
|
252 |
-
python ./convert_hf_to_gguf.py ../Meta-Llama-3.1-8B --outtype bf16 --outfile ../Meta-Llama-3.1-8B.BF16.gguf
|
253 |
-
python ./convert_hf_to_gguf.py ../Meta-Llama-3.1-8B --outtype f16 --outfile ../Meta-Llama-3.1-8B-FP16.gguf
|
254 |
-
python ./convert_hf_to_gguf.py ../Meta-Llama-3.1-8B --outtype q8_0 --outfile ../Meta-Llama-3.1-8B-Q8_0.gguf
|
255 |
-
./llama-quantize ../Meta-Llama-3.1-8B.BF16.gguf ../Meta-Llama-3.1-8B-Q6_K.gguf Q6_K
|
256 |
-
./llama-quantize ../Meta-Llama-3.1-8B.BF16.gguf ../Meta-Llama-3.1-8B-Q5_K_S.gguf Q5_K_S
|
257 |
-
./llama-quantize ../Meta-Llama-3.1-8B.BF16.gguf ../Meta-Llama-3.1-8B-Q5_K_M.gguf Q5_K_M
|
258 |
-
./llama-quantize ../Meta-Llama-3.1-8B.BF16.gguf ../Meta-Llama-3.1-8B-Q4_K_M.gguf Q4_K_M
|
259 |
-
./llama-quantize ../Meta-Llama-3.1-8B.BF16.gguf ../Meta-Llama-3.1-8B-Q4_K_S.gguf Q4_K_S
|
260 |
-
./llama-quantize ../Meta-Llama-3.1-8B.BF16.gguf ../Meta-Llama-3.1-8B-Q3_K_L.gguf Q3_K_L
|
261 |
-
./llama-quantize ../Meta-Llama-3.1-8B.BF16.gguf ../Meta-Llama-3.1-8B-Q3_K_M.gguf Q3_K_M
|
262 |
-
./llama-quantize ../Meta-Llama-3.1-8B.BF16.gguf ../Meta-Llama-3.1-8B-Q3_K_S.gguf Q3_K_S
|
263 |
-
./llama-quantize ../Meta-Llama-3.1-8B.BF16.gguf ../Meta-Llama-3.1-8B-Q2_K.gguf Q2_K
|
264 |
-
```
|
265 |
-
|
266 |
-
### Perplexity
|
267 |
-
|
268 |
-
```
|
269 |
-
cd llama.cpp
|
270 |
-
./llama-perplexity -m ../Meta-Llama-3.1-8B.BF16.gguf -f ../wikitext-2-raw/wiki.test.raw
|
271 |
-
./llama-perplexity -m ../Meta-Llama-3.1-8B-Q8_0.gguf -f ../wikitext-2-raw/wiki.test.raw
|
272 |
-
./llama-perplexity -m ../Meta-Llama-3.1-8B-Q6_K.gguf -f ../wikitext-2-raw/wiki.test.raw
|
273 |
-
./llama-perplexity -m ../Meta-Llama-3.1-8B-Q5_K_M.gguf -f ../wikitext-2-raw/wiki.test.raw
|
274 |
-
./llama-perplexity -m ../Meta-Llama-3.1-8B-Q5_K_S.gguf -f ../wikitext-2-raw/wiki.test.raw
|
275 |
-
./llama-perplexity -m ../Meta-Llama-3.1-8B-Q4_K_M.gguf -f ../wikitext-2-raw/wiki.test.raw
|
276 |
-
./llama-perplexity -m ../Meta-Llama-3.1-8B-Q4_K_S.gguf -f ../wikitext-2-raw/wiki.test.raw
|
277 |
-
./llama-perplexity -m ../Meta-Llama-3.1-8B-Q3_K_L.gguf -f ../wikitext-2-raw/wiki.test.raw
|
278 |
-
./llama-perplexity -m ../Meta-Llama-3.1-8B-Q3_K_M.gguf -f ../wikitext-2-raw/wiki.test.raw
|
279 |
-
./llama-perplexity -m ../Meta-Llama-3.1-8B-Q3_K_S.gguf -f ../wikitext-2-raw/wiki.test.raw
|
280 |
-
./llama-perplexity -m ../Meta-Llama-3.1-8B-Q2_K.gguf -f ../wikitext-2-raw/wiki.test.raw
|
281 |
-
```
|
|
|
238 |
|
239 |
## Reproducibility
|
240 |
|
241 |
+
For reproduction details, see: https://github.com/ggerganov/llama.cpp/issues/8650#issuecomment-2261497976
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|