Update README.md
Browse files
README.md
CHANGED
@@ -7,13 +7,14 @@ This is a quantized version of h2oai/h2ogpt-4096-llama2-13b-chat, formatted in GGUF

Before (lines 7–19):

     7    ## Available Formats
     8
     9    ### GGUF
    10  - (removed — content not recoverable from extraction)
    11  - (removed — content not recoverable from extraction)
    12  - (removed — content not recoverable from extraction)
    13
    14    ### Currently in conversion
    15
    16  - | Bits | Use case |
    17    | ---- | ---- | ----- |
    18    | q3_K_L | 3 | New k-quant method. Uses GGML_TYPE_Q5_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else GGML_TYPE_Q3_K |
    19    | q3_K_M | 3 | New k-quant method. Uses GGML_TYPE_Q4_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else GGML_TYPE_Q3_K |
|
7 |
## Available Formats
|
8 |
|
9 |
### GGUF
|
10 |
+
|
11 |
+
| Format | Bits | Use case |
|
12 |
+
| ---- | ---- | ----- |
|
13 |
+
| q8_0 | 8 | Original quant method, 8-bit. |
|
14 |
|
15 |
### Currently in conversion
|
16 |
|
17 |
+
| Format | Bits | Use case |
|
18 |
| ---- | ---- | ----- |
|
19 |
| q3_K_L | 3 | New k-quant method. Uses GGML_TYPE_Q5_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else GGML_TYPE_Q3_K |
|
20 |
| q3_K_M | 3 | New k-quant method. Uses GGML_TYPE_Q4_K for the attention.wv, attention.wo, and feed_forward.w2 tensors, else GGML_TYPE_Q3_K |
|