Update README.md
README.md CHANGED
@@ -11,28 +11,23 @@ This is a version of the LLama-2-70B-chat-hf model quantized to 2-bit via Half-Q
 ### Basic Usage
 To run the model, install the HQQ library from https://github.com/mobiusml/hqq and use it as follows:
 ``` Python
-from hqq.models.llama_hf import LlamaHQQ
-import transformers
-
 model_id = 'mobiuslabsgmbh/Llama-2-70b-chat-hf-2bit_g16_s128-HQQ'
-#Load the tokenizer
-tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
-#Load the model
-model = LlamaHQQ.from_quantized(model_id)
+
+from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = HQQModelForCausalLM.from_quantized(model_id)
 ```
 
 ### Basic Chat Example
 ``` Python
-import transformers
-from hqq.models.llama_hf import LlamaHQQ
+model_id = 'mobiuslabsgmbh/Llama-2-70b-chat-hf-2bit_g16_s128-HQQ'
 
-model_id = 'mobiuslabsgmbh/Llama-2-70b-chat-hf-2bit_g16_s128-HQQ'
-#Load the tokenizer
-tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
-#Load the model
-model = LlamaHQQ.from_quantized(model_id)
+from hqq.engine.hf import HQQModelForCausalLM, AutoTokenizer
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = HQQModelForCausalLM.from_quantized(model_id)
 
 ##########################################################################################################
+import transformers
 from threading import Thread
 
 from sys import stdout
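
The hunk cuts off before the body of the chat example; judging by the imports it keeps (`transformers`, `Thread`, `stdout`), the omitted part is a token-streaming generation loop. As a hedged illustration only, not part of this commit, the sketch below shows that pattern using the model and tokenizer loaded above. It assumes the standard `transformers.TextIteratorStreamer` API and a CUDA device; the `chat()` helper and the simplified Llama-2 prompt template are illustrative names, not taken from the README.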
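``` Python
import transformers
from threading import Thread
from sys import stdout

def chat(prompt, max_new_tokens=256):
    # Wrap the user prompt in a simplified Llama-2 chat template (illustrative)
    inputs = tokenizer('[INST] ' + prompt + ' [/INST]', return_tensors='pt').to('cuda')
    # Stream decoded text back as generate() produces tokens
    streamer = transformers.TextIteratorStreamer(tokenizer, skip_prompt=True,
                                                 skip_special_tokens=True)
    # Run generation in a background thread so we can print chunks as they arrive
    thread = Thread(target=model.generate,
                    kwargs=dict(**inputs, streamer=streamer, max_new_tokens=max_new_tokens))
    thread.start()
    for chunk in streamer:
        stdout.write(chunk)
        stdout.flush()
    thread.join()

chat('What is the tallest mountain on Earth?')
```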