Update README.md
README.md CHANGED
Please use cczhong/internlm-chat-7b-4bit-gptq until I figure out why this safetensor version does not work.
# How to use
Until https://github.com/PanQiWei/AutoGPTQ/pull/189 is merged, you need to install this fork first: `pip install git+https://github.com/cczhong11/AutoGPTQ`
```python
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

quantized_model_dir = "cczhong/internlm-chat-7b-4bit-gptq-safetensor"

# trust_remote_code is required because InternLM ships custom model code
tokenizer = AutoTokenizer.from_pretrained(quantized_model_dir, trust_remote_code=True)
model = AutoGPTQForCausalLM.from_quantized(quantized_model_dir, device="cuda:0", trust_remote_code=True)

# Single-turn chat; "你好" means "Hello"
response, history = model.chat(tokenizer, "你好", history=[])
print(response)
```
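For a follow-up turn, you can pass the returned `history` back in. A minimal sketch, assuming the quantized wrapper forwards InternLM's `chat` helper as in the call above (the prompt string here is just an example):

```python
# Continue the conversation by reusing the history returned by the previous call
response, history = model.chat(tokenizer, "Please introduce yourself.", history=history)
print(response)
```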