Tags: Text Generation · Transformers · Chinese · English · llama · text-generation-inference
Commit 4a1a0d4 committed by fireballoon
1 Parent(s): dbd619f

Update README.md

Files changed (1): README.md (+1, -1)
README.md CHANGED
@@ -37,7 +37,7 @@ datasets:
  >>> from transformers import AutoTokenizer, TextStreamer
  >>> from auto_gptq import AutoGPTQForCausalLM
  >>> tokenizer = AutoTokenizer.from_pretrained("fireballoon/baichuan-vicuna-chinese-7b-gptq", use_fast=False)
- >>> model = AutoGPTQForCausalLM.from_pretrained("fireballoon/baichuan-vicuna-chinese-7b-gptq").cuda()
+ >>> model = AutoGPTQForCausalLM.from_quantized("fireballoon/baichuan-vicuna-chinese-7b-gptq", device="cuda:0")
  >>> streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
  >>> instruction = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: {} ASSISTANT:"
  >>> prompt = instruction.format("How can I improve my time management skills?") # user message
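For context, below is a minimal sketch of how the corrected loading path is used end to end. It assumes the auto-gptq and transformers packages are installed and a CUDA device is available; the max_new_tokens value is an illustrative choice, not something taken from the model card.

```python
# Minimal sketch, assuming auto-gptq and transformers are installed and a CUDA GPU is available.
from transformers import AutoTokenizer, TextStreamer
from auto_gptq import AutoGPTQForCausalLM

model_id = "fireballoon/baichuan-vicuna-chinese-7b-gptq"

tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=False)
# Load the already-quantized GPTQ weights directly onto the GPU.
model = AutoGPTQForCausalLM.from_quantized(model_id, device="cuda:0")

streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

instruction = ("A chat between a curious user and an artificial intelligence assistant. "
               "The assistant gives helpful, detailed, and polite answers to the "
               "user's questions. USER: {} ASSISTANT:")
prompt = instruction.format("How can I improve my time management skills?")  # user message

input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("cuda:0")
# Stream the assistant's reply token by token as it is generated.
# max_new_tokens is an assumed value for illustration.
model.generate(input_ids=input_ids, streamer=streamer, max_new_tokens=256)
```

In auto-gptq, from_pretrained() is meant for loading a full-precision model prior to quantization (it expects a quantize_config), while from_quantized() loads an already-quantized checkpoint and places it on the requested device, which is what this diff switches to.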