TheBloke committed
Commit 209e719
Parent: ab835a6

Upload README.md

Files changed (1): README.md (+5 −3)
README.md CHANGED
@@ -168,8 +168,8 @@ model_name_or_path = "TheBloke/LlongOrca-7B-16K-GPTQ"
 # To use a different branch, change revision
 # For example: revision="gptq-4bit-32g-actorder_True"
 model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
-                                             torch_dtype=torch.bfloat16,
                                              device_map="auto",
+                                             trust_remote_code=False,
                                              revision="main")
 
 tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
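
Taken together with the unchanged context, the loading code after this commit reads as below. This is a minimal sketch for reference, not part of the diff; it assumes a GPTQ-capable environment (e.g. AutoGPTQ or optimum installed), drops the torch_dtype argument, and makes trust_remote_code=False explicit:

from transformers import AutoModelForCausalLM, AutoTokenizer

model_name_or_path = "TheBloke/LlongOrca-7B-16K-GPTQ"

# torch_dtype=torch.bfloat16 is removed by this commit; trust_remote_code=False is added
model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
                                             device_map="auto",
                                             trust_remote_code=False,
                                             revision="main")

tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)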
@@ -186,7 +186,7 @@ prompt_template=f'''<|im_start|>system
 print("\n\n*** Generate:")
 
 input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
-output = model.generate(inputs=input_ids, temperature=0.7, max_new_tokens=512)
+output = model.generate(inputs=input_ids, temperature=0.7, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=512)
 print(tokenizer.decode(output[0]))
 
 # Inference can also be done using transformers' pipeline
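
The substantive fix in this hunk is do_sample=True: without it, model.generate() falls back to greedy decoding and ignores temperature and top_p (newer transformers versions warn about this). After the change the call reads as in this sketch, where prompt_template is the ChatML prompt built earlier in the README:

# prompt_template is the ChatML prompt defined earlier in the README
input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()

# do_sample=True makes temperature/top_p/top_k take effect instead of greedy decoding
output = model.generate(inputs=input_ids, temperature=0.7, do_sample=True,
                        top_p=0.95, top_k=40, max_new_tokens=512)
print(tokenizer.decode(output[0]))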
@@ -197,9 +197,11 @@ pipe = pipeline(
     model=model,
     tokenizer=tokenizer,
     max_new_tokens=512,
+    do_sample=True,
     temperature=0.7,
     top_p=0.95,
-    repetition_penalty=1.15
+    top_k=40,
+    repetition_penalty=1.1
 )
 
 print(pipe(prompt_template)[0]['generated_text'])
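
For reference, the full pipeline call after this commit would look like the sketch below. The task argument falls outside the hunk context, so "text-generation" is an assumption here; the change adds do_sample=True and top_k=40 and lowers repetition_penalty from 1.15 to 1.1:

from transformers import pipeline

pipe = pipeline(
    "text-generation",   # assumed: the task string is not visible in the hunk
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.1
)

print(pipe(prompt_template)[0]['generated_text'])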
 