Update README.md
Browse files
README.md
CHANGED
@@ -797,9 +797,9 @@ tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True)
|
|
797 |
|
798 |
model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
|
799 |
model_basename=model_basename,
|
|
|
800 |
use_safetensors=True,
|
801 |
trust_remote_code=False,
|
802 |
-
device="cuda:0",
|
803 |
use_triton=use_triton,
|
804 |
quantize_config=None)
|
805 |
|
|
|
797 |
|
798 |
model = AutoGPTQForCausalLM.from_quantized(model_name_or_path,
|
799 |
model_basename=model_basename,
|
800 |
+
max_memory={0: '60GiB', 1: '79GiB'},  # max_memory is for 2 x 80GB GPUs; adjust if your config is different!
|
801 |
use_safetensors=True,
|
802 |
trust_remote_code=False,
|
|
|
803 |
use_triton=use_triton,
|
804 |
quantize_config=None)
|
805 |
|