rodrigomasini committed on
Commit f093c76
1 Parent(s): ce75bc0

Update app_v3.py

Files changed (1)
  1. app_v3.py +5 -7
app_v3.py CHANGED
@@ -6,9 +6,9 @@ import os
 import torch
 import subprocess
 
-os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
-os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
-os.environ["CUDA_VISIBLE_DEVICES"]="1"
+# os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'  # => only makes sense with more than one GPU, since it is trying to split
+# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # => only makes sense with more GPUs
+# os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"  # => example of device numbers
 
 
 # Define pretrained and quantized model directories
@@ -27,8 +27,6 @@ pretrained_model_dir = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"
 #model_name_or_path = quantized_model_dir
 model_basename = "Jackson2-4bit-128g-GPTQ"
 
-#os.environ['CUDA_VISIBLE_DEVICES'] = '0'
-
 # Before allocating or loading the model, clear up memory
 #gc.collect()
 #torch.cuda.empty_cache()
@@ -38,15 +36,15 @@ use_triton = False
 if torch.cuda.is_available():
     torch.cuda.empty_cache()
 
-
 #tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True, legacy=False)
 tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)
-device = "cuda:0" if torch.cuda.is_available() else "cpu"
+device = "cuda:0" if torch.cuda.is_available() else "cpu"  # best configuration besides the auto option
 model = AutoGPTQForCausalLM.from_quantized(
     pretrained_model_dir,
     model_basename=model_basename,
     use_safetensors=True,
     device=device,
+    max_memory={0: "10GIB"}
 )
 
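
For reference, the model-loading path of app_v3.py reads roughly as follows after this commit. This is a minimal consolidated sketch of the new side of the diff, with imports added so it is self-contained; the rest of the file is omitted.

import torch
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

# GPTQ repository and quantized weights basename used by the Space
pretrained_model_dir = "FPHam/Jackson_The_Formalizer_V2_13b_GPTQ"
model_basename = "Jackson2-4bit-128g-GPTQ"

# Release any cached GPU memory before loading the quantized weights
if torch.cuda.is_available():
    torch.cuda.empty_cache()

tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)

# Pin the model to the first GPU (or fall back to CPU) and cap its GPU footprint at 10 GiB
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = AutoGPTQForCausalLM.from_quantized(
    pretrained_model_dir,
    model_basename=model_basename,
    use_safetensors=True,
    device=device,
    max_memory={0: "10GIB"},
)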
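
The environment variables this commit comments out only matter on a multi-GPU machine, as the new comments note. If the Space were ever moved to two GPUs, they could be re-enabled along the lines below; the device indices and memory caps here are illustrative assumptions, not values from the repo, and the variables must be set before the first CUDA call.

import os

# Order devices by PCI bus ID and expose two of them (example numbering)
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

# Hypothetical per-device caps, in the same string format the diff uses for GPU 0
max_memory = {0: "10GIB", 1: "10GIB", "cpu": "30GIB"}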