CamiloVega committed on
Commit
05204e6
·
verified ·
1 Parent(s): bb45a47

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -70,13 +70,13 @@ class ModelManager:
70
  self.model = AutoModelForCausalLM.from_pretrained(
71
  model_name,
72
  token=HUGGINGFACE_TOKEN,
73
- device_map="auto", # Automatically handle device placement
74
- torch_dtype=torch.float16, # Use float16 to reduce memory usage
75
- low_cpu_mem_usage=True, # Optimize CPU memory usage
76
- use_safetensors=True, # Use safetensors for better memory management
77
- max_memory={0: "6GB"}, # Limit GPU memory usage
78
- offload_folder="offload", # Folder for offloading to CPU
79
- offload_state_dict=True # Offload state dict to CPU
80
  )
81
 
82
  # Create pipeline with minimal settings
@@ -86,8 +86,8 @@ class ModelManager:
86
  "text-generation",
87
  model=self.model,
88
  tokenizer=self.tokenizer,
89
- device_map="auto", # Automatically handle device placement
90
- torch_dtype=torch.float16, # Use float16 for memory efficiency
91
  max_new_tokens=512,
92
  do_sample=True,
93
  temperature=0.7,
 
70
  self.model = AutoModelForCausalLM.from_pretrained(
71
  model_name,
72
  token=HUGGINGFACE_TOKEN,
73
+ device_map="auto",
74
+ torch_dtype=torch.float16,
75
+ low_cpu_mem_usage=True,
76
+ use_safetensors=True,
77
+ max_memory={0: "6GB"},
78
+ offload_folder="offload",
79
+ offload_state_dict=True
80
  )
81
 
82
  # Create pipeline with minimal settings
 
86
  "text-generation",
87
  model=self.model,
88
  tokenizer=self.tokenizer,
89
+ device_map="auto",
90
+ torch_dtype=torch.float16,
91
  max_new_tokens=512,
92
  do_sample=True,
93
  temperature=0.7,