Update code/inference.py
Browse files- code/inference.py +1 -1
code/inference.py
CHANGED
@@ -72,7 +72,7 @@ def model_fn(model_dir, context=None):
|
|
72 |
model_dir,
|
73 |
device_map="auto", # Automatically map layers across GPUs
|
74 |
offload_folder=offload_dir, # Offload parts to disk if needed
|
75 |
-
max_memory = {i: "15GiB" for i in range(torch.cuda.device_count())} # Example for reducing usage per GPU
|
76 |
no_split_module_classes=["QwenForCausalLM"] # Keep these module classes whole on one device (not split across GPUs)
|
77 |
)
|
78 |
|
|
|
72 |
model_dir,
|
73 |
device_map="auto", # Automatically map layers across GPUs
|
74 |
offload_folder=offload_dir, # Offload parts to disk if needed
|
75 |
+
max_memory = {i: "15GiB" for i in range(torch.cuda.device_count())}, # Example for reducing usage per GPU
|
76 |
no_split_module_classes=["QwenForCausalLM"] # Keep these module classes whole on one device (not split across GPUs)
|
77 |
)
|
78 |
|