Imran1 committed on
Commit
2744dc1
1 Parent(s): eea1116

Update code/inference.py

Browse files
Files changed (1) hide show
  1. code/inference.py +1 -1
code/inference.py CHANGED
@@ -72,7 +72,7 @@ def model_fn(model_dir, context=None):
72
  model_dir,
73
  device_map="auto", # Automatically map layers across GPUs
74
  offload_folder=offload_dir, # Offload parts to disk if needed
75
- max_memory = {i: "15GiB" for i in range(torch.cuda.device_count())} # Example for reducing usage per GPU
76
  no_split_module_classes=["QwenForCausalLM"] # Ensure model is split across the GPUs
77
  )
78
 
 
72
  model_dir,
73
  device_map="auto", # Automatically map layers across GPUs
74
  offload_folder=offload_dir, # Offload parts to disk if needed
75
+ max_memory = {i: "15GiB" for i in range(torch.cuda.device_count())}, # Example for reducing usage per GPU
76
  no_split_module_classes=["QwenForCausalLM"] # Ensure model is split across the GPUs
77
  )
78