Imran1
/

Qwen2.5-72B-Instruct-FP8

Model card · Files and versions · Community

Imran1 committed 14 days ago

Commit

edaf8b6

•

1 Parent(s): 40362ea

Update inference.py

Files changed (1) hide show

inference.py +1 -1

inference.py CHANGED Viewed

@@ -17,7 +17,7 @@ def format_chat(messages: List[Dict[str, str]], tokenizer) -> str:
     return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 # Model loading function for SageMaker with tensor parallelism
-def model_fn(model_dir):
     """
     Load the model and tokenizer from the model directory for inference.
     This version supports tensor parallelism across 4 GPUs.

     return tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 # Model loading function for SageMaker with tensor parallelism
+def model_fn(model_dir,context=None):
     """
     Load the model and tokenizer from the model directory for inference.
     This version supports tensor parallelism across 4 GPUs.