WinstonShum
/

merged_llama_3.1_8b_instruct_guardrails

Text Classification

text-generation-inference

Inference Endpoints

4-bit precision

Model card Files Files and versions Community

WinstonShum committed on Aug 14

Commit

3361970

•

1 Parent(s): b1145b7

Update handler.py

Files changed (1) hide show

handler.py +4 -0

handler.py CHANGED Viewed

@@ -22,6 +22,10 @@ class EndpointHandler:
         )
         self.tokenizer = AutoTokenizer.from_pretrained(path)
         logger.info("Model and tokenizer loaded sucessfully")
         # Define the prompt template
         self.prompt_template = """You are an assistant designed to identify whether a user query is malicious.

         )
         self.tokenizer = AutoTokenizer.from_pretrained(path)
         logger.info("Model and tokenizer loaded sucessfully")
+        gpu_memory_allocated = torch.cuda.memory_allocated() / 1024**3  # Convert to GB
+        gpu_memory_reserved = torch.cuda.memory_reserved() / 1024**3  # Convert to GB
+        logger.info(f"GPU memory allocated after model loading: {gpu_memory_allocated:.2f} GB")
+        logger.info(f"GPU memory reserved after model loading: {gpu_memory_reserved:.2f} GB")
         # Define the prompt template
         self.prompt_template = """You are an assistant designed to identify whether a user query is malicious.