Update services/model_manager.py
services/model_manager.py CHANGED
@@ -20,6 +20,7 @@ try:
 except Exception as e:
     print("Langfuse Offline")
 
+@observe()
 class ModelManager:
     def __init__(self, device: Optional[str] = None):
         self.logger = logging.getLogger(__name__)
@@ -38,6 +39,7 @@ class ModelManager:
         from transformers import AutoTokenizer
         return AutoTokenizer.from_pretrained(model_name)
 
+    @observe()
     def load_model(self, model_id: str, model_path: str, model_type: str, config: ModelConfig) -> None:
         """Load a model with specified configuration."""
         try:
@@ -75,7 +77,7 @@
             self.logger.error(f"Failed to load model {model_id}: {str(e)}")
             raise
 
-
+    @observe()
     def unload_model(self, model_id: str) -> None:
         """Unload a model and free resources."""
         if model_id in self.models:
@@ -83,7 +85,7 @@
         if model_id in self.tokenizers:
            del self.tokenizers[model_id]
        torch.cuda.empty_cache()
-
+    @observe()
    def _load_quantized_model(self, model_path: str, **kwargs) -> Llama:
        """Load a quantized GGUF model."""
        try:
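
The first hunk shows that the langfuse import above ModelManager sits in a try/except that only prints "Langfuse Offline" on failure. For the new @observe() decorators to keep resolving on machines where that import fails, the guard typically pairs with a no-op stand-in. Below is a minimal sketch of that pattern, assuming the Langfuse v2 decorator API (langfuse.decorators.observe); the fallback definition is an illustration of the offline path, not code from this commit.

# Sketch (assumption): guarded Langfuse import with a no-op fallback so
# that @observe() still resolves when the SDK import fails.
try:
    from langfuse.decorators import observe  # Langfuse v2 decorator API
except Exception as e:
    print("Langfuse Offline")

    def observe(*args, **kwargs):
        # Hypothetical fallback mirroring observe()'s call shape: it is
        # called with options and returns a decorator that hands the
        # wrapped callable back unchanged.
        def decorator(fn):
            return fn
        return decorator

With a stand-in like this, @observe() on ModelManager and its methods degrades to a plain pass-through instead of raising a NameError when Langfuse is offline; when the real import succeeds, each decorated call is recorded as a Langfuse trace or span.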