Chris4K committed
Commit d891875 · verified · 1 Parent(s): cd7a010

Update services/model_manager.py

Files changed (1):
  services/model_manager.py  +4 -2
services/model_manager.py CHANGED

@@ -20,6 +20,7 @@ try:
 except Exception as e:
     print("Langfuse Offline")
 
+@observe()
 class ModelManager:
     def __init__(self, device: Optional[str] = None):
         self.logger = logging.getLogger(__name__)
@@ -38,6 +39,7 @@ class ModelManager:
         from transformers import AutoTokenizer
         return AutoTokenizer.from_pretrained(model_name)
 
+    @observe()
     def load_model(self, model_id: str, model_path: str, model_type: str, config: ModelConfig) -> None:
         """Load a model with specified configuration."""
         try:
@@ -75,7 +77,7 @@ class ModelManager:
             self.logger.error(f"Failed to load model {model_id}: {str(e)}")
             raise
 
-
+    @observe()
     def unload_model(self, model_id: str) -> None:
         """Unload a model and free resources."""
         if model_id in self.models:
@@ -83,7 +85,7 @@ class ModelManager:
         if model_id in self.tokenizers:
            del self.tokenizers[model_id]
         torch.cuda.empty_cache()
-
+    @observe()
    def _load_quantized_model(self, model_path: str, **kwargs) -> Llama:
        """Load a quantized GGUF model."""
        try:
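For context: the "Langfuse Offline" branch at the top of the diff implies the module imports Langfuse inside a try/except and falls back when the package or network is unavailable. For the new @observe() decorators to keep working in that offline branch, the except clause needs to bind some fallback for the name observe. The sketch below shows one way that pattern is commonly written; the no-op stub is an assumption for illustration and is not part of this commit, and only the from langfuse.decorators import observe line is Langfuse's documented decorator entry point.

# Minimal sketch, assuming the file imports Langfuse's @observe decorator
# and needs a stand-in when the import fails (the "Langfuse Offline" case).
try:
    from langfuse.decorators import observe  # real tracing decorator when Langfuse is available
except Exception as e:
    print("Langfuse Offline")

    def observe(*args, **kwargs):
        """No-op stand-in (assumption, not in this commit) so @observe() still resolves offline."""
        def decorator(func):
            return func  # leave the decorated callable unchanged
        return decorator

One design note: Langfuse documents @observe() for functions and methods; whether decorating the ModelManager class itself (the first hunk) traces anything useful depends on how the decorator handles a class object rather than a plain callable.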