# health_check.py
from dataclasses import dataclass
from typing import Any, Dict

import psutil
import torch


@dataclass
class HealthStatus:
    """Snapshot of host and accelerator resource usage."""

    # Overall verdict; check_system_resources() currently always sets "healthy".
    status: str
    # Allocated CUDA memory per device, keyed "gpu_<index>", in bytes / 1024**3.
    gpu_memory: Dict[str, float]
    # Value reported by psutil.cpu_percent().
    cpu_usage: float
    # Value reported by psutil.virtual_memory().percent.
    ram_usage: float
    # Left empty by check_system_resources(); to be filled by the model manager.
    model_status: Dict[str, str]


class HealthCheck:
    """Stateless helpers that collect resource metrics into a HealthStatus."""

    @staticmethod
    def check_gpu_memory() -> Dict[str, float]:
        """Return the memory allocated through torch on every visible CUDA device.

        Returns:
            A mapping ``"gpu_<index>" -> torch.cuda.memory_allocated(index)``
            divided by ``1024**3``, or an empty dict when
            ``torch.cuda.is_available()`` is false.
        """
        if not torch.cuda.is_available():
            return {}
        return {
            f"gpu_{i}": torch.cuda.memory_allocated(i) / 1024**3
            for i in range(torch.cuda.device_count())
        }

    @staticmethod
    def check_system_resources() -> HealthStatus:
        """Collect GPU, CPU and RAM usage into a ``HealthStatus``.

        ``status`` is hard-coded to ``"healthy"`` and ``model_status`` is
        returned empty; no thresholds are evaluated here.

        Returns:
            A freshly built ``HealthStatus`` instance.
        """
        return HealthStatus(
            status="healthy",
            gpu_memory=HealthCheck.check_gpu_memory(),
            # NOTE(review): called without an interval, psutil measures since
            # the previous call, so the first reading in a process is a
            # placeholder 0.0 - confirm callers tolerate that.
            cpu_usage=psutil.cpu_percent(),
            ram_usage=psutil.virtual_memory().percent,
            # TODO add more system resources like disk, network, etc.
            model_status={},  # To be filled by the model manager
        )