"""
Returns a random deployment from the list of healthy deployments.

If weights are provided, it will return a deployment based on the weights.
"""

import random
from typing import TYPE_CHECKING, Any, Dict, List, Union

from litellm._logging import verbose_router_logger

if TYPE_CHECKING:
    from litellm.router import Router as _Router

    LitellmRouter = _Router
else:
    LitellmRouter = Any


def simple_shuffle(
    llm_router_instance: LitellmRouter,
    healthy_deployments: Union[List[Any], Dict[Any, Any]],
    model: str,
) -> Dict:
    """
    Returns a random deployment from the list of healthy deployments.

    The pick is weighted by the first of `weight`, `rpm`, `tpm` that is set
    (i.e. not None) in the `litellm_params` of the FIRST healthy deployment.
    The values of that param across all deployments are normalised and used as
    selection probabilities. Deployments that do not set the param (or set it
    to None) count as 0 and are never selected by that weighted pick.

    If the chosen param sums to 0 across all deployments, it cannot be used for
    weighting; the next param is tried instead. When no param is usable, a
    uniform random pick is made.

    Args:
        llm_router_instance: LitellmRouter instance; only its
            `print_deployment` method is used, for logging.
        healthy_deployments: List of healthy deployments
        model: Model name (used in log messages only)

    Returns:
        Dict: A single healthy deployment

    Raises:
        IndexError: If `healthy_deployments` is an empty list.
    """
    first_litellm_params = healthy_deployments[0].get("litellm_params")

    # (litellm_params key, label used in the debug log), in priority order.
    weighting_params = (("weight", "weight"), ("rpm", "rpms"), ("tpm", "tpms"))

    for param_name, log_label in weighting_params:
        # Only the first deployment decides whether a param is "configured".
        if first_litellm_params.get(param_name, None) is None:
            continue

        # `or 0` also covers a key that is present but explicitly None, which
        # would otherwise make sum() fail with a TypeError.
        raw_values = [
            m["litellm_params"].get(param_name) or 0 for m in healthy_deployments
        ]
        verbose_router_logger.debug(f"\n{log_label} {raw_values}")

        total = sum(raw_values)
        if total <= 0:
            # Normalising would divide by zero; this param carries no usable
            # weighting information, so fall through to the next strategy.
            verbose_router_logger.debug(
                f"\n total {param_name} is {total}, skipping {param_name}-based pick"
            )
            continue

        weights = [value / total for value in raw_values]
        verbose_router_logger.debug(f"\n weights {weights}")

        # Perform weighted random pick
        selected_index = random.choices(range(len(weights)), weights=weights)[0]
        verbose_router_logger.debug(f"\n selected index, {selected_index}")

        deployment = healthy_deployments[selected_index]
        verbose_router_logger.info(
            f"get_available_deployment for model: {model}, Selected deployment: {llm_router_instance.print_deployment(deployment) or deployment[0]} for model: {model}"
        )
        # `or deployment[0]` is kept from the original; it is only evaluated
        # when the selected deployment is falsy.
        return deployment or deployment[0]

    ############## No usable weight/RPM/TPM, we do a random pick #################
    item = random.choice(healthy_deployments)
    return item or item[0]