import json import time import uuid from enum import Enum from typing import Any, Dict, List, Literal, Optional, Tuple, Union from aiohttp import FormData from openai._models import BaseModel as OpenAIObject from openai.types.audio.transcription_create_params import FileTypes # type: ignore from openai.types.completion_usage import ( CompletionTokensDetails, CompletionUsage, PromptTokensDetails, ) from openai.types.moderation import ( Categories, CategoryAppliedInputTypes, CategoryScores, ) from openai.types.moderation_create_response import Moderation, ModerationCreateResponse from pydantic import BaseModel, ConfigDict, Field, PrivateAttr from typing_extensions import Callable, Dict, Required, TypedDict, override from ..litellm_core_utils.core_helpers import map_finish_reason from .guardrails import GuardrailEventHooks from .llms.openai import ( ChatCompletionToolCallChunk, ChatCompletionUsageBlock, OpenAIChatCompletionChunk, ) from .rerank import RerankResponse def _generate_id(): # private helper function return "chatcmpl-" + str(uuid.uuid4()) class LiteLLMPydanticObjectBase(BaseModel): """ Implements default functions, all pydantic objects should have. """ def json(self, **kwargs): # type: ignore try: return self.model_dump(**kwargs) # noqa except Exception: # if using pydantic v1 return self.dict(**kwargs) def fields_set(self): try: return self.model_fields_set # noqa except Exception: # if using pydantic v1 return self.__fields_set__ model_config = ConfigDict(protected_namespaces=()) class LiteLLMCommonStrings(Enum): redacted_by_litellm = "redacted by litellm. 'litellm.turn_off_message_logging=True'" SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"] class CostPerToken(TypedDict): input_cost_per_token: float output_cost_per_token: float class ProviderField(TypedDict): field_name: str field_type: Literal["string"] field_description: str field_value: str class ProviderSpecificModelInfo(TypedDict, total=False): supports_system_messages: Optional[bool] supports_response_schema: Optional[bool] supports_vision: Optional[bool] supports_function_calling: Optional[bool] supports_tool_choice: Optional[bool] supports_assistant_prefill: Optional[bool] supports_prompt_caching: Optional[bool] supports_audio_input: Optional[bool] supports_embedding_image_input: Optional[bool] supports_audio_output: Optional[bool] supports_pdf_input: Optional[bool] supports_native_streaming: Optional[bool] supports_parallel_function_calling: Optional[bool] class ModelInfoBase(ProviderSpecificModelInfo, total=False): key: Required[str] # the key in litellm.model_cost which is returned max_tokens: Required[Optional[int]] max_input_tokens: Required[Optional[int]] max_output_tokens: Required[Optional[int]] input_cost_per_token: Required[float] cache_creation_input_token_cost: Optional[float] cache_read_input_token_cost: Optional[float] input_cost_per_character: Optional[float] # only for vertex ai models input_cost_per_audio_token: Optional[float] input_cost_per_token_above_128k_tokens: Optional[float] # only for vertex ai models input_cost_per_character_above_128k_tokens: Optional[ float ] # only for vertex ai models input_cost_per_query: Optional[float] # only for rerank models input_cost_per_image: Optional[float] # only for vertex ai models input_cost_per_audio_per_second: Optional[float] # only for vertex ai models input_cost_per_video_per_second: Optional[float] # only for vertex ai models input_cost_per_second: Optional[float] # for OpenAI Speech models output_cost_per_token: Required[float] output_cost_per_character: Optional[float] # only for vertex ai models output_cost_per_audio_token: Optional[float] output_cost_per_token_above_128k_tokens: Optional[ float ] # only for vertex ai models output_cost_per_character_above_128k_tokens: Optional[ float ] # only for vertex ai models output_cost_per_image: Optional[float] output_vector_size: Optional[int] output_cost_per_video_per_second: Optional[float] # only for vertex ai models output_cost_per_audio_per_second: Optional[float] # only for vertex ai models output_cost_per_second: Optional[float] # for OpenAI Speech models litellm_provider: Required[str] mode: Required[ Literal[ "completion", "embedding", "image_generation", "chat", "audio_transcription" ] ] tpm: Optional[int] rpm: Optional[int] class ModelInfo(ModelInfoBase, total=False): """ Model info for a given model, this is information found in litellm.model_prices_and_context_window.json """ supported_openai_params: Required[Optional[List[str]]] class GenericStreamingChunk(TypedDict, total=False): text: Required[str] tool_use: Optional[ChatCompletionToolCallChunk] is_finished: Required[bool] finish_reason: Required[str] usage: Required[Optional[ChatCompletionUsageBlock]] index: int # use this dict if you want to return any provider specific fields in the response provider_specific_fields: Optional[Dict[str, Any]] from enum import Enum class CallTypes(Enum): embedding = "embedding" aembedding = "aembedding" completion = "completion" acompletion = "acompletion" atext_completion = "atext_completion" text_completion = "text_completion" image_generation = "image_generation" aimage_generation = "aimage_generation" moderation = "moderation" amoderation = "amoderation" atranscription = "atranscription" transcription = "transcription" aspeech = "aspeech" speech = "speech" rerank = "rerank" arerank = "arerank" arealtime = "_arealtime" create_batch = "create_batch" acreate_batch = "acreate_batch" pass_through = "pass_through_endpoint" CallTypesLiteral = Literal[ "embedding", "aembedding", "completion", "acompletion", "atext_completion", "text_completion", "image_generation", "aimage_generation", "moderation", "amoderation", "atranscription", "transcription", "aspeech", "speech", "rerank", "arerank", "_arealtime", "create_batch", "acreate_batch", "pass_through_endpoint", ] class PassthroughCallTypes(Enum): passthrough_image_generation = "passthrough-image-generation" class TopLogprob(OpenAIObject): token: str """The token.""" bytes: Optional[List[int]] = None """A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be `null` if there is no bytes representation for the token. """ logprob: float """The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value `-9999.0` is used to signify that the token is very unlikely. """ class ChatCompletionTokenLogprob(OpenAIObject): token: str """The token.""" bytes: Optional[List[int]] = None """A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be `null` if there is no bytes representation for the token. """ logprob: float """The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value `-9999.0` is used to signify that the token is very unlikely. """ top_logprobs: List[TopLogprob] """List of the most likely tokens and their log probability, at this token position. In rare cases, there may be fewer than the number of requested `top_logprobs` returned. """ def __contains__(self, key): # Define custom behavior for the 'in' operator return hasattr(self, key) def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist return getattr(self, key, default) def __getitem__(self, key): # Allow dictionary-style access to attributes return getattr(self, key) class ChoiceLogprobs(OpenAIObject): content: Optional[List[ChatCompletionTokenLogprob]] = None """A list of message content tokens with log probability information.""" def __contains__(self, key): # Define custom behavior for the 'in' operator return hasattr(self, key) def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist return getattr(self, key, default) def __getitem__(self, key): # Allow dictionary-style access to attributes return getattr(self, key) class FunctionCall(OpenAIObject): arguments: str name: Optional[str] = None class Function(OpenAIObject): arguments: str name: Optional[ str ] # can be None - openai e.g.: ChoiceDeltaToolCallFunction(arguments='{"', name=None), type=None) def __init__( self, arguments: Optional[Union[Dict, str]], name: Optional[str] = None, **params, ): if arguments is None: arguments = "" elif isinstance(arguments, Dict): arguments = json.dumps(arguments) else: arguments = arguments name = name # Build a dictionary with the structure your BaseModel expects data = {"arguments": arguments, "name": name, **params} super(Function, self).__init__(**data) def __contains__(self, key): # Define custom behavior for the 'in' operator return hasattr(self, key) def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist return getattr(self, key, default) def __getitem__(self, key): # Allow dictionary-style access to attributes return getattr(self, key) def __setitem__(self, key, value): # Allow dictionary-style assignment of attributes setattr(self, key, value) class ChatCompletionDeltaToolCall(OpenAIObject): id: Optional[str] = None function: Function type: Optional[str] = None index: int class HiddenParams(OpenAIObject): original_response: Optional[Union[str, Any]] = None model_id: Optional[str] = None # used in Router for individual deployments api_base: Optional[str] = None # returns api base used for making completion call model_config = ConfigDict(extra="allow", protected_namespaces=()) def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist return getattr(self, key, default) def __getitem__(self, key): # Allow dictionary-style access to attributes return getattr(self, key) def __setitem__(self, key, value): # Allow dictionary-style assignment of attributes setattr(self, key, value) def json(self, **kwargs): # type: ignore try: return self.model_dump() # noqa except Exception: # if using pydantic v1 return self.dict() class ChatCompletionMessageToolCall(OpenAIObject): def __init__( self, function: Union[Dict, Function], id: Optional[str] = None, type: Optional[str] = None, **params, ): super(ChatCompletionMessageToolCall, self).__init__(**params) if isinstance(function, Dict): self.function = Function(**function) else: self.function = function if id is not None: self.id = id else: self.id = f"{uuid.uuid4()}" if type is not None: self.type = type else: self.type = "function" def __contains__(self, key): # Define custom behavior for the 'in' operator return hasattr(self, key) def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist return getattr(self, key, default) def __getitem__(self, key): # Allow dictionary-style access to attributes return getattr(self, key) def __setitem__(self, key, value): # Allow dictionary-style assignment of attributes setattr(self, key, value) from openai.types.chat.chat_completion_audio import ChatCompletionAudio class ChatCompletionAudioResponse(ChatCompletionAudio): def __init__( self, data: str, expires_at: int, transcript: str, id: Optional[str] = None, **params, ): if id is not None: id = id else: id = f"{uuid.uuid4()}" super(ChatCompletionAudioResponse, self).__init__( data=data, expires_at=expires_at, transcript=transcript, id=id, **params ) def __contains__(self, key): # Define custom behavior for the 'in' operator return hasattr(self, key) def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist return getattr(self, key, default) def __getitem__(self, key): # Allow dictionary-style access to attributes return getattr(self, key) def __setitem__(self, key, value): # Allow dictionary-style assignment of attributes setattr(self, key, value) """ Reference: ChatCompletionMessage(content='This is a test', role='assistant', function_call=None, tool_calls=None)) """ class Message(OpenAIObject): content: Optional[str] role: Literal["assistant", "user", "system", "tool", "function"] tool_calls: Optional[List[ChatCompletionMessageToolCall]] function_call: Optional[FunctionCall] audio: Optional[ChatCompletionAudioResponse] = None provider_specific_fields: Optional[Dict[str, Any]] = Field( default=None, exclude=True ) def __init__( self, content: Optional[str] = None, role: Literal["assistant"] = "assistant", function_call=None, tool_calls: Optional[list] = None, audio: Optional[ChatCompletionAudioResponse] = None, provider_specific_fields: Optional[Dict[str, Any]] = None, **params, ): init_values: Dict[str, Any] = { "content": content, "role": role or "assistant", # handle null input "function_call": ( FunctionCall(**function_call) if function_call is not None else None ), "tool_calls": ( [ ( ChatCompletionMessageToolCall(**tool_call) if isinstance(tool_call, dict) else tool_call ) for tool_call in tool_calls ] if tool_calls is not None and len(tool_calls) > 0 else None ), } if audio is not None: init_values["audio"] = audio super(Message, self).__init__( **init_values, # type: ignore **params, ) if audio is None: # delete audio from self # OpenAI compatible APIs like mistral API will raise an error if audio is passed in del self.audio if provider_specific_fields: # set if provider_specific_fields is not empty self.provider_specific_fields = provider_specific_fields for k, v in provider_specific_fields.items(): setattr(self, k, v) def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist return getattr(self, key, default) def __getitem__(self, key): # Allow dictionary-style access to attributes return getattr(self, key) def __setitem__(self, key, value): # Allow dictionary-style assignment of attributes setattr(self, key, value) def json(self, **kwargs): # type: ignore try: return self.model_dump() # noqa except Exception: # if using pydantic v1 return self.dict() class Delta(OpenAIObject): provider_specific_fields: Optional[Dict[str, Any]] = Field( default=None, exclude=True ) def __init__( self, content=None, role=None, function_call=None, tool_calls=None, audio: Optional[ChatCompletionAudioResponse] = None, **params, ): super(Delta, self).__init__(**params) provider_specific_fields: Dict[str, Any] = {} if "reasoning_content" in params: provider_specific_fields["reasoning_content"] = params["reasoning_content"] setattr(self, "reasoning_content", params["reasoning_content"]) self.content = content self.role = role # Set default values and correct types self.function_call: Optional[Union[FunctionCall, Any]] = None self.tool_calls: Optional[List[Union[ChatCompletionDeltaToolCall, Any]]] = None self.audio: Optional[ChatCompletionAudioResponse] = None if provider_specific_fields: # set if provider_specific_fields is not empty self.provider_specific_fields = provider_specific_fields if function_call is not None and isinstance(function_call, dict): self.function_call = FunctionCall(**function_call) else: self.function_call = function_call if tool_calls is not None and isinstance(tool_calls, list): self.tool_calls = [] for tool_call in tool_calls: if isinstance(tool_call, dict): if tool_call.get("index", None) is None: tool_call["index"] = 0 self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call)) elif isinstance(tool_call, ChatCompletionDeltaToolCall): self.tool_calls.append(tool_call) else: self.tool_calls = tool_calls self.audio = audio def __contains__(self, key): # Define custom behavior for the 'in' operator return hasattr(self, key) def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist return getattr(self, key, default) def __getitem__(self, key): # Allow dictionary-style access to attributes return getattr(self, key) def __setitem__(self, key, value): # Allow dictionary-style assignment of attributes setattr(self, key, value) class Choices(OpenAIObject): def __init__( self, finish_reason=None, index=0, message: Optional[Union[Message, dict]] = None, logprobs=None, enhancements=None, **params, ): super(Choices, self).__init__(**params) if finish_reason is not None: self.finish_reason = map_finish_reason( finish_reason ) # set finish_reason for all responses else: self.finish_reason = "stop" self.index = index if message is None: self.message = Message() else: if isinstance(message, Message): self.message = message elif isinstance(message, dict): self.message = Message(**message) if logprobs is not None: if isinstance(logprobs, dict): self.logprobs = ChoiceLogprobs(**logprobs) else: self.logprobs = logprobs if enhancements is not None: self.enhancements = enhancements def __contains__(self, key): # Define custom behavior for the 'in' operator return hasattr(self, key) def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist return getattr(self, key, default) def __getitem__(self, key): # Allow dictionary-style access to attributes return getattr(self, key) def __setitem__(self, key, value): # Allow dictionary-style assignment of attributes setattr(self, key, value) class CompletionTokensDetailsWrapper( CompletionTokensDetails ): # wrapper for older openai versions text_tokens: Optional[int] = None """Text tokens generated by the model.""" class PromptTokensDetailsWrapper( PromptTokensDetails ): # wrapper for older openai versions text_tokens: Optional[int] = None """Text tokens sent to the model.""" image_tokens: Optional[int] = None """Image tokens sent to the model.""" class Usage(CompletionUsage): _cache_creation_input_tokens: int = PrivateAttr( 0 ) # hidden param for prompt caching. Might change, once openai introduces their equivalent. _cache_read_input_tokens: int = PrivateAttr( 0 ) # hidden param for prompt caching. Might change, once openai introduces their equivalent. def __init__( self, prompt_tokens: Optional[int] = None, completion_tokens: Optional[int] = None, total_tokens: Optional[int] = None, reasoning_tokens: Optional[int] = None, prompt_tokens_details: Optional[Union[PromptTokensDetailsWrapper, dict]] = None, completion_tokens_details: Optional[ Union[CompletionTokensDetailsWrapper, dict] ] = None, **params, ): # handle reasoning_tokens _completion_tokens_details: Optional[CompletionTokensDetailsWrapper] = None if reasoning_tokens: completion_tokens_details = CompletionTokensDetailsWrapper( reasoning_tokens=reasoning_tokens ) # Ensure completion_tokens_details is properly handled if completion_tokens_details: if isinstance(completion_tokens_details, dict): _completion_tokens_details = CompletionTokensDetailsWrapper( **completion_tokens_details ) elif isinstance(completion_tokens_details, CompletionTokensDetails): _completion_tokens_details = completion_tokens_details ## DEEPSEEK MAPPING ## if "prompt_cache_hit_tokens" in params and isinstance( params["prompt_cache_hit_tokens"], int ): if prompt_tokens_details is None: prompt_tokens_details = PromptTokensDetailsWrapper( cached_tokens=params["prompt_cache_hit_tokens"] ) ## ANTHROPIC MAPPING ## if "cache_read_input_tokens" in params and isinstance( params["cache_read_input_tokens"], int ): if prompt_tokens_details is None: prompt_tokens_details = PromptTokensDetailsWrapper( cached_tokens=params["cache_read_input_tokens"] ) # handle prompt_tokens_details _prompt_tokens_details: Optional[PromptTokensDetailsWrapper] = None if prompt_tokens_details: if isinstance(prompt_tokens_details, dict): _prompt_tokens_details = PromptTokensDetailsWrapper( **prompt_tokens_details ) elif isinstance(prompt_tokens_details, PromptTokensDetails): _prompt_tokens_details = prompt_tokens_details super().__init__( prompt_tokens=prompt_tokens or 0, completion_tokens=completion_tokens or 0, total_tokens=total_tokens or 0, completion_tokens_details=_completion_tokens_details or None, prompt_tokens_details=_prompt_tokens_details or None, ) ## ANTHROPIC MAPPING ## if "cache_creation_input_tokens" in params and isinstance( params["cache_creation_input_tokens"], int ): self._cache_creation_input_tokens = params["cache_creation_input_tokens"] if "cache_read_input_tokens" in params and isinstance( params["cache_read_input_tokens"], int ): self._cache_read_input_tokens = params["cache_read_input_tokens"] ## DEEPSEEK MAPPING ## if "prompt_cache_hit_tokens" in params and isinstance( params["prompt_cache_hit_tokens"], int ): self._cache_read_input_tokens = params["prompt_cache_hit_tokens"] for k, v in params.items(): setattr(self, k, v) def __contains__(self, key): # Define custom behavior for the 'in' operator return hasattr(self, key) def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist return getattr(self, key, default) def __getitem__(self, key): # Allow dictionary-style access to attributes return getattr(self, key) def __setitem__(self, key, value): # Allow dictionary-style assignment of attributes setattr(self, key, value) class StreamingChoices(OpenAIObject): def __init__( self, finish_reason=None, index=0, delta: Optional[Delta] = None, logprobs=None, enhancements=None, **params, ): super(StreamingChoices, self).__init__(**params) if finish_reason: self.finish_reason = map_finish_reason(finish_reason) else: self.finish_reason = None self.index = index if delta is not None: if isinstance(delta, Delta): self.delta = delta elif isinstance(delta, dict): self.delta = Delta(**delta) else: self.delta = Delta() if enhancements is not None: self.enhancements = enhancements if logprobs is not None and isinstance(logprobs, dict): self.logprobs = ChoiceLogprobs(**logprobs) else: self.logprobs = logprobs # type: ignore def __contains__(self, key): # Define custom behavior for the 'in' operator return hasattr(self, key) def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist return getattr(self, key, default) def __getitem__(self, key): # Allow dictionary-style access to attributes return getattr(self, key) def __setitem__(self, key, value): # Allow dictionary-style assignment of attributes setattr(self, key, value) class StreamingChatCompletionChunk(OpenAIChatCompletionChunk): def __init__(self, **kwargs): new_choices = [] for choice in kwargs["choices"]: new_choice = StreamingChoices(**choice).model_dump() new_choices.append(new_choice) kwargs["choices"] = new_choices super().__init__(**kwargs) from openai.types.chat import ChatCompletionChunk class ModelResponseBase(OpenAIObject): id: str """A unique identifier for the completion.""" created: int """The Unix timestamp (in seconds) of when the completion was created.""" model: Optional[str] = None """The model used for completion.""" object: str """The object type, which is always "text_completion" """ system_fingerprint: Optional[str] = None """This fingerprint represents the backend configuration that the model runs with. Can be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism. """ _hidden_params: dict = {} _response_headers: Optional[dict] = None class ModelResponseStream(ModelResponseBase): choices: List[StreamingChoices] def __init__( self, choices: Optional[List[Union[StreamingChoices, dict, BaseModel]]] = None, id: Optional[str] = None, created: Optional[int] = None, **kwargs, ): if choices is not None and isinstance(choices, list): new_choices = [] for choice in choices: _new_choice = None if isinstance(choice, StreamingChoices): _new_choice = choice elif isinstance(choice, dict): _new_choice = StreamingChoices(**choice) elif isinstance(choice, BaseModel): _new_choice = StreamingChoices(**choice.model_dump()) new_choices.append(_new_choice) kwargs["choices"] = new_choices else: kwargs["choices"] = [StreamingChoices()] if id is None: id = _generate_id() else: id = id if created is None: created = int(time.time()) else: created = created if ( "usage" in kwargs and kwargs["usage"] is not None and isinstance(kwargs["usage"], dict) ): kwargs["usage"] = Usage(**kwargs["usage"]) kwargs["id"] = id kwargs["created"] = created kwargs["object"] = "chat.completion.chunk" super().__init__(**kwargs) def __contains__(self, key): # Define custom behavior for the 'in' operator return hasattr(self, key) def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist return getattr(self, key, default) def __getitem__(self, key): # Allow dictionary-style access to attributes return getattr(self, key) def json(self, **kwargs): # type: ignore try: return self.model_dump() # noqa except Exception: # if using pydantic v1 return self.dict() class ModelResponse(ModelResponseBase): choices: List[Union[Choices, StreamingChoices]] """The list of completion choices the model generated for the input prompt.""" def __init__( self, id=None, choices=None, created=None, model=None, object=None, system_fingerprint=None, usage=None, stream=None, stream_options=None, response_ms=None, hidden_params=None, _response_headers=None, **params, ) -> None: if stream is not None and stream is True: object = "chat.completion.chunk" if choices is not None and isinstance(choices, list): new_choices = [] for choice in choices: _new_choice = None if isinstance(choice, StreamingChoices): _new_choice = choice elif isinstance(choice, dict): _new_choice = StreamingChoices(**choice) elif isinstance(choice, BaseModel): _new_choice = StreamingChoices(**choice.model_dump()) new_choices.append(_new_choice) choices = new_choices else: choices = [StreamingChoices()] else: object = "chat.completion" if choices is not None and isinstance(choices, list): new_choices = [] for choice in choices: if isinstance(choice, Choices): _new_choice = choice # type: ignore elif isinstance(choice, dict): _new_choice = Choices(**choice) # type: ignore else: _new_choice = choice new_choices.append(_new_choice) choices = new_choices else: choices = [Choices()] if id is None: id = _generate_id() else: id = id if created is None: created = int(time.time()) else: created = created model = model if usage is not None: if isinstance(usage, dict): usage = Usage(**usage) else: usage = usage elif stream is None or stream is False: usage = Usage() if hidden_params: self._hidden_params = hidden_params if _response_headers: self._response_headers = _response_headers init_values = { "id": id, "choices": choices, "created": created, "model": model, "object": object, "system_fingerprint": system_fingerprint, } if usage is not None: init_values["usage"] = usage super().__init__( **init_values, **params, ) def __contains__(self, key): # Define custom behavior for the 'in' operator return hasattr(self, key) def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist return getattr(self, key, default) def __getitem__(self, key): # Allow dictionary-style access to attributes return getattr(self, key) def json(self, **kwargs): # type: ignore try: return self.model_dump() # noqa except Exception: # if using pydantic v1 return self.dict() class Embedding(OpenAIObject): embedding: Union[list, str] = [] index: int object: Literal["embedding"] def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist return getattr(self, key, default) def __getitem__(self, key): # Allow dictionary-style access to attributes return getattr(self, key) def __setitem__(self, key, value): # Allow dictionary-style assignment of attributes setattr(self, key, value) class EmbeddingResponse(OpenAIObject): model: Optional[str] = None """The model used for embedding.""" data: List """The actual embedding value""" object: Literal["list"] """The object type, which is always "list" """ usage: Optional[Usage] = None """Usage statistics for the embedding request.""" _hidden_params: dict = {} _response_headers: Optional[Dict] = None _response_ms: Optional[float] = None def __init__( self, model: Optional[str] = None, usage: Optional[Usage] = None, response_ms=None, data: Optional[Union[List, List[Embedding]]] = None, hidden_params=None, _response_headers=None, **params, ): object = "list" if response_ms: _response_ms = response_ms else: _response_ms = None if data: data = data else: data = [] if usage: usage = usage else: usage = Usage() if _response_headers: self._response_headers = _response_headers model = model super().__init__(model=model, object=object, data=data, usage=usage) # type: ignore def __contains__(self, key): # Define custom behavior for the 'in' operator return hasattr(self, key) def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist return getattr(self, key, default) def __getitem__(self, key): # Allow dictionary-style access to attributes return getattr(self, key) def __setitem__(self, key, value): # Allow dictionary-style assignment of attributes setattr(self, key, value) def json(self, **kwargs): # type: ignore try: return self.model_dump() # noqa except Exception: # if using pydantic v1 return self.dict() class Logprobs(OpenAIObject): text_offset: Optional[List[int]] token_logprobs: Optional[List[Union[float, None]]] tokens: Optional[List[str]] top_logprobs: Optional[List[Union[Dict[str, float], None]]] class TextChoices(OpenAIObject): def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params): super(TextChoices, self).__init__(**params) if finish_reason: self.finish_reason = map_finish_reason(finish_reason) else: self.finish_reason = None self.index = index if text is not None: self.text = text else: self.text = None if logprobs is None: self.logprobs = None else: if isinstance(logprobs, dict): self.logprobs = Logprobs(**logprobs) else: self.logprobs = logprobs def __contains__(self, key): # Define custom behavior for the 'in' operator return hasattr(self, key) def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist return getattr(self, key, default) def __getitem__(self, key): # Allow dictionary-style access to attributes return getattr(self, key) def __setitem__(self, key, value): # Allow dictionary-style assignment of attributes setattr(self, key, value) def json(self, **kwargs): # type: ignore try: return self.model_dump() # noqa except Exception: # if using pydantic v1 return self.dict() class TextCompletionResponse(OpenAIObject): """ { "id": response["id"], "object": "text_completion", "created": response["created"], "model": response["model"], "choices": [ { "text": response["choices"][0]["message"]["content"], "index": response["choices"][0]["index"], "logprobs": transformed_logprobs, "finish_reason": response["choices"][0]["finish_reason"] } ], "usage": response["usage"] } """ id: str object: str created: int model: Optional[str] choices: List[TextChoices] usage: Optional[Usage] _response_ms: Optional[int] = None _hidden_params: HiddenParams def __init__( self, id=None, choices=None, created=None, model=None, usage=None, stream=False, response_ms=None, object=None, **params, ): if stream: object = "text_completion.chunk" choices = [TextChoices()] else: object = "text_completion" if choices is not None and isinstance(choices, list): new_choices = [] for choice in choices: _new_choice = None if isinstance(choice, TextChoices): _new_choice = choice elif isinstance(choice, dict): _new_choice = TextChoices(**choice) new_choices.append(_new_choice) choices = new_choices else: choices = [TextChoices()] if object is not None: object = object if id is None: id = _generate_id() else: id = id if created is None: created = int(time.time()) else: created = created model = model if usage: usage = usage else: usage = Usage() super(TextCompletionResponse, self).__init__( id=id, # type: ignore object=object, # type: ignore created=created, # type: ignore model=model, # type: ignore choices=choices, # type: ignore usage=usage, # type: ignore **params, ) if response_ms: self._response_ms = response_ms else: self._response_ms = None self._hidden_params = HiddenParams() def __contains__(self, key): # Define custom behavior for the 'in' operator return hasattr(self, key) def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist return getattr(self, key, default) def __getitem__(self, key): # Allow dictionary-style access to attributes return getattr(self, key) def __setitem__(self, key, value): # Allow dictionary-style assignment of attributes setattr(self, key, value) from openai.types.images_response import Image as OpenAIImage class ImageObject(OpenAIImage): """ Represents the url or the content of an image generated by the OpenAI API. Attributes: b64_json: The base64-encoded JSON of the generated image, if response_format is b64_json. url: The URL of the generated image, if response_format is url (default). revised_prompt: The prompt that was used to generate the image, if there was any revision to the prompt. https://platform.openai.com/docs/api-reference/images/object """ b64_json: Optional[str] = None url: Optional[str] = None revised_prompt: Optional[str] = None def __init__(self, b64_json=None, url=None, revised_prompt=None, **kwargs): super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt) # type: ignore def __contains__(self, key): # Define custom behavior for the 'in' operator return hasattr(self, key) def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist return getattr(self, key, default) def __getitem__(self, key): # Allow dictionary-style access to attributes return getattr(self, key) def __setitem__(self, key, value): # Allow dictionary-style assignment of attributes setattr(self, key, value) def json(self, **kwargs): # type: ignore try: return self.model_dump() # noqa except Exception: # if using pydantic v1 return self.dict() from openai.types.images_response import ImagesResponse as OpenAIImageResponse class ImageResponse(OpenAIImageResponse): _hidden_params: dict = {} usage: Usage def __init__( self, created: Optional[int] = None, data: Optional[List[ImageObject]] = None, response_ms=None, usage: Optional[Usage] = None, hidden_params: Optional[dict] = None, ): if response_ms: _response_ms = response_ms else: _response_ms = None if data: data = data else: data = [] if created: created = created else: created = int(time.time()) _data: List[OpenAIImage] = [] for d in data: if isinstance(d, dict): _data.append(ImageObject(**d)) elif isinstance(d, BaseModel): _data.append(ImageObject(**d.model_dump())) _usage = usage or Usage( prompt_tokens=0, completion_tokens=0, total_tokens=0, ) super().__init__(created=created, data=_data, usage=_usage) # type: ignore self._hidden_params = hidden_params or {} def __contains__(self, key): # Define custom behavior for the 'in' operator return hasattr(self, key) def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist return getattr(self, key, default) def __getitem__(self, key): # Allow dictionary-style access to attributes return getattr(self, key) def __setitem__(self, key, value): # Allow dictionary-style assignment of attributes setattr(self, key, value) def json(self, **kwargs): # type: ignore try: return self.model_dump() # noqa except Exception: # if using pydantic v1 return self.dict() class TranscriptionResponse(OpenAIObject): text: Optional[str] = None _hidden_params: dict = {} _response_headers: Optional[dict] = None def __init__(self, text=None): super().__init__(text=text) # type: ignore def __contains__(self, key): # Define custom behavior for the 'in' operator return hasattr(self, key) def get(self, key, default=None): # Custom .get() method to access attributes with a default value if the attribute doesn't exist return getattr(self, key, default) def __getitem__(self, key): # Allow dictionary-style access to attributes return getattr(self, key) def __setitem__(self, key, value): # Allow dictionary-style assignment of attributes setattr(self, key, value) def json(self, **kwargs): # type: ignore try: return self.model_dump() # noqa except Exception: # if using pydantic v1 return self.dict() class GenericImageParsingChunk(TypedDict): type: str media_type: str data: str class ResponseFormatChunk(TypedDict, total=False): type: Required[Literal["json_object", "text"]] response_schema: dict class LoggedLiteLLMParams(TypedDict, total=False): force_timeout: Optional[float] custom_llm_provider: Optional[str] api_base: Optional[str] litellm_call_id: Optional[str] model_alias_map: Optional[dict] metadata: Optional[dict] model_info: Optional[dict] proxy_server_request: Optional[dict] acompletion: Optional[bool] preset_cache_key: Optional[str] no_log: Optional[bool] input_cost_per_second: Optional[float] input_cost_per_token: Optional[float] output_cost_per_token: Optional[float] output_cost_per_second: Optional[float] cooldown_time: Optional[float] class AdapterCompletionStreamWrapper: def __init__(self, completion_stream): self.completion_stream = completion_stream def __iter__(self): return self def __aiter__(self): return self def __next__(self): try: for chunk in self.completion_stream: if chunk == "None" or chunk is None: raise Exception return chunk raise StopIteration except StopIteration: raise StopIteration except Exception as e: print(f"AdapterCompletionStreamWrapper - {e}") # noqa async def __anext__(self): try: async for chunk in self.completion_stream: if chunk == "None" or chunk is None: raise Exception return chunk raise StopIteration except StopIteration: raise StopAsyncIteration class StandardLoggingUserAPIKeyMetadata(TypedDict): user_api_key_hash: Optional[str] # hash of the litellm virtual key used user_api_key_alias: Optional[str] user_api_key_org_id: Optional[str] user_api_key_team_id: Optional[str] user_api_key_user_id: Optional[str] user_api_key_team_alias: Optional[str] user_api_key_end_user_id: Optional[str] class StandardLoggingPromptManagementMetadata(TypedDict): prompt_id: str prompt_variables: Optional[dict] prompt_integration: str class StandardLoggingMetadata(StandardLoggingUserAPIKeyMetadata): """ Specific metadata k,v pairs logged to integration for easier cost tracking and prompt management """ spend_logs_metadata: Optional[ dict ] # special param to log k,v pairs to spendlogs for a call requester_ip_address: Optional[str] requester_metadata: Optional[dict] prompt_management_metadata: Optional[StandardLoggingPromptManagementMetadata] class StandardLoggingAdditionalHeaders(TypedDict, total=False): x_ratelimit_limit_requests: int x_ratelimit_limit_tokens: int x_ratelimit_remaining_requests: int x_ratelimit_remaining_tokens: int class StandardLoggingHiddenParams(TypedDict): model_id: Optional[str] cache_key: Optional[str] api_base: Optional[str] response_cost: Optional[str] litellm_overhead_time_ms: Optional[float] additional_headers: Optional[StandardLoggingAdditionalHeaders] class StandardLoggingModelInformation(TypedDict): model_map_key: str model_map_value: Optional[ModelInfo] class StandardLoggingModelCostFailureDebugInformation(TypedDict, total=False): """ Debug information, if cost tracking fails. Avoid logging sensitive information like response or optional params """ error_str: Required[str] traceback_str: Required[str] model: str cache_hit: Optional[bool] custom_llm_provider: Optional[str] base_model: Optional[str] call_type: str custom_pricing: Optional[bool] class StandardLoggingPayloadErrorInformation(TypedDict, total=False): error_code: Optional[str] error_class: Optional[str] llm_provider: Optional[str] class StandardLoggingGuardrailInformation(TypedDict, total=False): guardrail_name: Optional[str] guardrail_mode: Optional[Union[GuardrailEventHooks, List[GuardrailEventHooks]]] guardrail_response: Optional[Union[dict, str]] guardrail_status: Literal["success", "failure"] StandardLoggingPayloadStatus = Literal["success", "failure"] class StandardLoggingPayload(TypedDict): id: str trace_id: str # Trace multiple LLM calls belonging to same overall request (e.g. fallbacks/retries) call_type: str stream: Optional[bool] response_cost: float response_cost_failure_debug_info: Optional[ StandardLoggingModelCostFailureDebugInformation ] status: StandardLoggingPayloadStatus custom_llm_provider: Optional[str] total_tokens: int prompt_tokens: int completion_tokens: int startTime: float # Note: making this camelCase was a mistake, everything should be snake case endTime: float completionStartTime: float response_time: float model_map_information: StandardLoggingModelInformation model: str model_id: Optional[str] model_group: Optional[str] api_base: str metadata: StandardLoggingMetadata cache_hit: Optional[bool] cache_key: Optional[str] saved_cache_cost: float request_tags: list end_user: Optional[str] requester_ip_address: Optional[str] messages: Optional[Union[str, list, dict]] response: Optional[Union[str, list, dict]] error_str: Optional[str] error_information: Optional[StandardLoggingPayloadErrorInformation] model_parameters: dict hidden_params: StandardLoggingHiddenParams guardrail_information: Optional[StandardLoggingGuardrailInformation] from typing import AsyncIterator, Iterator class CustomStreamingDecoder: async def aiter_bytes( self, iterator: AsyncIterator[bytes] ) -> AsyncIterator[ Optional[Union[GenericStreamingChunk, StreamingChatCompletionChunk]] ]: raise NotImplementedError def iter_bytes( self, iterator: Iterator[bytes] ) -> Iterator[Optional[Union[GenericStreamingChunk, StreamingChatCompletionChunk]]]: raise NotImplementedError class StandardPassThroughResponseObject(TypedDict): response: str OPENAI_RESPONSE_HEADERS = [ "x-ratelimit-remaining-requests", "x-ratelimit-remaining-tokens", "x-ratelimit-limit-requests", "x-ratelimit-limit-tokens", "x-ratelimit-reset-requests", "x-ratelimit-reset-tokens", ] class StandardCallbackDynamicParams(TypedDict, total=False): # Langfuse dynamic params langfuse_public_key: Optional[str] langfuse_secret: Optional[str] langfuse_secret_key: Optional[str] langfuse_host: Optional[str] # GCS dynamic params gcs_bucket_name: Optional[str] gcs_path_service_account: Optional[str] # Langsmith dynamic params langsmith_api_key: Optional[str] langsmith_project: Optional[str] langsmith_base_url: Optional[str] # Humanloop dynamic params humanloop_api_key: Optional[str] # Logging settings turn_off_message_logging: Optional[bool] # when true will not log messages all_litellm_params = [ "metadata", "litellm_metadata", "litellm_trace_id", "tags", "acompletion", "aimg_generation", "atext_completion", "text_completion", "caching", "mock_response", "mock_timeout", "disable_add_transform_inline_image_block", "api_key", "api_version", "prompt_id", "provider_specific_header", "prompt_variables", "api_base", "force_timeout", "logger_fn", "verbose", "custom_llm_provider", "litellm_logging_obj", "litellm_call_id", "use_client", "id", "fallbacks", "azure", "headers", "model_list", "num_retries", "context_window_fallback_dict", "retry_policy", "retry_strategy", "roles", "final_prompt_value", "bos_token", "eos_token", "request_timeout", "complete_response", "self", "client", "rpm", "tpm", "max_parallel_requests", "input_cost_per_token", "output_cost_per_token", "input_cost_per_second", "output_cost_per_second", "hf_model_name", "model_info", "proxy_server_request", "preset_cache_key", "caching_groups", "ttl", "cache", "no-log", "base_model", "stream_timeout", "supports_system_message", "region_name", "allowed_model_region", "model_config", "fastest_response", "cooldown_time", "cache_key", "max_retries", "azure_ad_token_provider", "tenant_id", "client_id", "azure_username", "azure_password", "client_secret", "user_continue_message", "configurable_clientside_auth_params", "weight", "ensure_alternating_roles", "assistant_continue_message", "user_continue_message", "fallback_depth", "max_fallbacks", "max_budget", "budget_duration", "use_in_pass_through", ] + list(StandardCallbackDynamicParams.__annotations__.keys()) class KeyGenerationConfig(TypedDict, total=False): required_params: List[ str ] # specify params that must be present in the key generation request class TeamUIKeyGenerationConfig(KeyGenerationConfig): allowed_team_member_roles: List[str] class PersonalUIKeyGenerationConfig(KeyGenerationConfig): allowed_user_roles: List[str] class StandardKeyGenerationConfig(TypedDict, total=False): team_key_generation: TeamUIKeyGenerationConfig personal_key_generation: PersonalUIKeyGenerationConfig class BudgetConfig(BaseModel): max_budget: Optional[float] = None budget_duration: Optional[str] = None tpm_limit: Optional[int] = None rpm_limit: Optional[int] = None def __init__(self, **data: Any) -> None: # Map time_period to budget_duration if present if "time_period" in data: data["budget_duration"] = data.pop("time_period") # Map budget_limit to max_budget if present if "budget_limit" in data: data["max_budget"] = data.pop("budget_limit") super().__init__(**data) GenericBudgetConfigType = Dict[str, BudgetConfig] class LlmProviders(str, Enum): OPENAI = "openai" OPENAI_LIKE = "openai_like" # embedding only JINA_AI = "jina_ai" XAI = "xai" CUSTOM_OPENAI = "custom_openai" TEXT_COMPLETION_OPENAI = "text-completion-openai" COHERE = "cohere" COHERE_CHAT = "cohere_chat" CLARIFAI = "clarifai" ANTHROPIC = "anthropic" ANTHROPIC_TEXT = "anthropic_text" REPLICATE = "replicate" HUGGINGFACE = "huggingface" TOGETHER_AI = "together_ai" OPENROUTER = "openrouter" VERTEX_AI = "vertex_ai" VERTEX_AI_BETA = "vertex_ai_beta" GEMINI = "gemini" AI21 = "ai21" BASETEN = "baseten" AZURE = "azure" AZURE_TEXT = "azure_text" AZURE_AI = "azure_ai" SAGEMAKER = "sagemaker" SAGEMAKER_CHAT = "sagemaker_chat" BEDROCK = "bedrock" VLLM = "vllm" NLP_CLOUD = "nlp_cloud" PETALS = "petals" OOBABOOGA = "oobabooga" OLLAMA = "ollama" OLLAMA_CHAT = "ollama_chat" DEEPINFRA = "deepinfra" PERPLEXITY = "perplexity" MISTRAL = "mistral" GROQ = "groq" NVIDIA_NIM = "nvidia_nim" CEREBRAS = "cerebras" AI21_CHAT = "ai21_chat" VOLCENGINE = "volcengine" CODESTRAL = "codestral" TEXT_COMPLETION_CODESTRAL = "text-completion-codestral" DEEPSEEK = "deepseek" SAMBANOVA = "sambanova" MARITALK = "maritalk" VOYAGE = "voyage" CLOUDFLARE = "cloudflare" XINFERENCE = "xinference" FIREWORKS_AI = "fireworks_ai" FRIENDLIAI = "friendliai" WATSONX = "watsonx" WATSONX_TEXT = "watsonx_text" TRITON = "triton" PREDIBASE = "predibase" DATABRICKS = "databricks" EMPOWER = "empower" GITHUB = "github" CUSTOM = "custom" LITELLM_PROXY = "litellm_proxy" HOSTED_VLLM = "hosted_vllm" LM_STUDIO = "lm_studio" GALADRIEL = "galadriel" INFINITY = "infinity" DEEPGRAM = "deepgram" AIOHTTP_OPENAI = "aiohttp_openai" LANGFUSE = "langfuse" HUMANLOOP = "humanloop" TOPAZ = "topaz" # Create a set of all provider values for quick lookup LlmProvidersSet = {provider.value for provider in LlmProviders} class LiteLLMLoggingBaseClass: """ Base class for logging pre and post call Meant to simplify type checking for logging obj. """ def pre_call(self, input, api_key, model=None, additional_args={}): pass def post_call( self, original_response, input=None, api_key=None, additional_args={} ): pass class CustomHuggingfaceTokenizer(TypedDict): identifier: str revision: str # usually 'main' auth_token: Optional[str] class LITELLM_IMAGE_VARIATION_PROVIDERS(Enum): """ Try using an enum for endpoints. This should make it easier to track what provider is supported for what endpoint. """ OPENAI = LlmProviders.OPENAI.value TOPAZ = LlmProviders.TOPAZ.value class HttpHandlerRequestFields(TypedDict, total=False): data: dict # request body params: dict # query params files: dict # file uploads content: Any # raw content class ProviderSpecificHeader(TypedDict): custom_llm_provider: str extra_headers: dict class SelectTokenizerResponse(TypedDict): type: Literal["openai_tokenizer", "huggingface_tokenizer"] tokenizer: Any