|
import json |
|
import time |
|
import uuid |
|
from enum import Enum |
|
from typing import Any, Dict, List, Literal, Optional, Tuple, Union |
|
|
|
from aiohttp import FormData |
|
from openai._models import BaseModel as OpenAIObject |
|
from openai.types.audio.transcription_create_params import FileTypes |
|
from openai.types.completion_usage import ( |
|
CompletionTokensDetails, |
|
CompletionUsage, |
|
PromptTokensDetails, |
|
) |
|
from openai.types.moderation import ( |
|
Categories, |
|
CategoryAppliedInputTypes, |
|
CategoryScores, |
|
) |
|
from openai.types.moderation_create_response import Moderation, ModerationCreateResponse |
|
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr |
|
from typing_extensions import Callable, Required, TypedDict, override
|
|
|
from ..litellm_core_utils.core_helpers import map_finish_reason |
|
from .guardrails import GuardrailEventHooks |
|
from .llms.openai import ( |
|
ChatCompletionToolCallChunk, |
|
ChatCompletionUsageBlock, |
|
OpenAIChatCompletionChunk, |
|
) |
|
from .rerank import RerankResponse |
|
|
|
|
|
def _generate_id(): |
|
return "chatcmpl-" + str(uuid.uuid4()) |
|
|
|
|
|
class LiteLLMPydanticObjectBase(BaseModel): |
|
""" |
|
    Implements default functions that all pydantic objects should have.
|
""" |
|
|
|
def json(self, **kwargs): |
|
try: |
|
return self.model_dump(**kwargs) |
|
except Exception: |
|
|
|
return self.dict(**kwargs) |
|
|
|
def fields_set(self): |
|
try: |
|
return self.model_fields_set |
|
except Exception: |
|
|
|
return self.__fields_set__ |
|
|
|
model_config = ConfigDict(protected_namespaces=()) |
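
# Illustrative usage (a sketch, not executed here): subclasses get a pydantic
# v1/v2-compatible `json()`. Note that, despite its name, `json()` returns a
# dict (the result of `model_dump()`), not a JSON string:
#
#     class _Example(LiteLLMPydanticObjectBase):
#         name: str
#
#     _Example(name="x").json()  # -> {"name": "x"}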
|
|
|
|
|
class LiteLLMCommonStrings(Enum): |
|
redacted_by_litellm = "redacted by litellm. 'litellm.turn_off_message_logging=True'" |
|
|
|
|
|
SupportedCacheControls = ["ttl", "s-maxage", "no-cache", "no-store"] |
|
|
|
|
|
class CostPerToken(TypedDict): |
|
input_cost_per_token: float |
|
output_cost_per_token: float |
|
|
|
|
|
class ProviderField(TypedDict): |
|
field_name: str |
|
field_type: Literal["string"] |
|
field_description: str |
|
field_value: str |
|
|
|
|
|
class ProviderSpecificModelInfo(TypedDict, total=False): |
|
supports_system_messages: Optional[bool] |
|
supports_response_schema: Optional[bool] |
|
supports_vision: Optional[bool] |
|
supports_function_calling: Optional[bool] |
|
supports_tool_choice: Optional[bool] |
|
supports_assistant_prefill: Optional[bool] |
|
supports_prompt_caching: Optional[bool] |
|
supports_audio_input: Optional[bool] |
|
supports_embedding_image_input: Optional[bool] |
|
supports_audio_output: Optional[bool] |
|
supports_pdf_input: Optional[bool] |
|
supports_native_streaming: Optional[bool] |
|
supports_parallel_function_calling: Optional[bool] |
|
|
|
|
|
class ModelInfoBase(ProviderSpecificModelInfo, total=False): |
|
key: Required[str] |
|
|
|
max_tokens: Required[Optional[int]] |
|
max_input_tokens: Required[Optional[int]] |
|
max_output_tokens: Required[Optional[int]] |
|
input_cost_per_token: Required[float] |
|
cache_creation_input_token_cost: Optional[float] |
|
cache_read_input_token_cost: Optional[float] |
|
input_cost_per_character: Optional[float] |
|
input_cost_per_audio_token: Optional[float] |
|
input_cost_per_token_above_128k_tokens: Optional[float] |
|
    input_cost_per_character_above_128k_tokens: Optional[float]
|
input_cost_per_query: Optional[float] |
|
input_cost_per_image: Optional[float] |
|
input_cost_per_audio_per_second: Optional[float] |
|
input_cost_per_video_per_second: Optional[float] |
|
input_cost_per_second: Optional[float] |
|
output_cost_per_token: Required[float] |
|
output_cost_per_character: Optional[float] |
|
output_cost_per_audio_token: Optional[float] |
|
    output_cost_per_token_above_128k_tokens: Optional[float]
    output_cost_per_character_above_128k_tokens: Optional[float]
|
output_cost_per_image: Optional[float] |
|
output_vector_size: Optional[int] |
|
output_cost_per_video_per_second: Optional[float] |
|
output_cost_per_audio_per_second: Optional[float] |
|
output_cost_per_second: Optional[float] |
|
|
|
litellm_provider: Required[str] |
|
mode: Required[ |
|
Literal[ |
|
"completion", "embedding", "image_generation", "chat", "audio_transcription" |
|
] |
|
] |
|
tpm: Optional[int] |
|
rpm: Optional[int] |
|
|
|
|
|
class ModelInfo(ModelInfoBase, total=False): |
|
""" |
|
Model info for a given model, this is information found in litellm.model_prices_and_context_window.json |
|
""" |
|
|
|
supported_openai_params: Required[Optional[List[str]]] |
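
# Illustrative example of a conforming dict (costs are made-up numbers, not real
# pricing): every `Required[...]` key must be present; the rest may be omitted
# because these TypedDicts are declared with `total=False`.
#
#     info: ModelInfoBase = {
#         "key": "my-model",
#         "max_tokens": 4096,
#         "max_input_tokens": 4096,
#         "max_output_tokens": 4096,
#         "input_cost_per_token": 1e-06,
#         "output_cost_per_token": 2e-06,
#         "litellm_provider": "openai",
#         "mode": "chat",
#     }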
|
|
|
|
|
class GenericStreamingChunk(TypedDict, total=False): |
|
text: Required[str] |
|
tool_use: Optional[ChatCompletionToolCallChunk] |
|
is_finished: Required[bool] |
|
finish_reason: Required[str] |
|
usage: Required[Optional[ChatCompletionUsageBlock]] |
|
index: int |
|
|
|
|
|
provider_specific_fields: Optional[Dict[str, Any]] |
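
# Illustrative chunk literal (a sketch): `text`, `is_finished`, `finish_reason`,
# and `usage` are required; the remaining keys are optional (`total=False`).
#
#     chunk: GenericStreamingChunk = {
#         "text": "Hello",
#         "is_finished": False,
#         "finish_reason": "",
#         "usage": None,
#         "index": 0,
#     }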
|
|
|
|
|
|
|
|
|
|
class CallTypes(Enum): |
|
embedding = "embedding" |
|
aembedding = "aembedding" |
|
completion = "completion" |
|
acompletion = "acompletion" |
|
atext_completion = "atext_completion" |
|
text_completion = "text_completion" |
|
image_generation = "image_generation" |
|
aimage_generation = "aimage_generation" |
|
moderation = "moderation" |
|
amoderation = "amoderation" |
|
atranscription = "atranscription" |
|
transcription = "transcription" |
|
aspeech = "aspeech" |
|
speech = "speech" |
|
rerank = "rerank" |
|
arerank = "arerank" |
|
arealtime = "_arealtime" |
|
create_batch = "create_batch" |
|
acreate_batch = "acreate_batch" |
|
pass_through = "pass_through_endpoint" |
|
|
|
|
|
CallTypesLiteral = Literal[ |
|
"embedding", |
|
"aembedding", |
|
"completion", |
|
"acompletion", |
|
"atext_completion", |
|
"text_completion", |
|
"image_generation", |
|
"aimage_generation", |
|
"moderation", |
|
"amoderation", |
|
"atranscription", |
|
"transcription", |
|
"aspeech", |
|
"speech", |
|
"rerank", |
|
"arerank", |
|
"_arealtime", |
|
"create_batch", |
|
"acreate_batch", |
|
"pass_through_endpoint", |
|
] |
|
|
|
|
|
class PassthroughCallTypes(Enum): |
|
passthrough_image_generation = "passthrough-image-generation" |
|
|
|
|
|
class TopLogprob(OpenAIObject): |
|
token: str |
|
"""The token.""" |
|
|
|
bytes: Optional[List[int]] = None |
|
"""A list of integers representing the UTF-8 bytes representation of the token. |
|
|
|
Useful in instances where characters are represented by multiple tokens and |
|
their byte representations must be combined to generate the correct text |
|
representation. Can be `null` if there is no bytes representation for the token. |
|
""" |
|
|
|
logprob: float |
|
"""The log probability of this token, if it is within the top 20 most likely |
|
tokens. |
|
|
|
Otherwise, the value `-9999.0` is used to signify that the token is very |
|
unlikely. |
|
""" |
|
|
|
|
|
class ChatCompletionTokenLogprob(OpenAIObject): |
|
token: str |
|
"""The token.""" |
|
|
|
bytes: Optional[List[int]] = None |
|
"""A list of integers representing the UTF-8 bytes representation of the token. |
|
|
|
Useful in instances where characters are represented by multiple tokens and |
|
their byte representations must be combined to generate the correct text |
|
representation. Can be `null` if there is no bytes representation for the token. |
|
""" |
|
|
|
logprob: float |
|
"""The log probability of this token, if it is within the top 20 most likely |
|
tokens. |
|
|
|
Otherwise, the value `-9999.0` is used to signify that the token is very |
|
unlikely. |
|
""" |
|
|
|
top_logprobs: List[TopLogprob] |
|
"""List of the most likely tokens and their log probability, at this token |
|
position. |
|
|
|
In rare cases, there may be fewer than the number of requested `top_logprobs` |
|
returned. |
|
""" |
|
|
|
def __contains__(self, key): |
|
|
|
return hasattr(self, key) |
|
|
|
def get(self, key, default=None): |
|
|
|
return getattr(self, key, default) |
|
|
|
def __getitem__(self, key): |
|
|
|
return getattr(self, key) |
|
|
|
|
|
class ChoiceLogprobs(OpenAIObject): |
|
content: Optional[List[ChatCompletionTokenLogprob]] = None |
|
"""A list of message content tokens with log probability information.""" |
|
|
|
def __contains__(self, key): |
|
|
|
return hasattr(self, key) |
|
|
|
def get(self, key, default=None): |
|
|
|
return getattr(self, key, default) |
|
|
|
def __getitem__(self, key): |
|
|
|
return getattr(self, key) |
|
|
|
|
|
class FunctionCall(OpenAIObject): |
|
arguments: str |
|
name: Optional[str] = None |
|
|
|
|
|
class Function(OpenAIObject): |
|
arguments: str |
|
name: Optional[ |
|
str |
|
] |
|
|
|
def __init__( |
|
self, |
|
arguments: Optional[Union[Dict, str]], |
|
name: Optional[str] = None, |
|
**params, |
|
): |
|
        if arguments is None:
            arguments = ""
        elif isinstance(arguments, dict):
            # tool-call arguments may arrive as a dict; serialize to a JSON string
            arguments = json.dumps(arguments)

        data = {"arguments": arguments, "name": name, **params}

        super(Function, self).__init__(**data)
|
|
|
def __contains__(self, key): |
|
|
|
return hasattr(self, key) |
|
|
|
def get(self, key, default=None): |
|
|
|
return getattr(self, key, default) |
|
|
|
def __getitem__(self, key): |
|
|
|
return getattr(self, key) |
|
|
|
def __setitem__(self, key, value): |
|
|
|
setattr(self, key, value) |
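
# Illustrative usage (a sketch): dict arguments are serialized to a JSON string,
# and `None` becomes the empty string.
#
#     f = Function(arguments={"city": "SF"}, name="get_weather")
#     f.arguments                         # -> '{"city": "SF"}'
#     Function(arguments=None).arguments  # -> ""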
|
|
|
|
|
class ChatCompletionDeltaToolCall(OpenAIObject): |
|
id: Optional[str] = None |
|
function: Function |
|
type: Optional[str] = None |
|
index: int |
|
|
|
|
|
class HiddenParams(OpenAIObject): |
|
original_response: Optional[Union[str, Any]] = None |
|
model_id: Optional[str] = None |
|
api_base: Optional[str] = None |
|
|
|
model_config = ConfigDict(extra="allow", protected_namespaces=()) |
|
|
|
def get(self, key, default=None): |
|
|
|
return getattr(self, key, default) |
|
|
|
def __getitem__(self, key): |
|
|
|
return getattr(self, key) |
|
|
|
def __setitem__(self, key, value): |
|
|
|
setattr(self, key, value) |
|
|
|
def json(self, **kwargs): |
|
try: |
|
return self.model_dump() |
|
except Exception: |
|
|
|
return self.dict() |
|
|
|
|
|
class ChatCompletionMessageToolCall(OpenAIObject): |
|
def __init__( |
|
self, |
|
function: Union[Dict, Function], |
|
id: Optional[str] = None, |
|
type: Optional[str] = None, |
|
**params, |
|
): |
|
super(ChatCompletionMessageToolCall, self).__init__(**params) |
|
if isinstance(function, Dict): |
|
self.function = Function(**function) |
|
else: |
|
self.function = function |
|
|
|
if id is not None: |
|
self.id = id |
|
else: |
|
self.id = f"{uuid.uuid4()}" |
|
|
|
if type is not None: |
|
self.type = type |
|
else: |
|
self.type = "function" |
|
|
|
def __contains__(self, key): |
|
|
|
return hasattr(self, key) |
|
|
|
def get(self, key, default=None): |
|
|
|
return getattr(self, key, default) |
|
|
|
def __getitem__(self, key): |
|
|
|
return getattr(self, key) |
|
|
|
def __setitem__(self, key, value): |
|
|
|
setattr(self, key, value) |
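
# Illustrative usage (a sketch): a plain dict is coerced into a `Function`, and
# `id`/`type` are defaulted when omitted.
#
#     tc = ChatCompletionMessageToolCall(
#         function={"name": "get_weather", "arguments": '{"city": "SF"}'}
#     )
#     tc.type  # -> "function"; tc.id is a generated uuid4 string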
|
|
|
|
|
from openai.types.chat.chat_completion_audio import ChatCompletionAudio |
|
|
|
|
|
class ChatCompletionAudioResponse(ChatCompletionAudio): |
|
|
|
def __init__( |
|
self, |
|
data: str, |
|
expires_at: int, |
|
transcript: str, |
|
id: Optional[str] = None, |
|
**params, |
|
): |
|
        if id is None:
            id = f"{uuid.uuid4()}"
|
super(ChatCompletionAudioResponse, self).__init__( |
|
data=data, expires_at=expires_at, transcript=transcript, id=id, **params |
|
) |
|
|
|
def __contains__(self, key): |
|
|
|
return hasattr(self, key) |
|
|
|
def get(self, key, default=None): |
|
|
|
return getattr(self, key, default) |
|
|
|
def __getitem__(self, key): |
|
|
|
return getattr(self, key) |
|
|
|
def __setitem__(self, key, value): |
|
|
|
setattr(self, key, value) |
|
|
|
|
|
""" |
|
Reference: |
|
ChatCompletionMessage(content='This is a test', role='assistant', function_call=None, tool_calls=None)) |
|
""" |
|
|
|
|
|
class Message(OpenAIObject): |
|
content: Optional[str] |
|
role: Literal["assistant", "user", "system", "tool", "function"] |
|
tool_calls: Optional[List[ChatCompletionMessageToolCall]] |
|
function_call: Optional[FunctionCall] |
|
audio: Optional[ChatCompletionAudioResponse] = None |
|
provider_specific_fields: Optional[Dict[str, Any]] = Field( |
|
default=None, exclude=True |
|
) |
|
|
|
def __init__( |
|
self, |
|
content: Optional[str] = None, |
|
role: Literal["assistant"] = "assistant", |
|
function_call=None, |
|
tool_calls: Optional[list] = None, |
|
audio: Optional[ChatCompletionAudioResponse] = None, |
|
provider_specific_fields: Optional[Dict[str, Any]] = None, |
|
**params, |
|
): |
|
init_values: Dict[str, Any] = { |
|
"content": content, |
|
"role": role or "assistant", |
|
"function_call": ( |
|
FunctionCall(**function_call) if function_call is not None else None |
|
), |
|
"tool_calls": ( |
|
[ |
|
( |
|
ChatCompletionMessageToolCall(**tool_call) |
|
if isinstance(tool_call, dict) |
|
else tool_call |
|
) |
|
for tool_call in tool_calls |
|
] |
|
if tool_calls is not None and len(tool_calls) > 0 |
|
else None |
|
), |
|
} |
|
|
|
if audio is not None: |
|
init_values["audio"] = audio |
|
|
|
super(Message, self).__init__( |
|
**init_values, |
|
**params, |
|
) |
|
|
|
if audio is None: |
|
|
|
|
|
del self.audio |
|
|
|
if provider_specific_fields: |
|
self.provider_specific_fields = provider_specific_fields |
|
for k, v in provider_specific_fields.items(): |
|
setattr(self, k, v) |
|
|
|
def get(self, key, default=None): |
|
|
|
return getattr(self, key, default) |
|
|
|
def __getitem__(self, key): |
|
|
|
return getattr(self, key) |
|
|
|
def __setitem__(self, key, value): |
|
|
|
setattr(self, key, value) |
|
|
|
def json(self, **kwargs): |
|
try: |
|
return self.model_dump() |
|
except Exception: |
|
|
|
return self.dict() |
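
# Illustrative usage (a sketch): tool calls may be passed as dicts and are
# coerced into `ChatCompletionMessageToolCall` objects; an empty tool_calls
# list is normalized to None.
#
#     msg = Message(
#         content=None,
#         tool_calls=[{"function": {"name": "get_weather", "arguments": "{}"}}],
#     )
#     msg.tool_calls[0].function.name   # -> "get_weather"
#     Message(content="hi").tool_calls  # -> None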
|
|
|
|
|
class Delta(OpenAIObject): |
|
provider_specific_fields: Optional[Dict[str, Any]] = Field( |
|
default=None, exclude=True |
|
) |
|
|
|
def __init__( |
|
self, |
|
content=None, |
|
role=None, |
|
function_call=None, |
|
tool_calls=None, |
|
audio: Optional[ChatCompletionAudioResponse] = None, |
|
**params, |
|
): |
|
super(Delta, self).__init__(**params) |
|
provider_specific_fields: Dict[str, Any] = {} |
|
if "reasoning_content" in params: |
|
provider_specific_fields["reasoning_content"] = params["reasoning_content"] |
|
setattr(self, "reasoning_content", params["reasoning_content"]) |
|
self.content = content |
|
self.role = role |
|
|
|
self.function_call: Optional[Union[FunctionCall, Any]] = None |
|
self.tool_calls: Optional[List[Union[ChatCompletionDeltaToolCall, Any]]] = None |
|
self.audio: Optional[ChatCompletionAudioResponse] = None |
|
|
|
if provider_specific_fields: |
|
self.provider_specific_fields = provider_specific_fields |
|
|
|
if function_call is not None and isinstance(function_call, dict): |
|
self.function_call = FunctionCall(**function_call) |
|
else: |
|
self.function_call = function_call |
|
if tool_calls is not None and isinstance(tool_calls, list): |
|
self.tool_calls = [] |
|
for tool_call in tool_calls: |
|
if isinstance(tool_call, dict): |
|
if tool_call.get("index", None) is None: |
|
tool_call["index"] = 0 |
|
self.tool_calls.append(ChatCompletionDeltaToolCall(**tool_call)) |
|
elif isinstance(tool_call, ChatCompletionDeltaToolCall): |
|
self.tool_calls.append(tool_call) |
|
else: |
|
self.tool_calls = tool_calls |
|
|
|
self.audio = audio |
|
|
|
def __contains__(self, key): |
|
|
|
return hasattr(self, key) |
|
|
|
def get(self, key, default=None): |
|
|
|
return getattr(self, key, default) |
|
|
|
def __getitem__(self, key): |
|
|
|
return getattr(self, key) |
|
|
|
def __setitem__(self, key, value): |
|
|
|
setattr(self, key, value) |
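
# Illustrative usage (a sketch): a `reasoning_content` kwarg is surfaced both as
# an attribute and under `provider_specific_fields`.
#
#     d = Delta(content="Hel", role="assistant", reasoning_content="thinking...")
#     d.provider_specific_fields  # -> {"reasoning_content": "thinking..."}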
|
|
|
|
|
class Choices(OpenAIObject): |
|
def __init__( |
|
self, |
|
finish_reason=None, |
|
index=0, |
|
message: Optional[Union[Message, dict]] = None, |
|
logprobs=None, |
|
enhancements=None, |
|
**params, |
|
): |
|
super(Choices, self).__init__(**params) |
|
if finish_reason is not None: |
|
self.finish_reason = map_finish_reason( |
|
finish_reason |
|
) |
|
else: |
|
self.finish_reason = "stop" |
|
self.index = index |
|
if message is None: |
|
self.message = Message() |
|
else: |
|
if isinstance(message, Message): |
|
self.message = message |
|
elif isinstance(message, dict): |
|
self.message = Message(**message) |
|
if logprobs is not None: |
|
if isinstance(logprobs, dict): |
|
self.logprobs = ChoiceLogprobs(**logprobs) |
|
else: |
|
self.logprobs = logprobs |
|
if enhancements is not None: |
|
self.enhancements = enhancements |
|
|
|
def __contains__(self, key): |
|
|
|
return hasattr(self, key) |
|
|
|
def get(self, key, default=None): |
|
|
|
return getattr(self, key, default) |
|
|
|
def __getitem__(self, key): |
|
|
|
return getattr(self, key) |
|
|
|
def __setitem__(self, key, value): |
|
|
|
setattr(self, key, value) |
|
|
|
|
|
class CompletionTokensDetailsWrapper(CompletionTokensDetails):
|
text_tokens: Optional[int] = None |
|
"""Text tokens generated by the model.""" |
|
|
|
|
|
class PromptTokensDetailsWrapper(PromptTokensDetails):
|
text_tokens: Optional[int] = None |
|
"""Text tokens sent to the model.""" |
|
|
|
image_tokens: Optional[int] = None |
|
"""Image tokens sent to the model.""" |
|
|
|
|
|
class Usage(CompletionUsage): |
|
    _cache_creation_input_tokens: int = PrivateAttr(0)
    _cache_read_input_tokens: int = PrivateAttr(0)
|
|
|
def __init__( |
|
self, |
|
prompt_tokens: Optional[int] = None, |
|
completion_tokens: Optional[int] = None, |
|
total_tokens: Optional[int] = None, |
|
reasoning_tokens: Optional[int] = None, |
|
prompt_tokens_details: Optional[Union[PromptTokensDetailsWrapper, dict]] = None, |
|
completion_tokens_details: Optional[ |
|
Union[CompletionTokensDetailsWrapper, dict] |
|
] = None, |
|
**params, |
|
): |
|
|
|
_completion_tokens_details: Optional[CompletionTokensDetailsWrapper] = None |
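        # NOTE: an explicit `reasoning_tokens` argument takes precedence over
        # any `completion_tokens_details` passed alongside it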
|
if reasoning_tokens: |
|
completion_tokens_details = CompletionTokensDetailsWrapper( |
|
reasoning_tokens=reasoning_tokens |
|
) |
|
|
|
|
|
if completion_tokens_details: |
|
if isinstance(completion_tokens_details, dict): |
|
_completion_tokens_details = CompletionTokensDetailsWrapper( |
|
**completion_tokens_details |
|
) |
|
elif isinstance(completion_tokens_details, CompletionTokensDetails): |
|
_completion_tokens_details = completion_tokens_details |
|
|
|
|
|
if "prompt_cache_hit_tokens" in params and isinstance( |
|
params["prompt_cache_hit_tokens"], int |
|
): |
|
if prompt_tokens_details is None: |
|
prompt_tokens_details = PromptTokensDetailsWrapper( |
|
cached_tokens=params["prompt_cache_hit_tokens"] |
|
) |
|
|
|
|
|
if "cache_read_input_tokens" in params and isinstance( |
|
params["cache_read_input_tokens"], int |
|
): |
|
if prompt_tokens_details is None: |
|
prompt_tokens_details = PromptTokensDetailsWrapper( |
|
cached_tokens=params["cache_read_input_tokens"] |
|
) |
|
|
|
|
|
_prompt_tokens_details: Optional[PromptTokensDetailsWrapper] = None |
|
if prompt_tokens_details: |
|
if isinstance(prompt_tokens_details, dict): |
|
_prompt_tokens_details = PromptTokensDetailsWrapper( |
|
**prompt_tokens_details |
|
) |
|
elif isinstance(prompt_tokens_details, PromptTokensDetails): |
|
_prompt_tokens_details = prompt_tokens_details |
|
|
|
super().__init__( |
|
prompt_tokens=prompt_tokens or 0, |
|
completion_tokens=completion_tokens or 0, |
|
total_tokens=total_tokens or 0, |
|
completion_tokens_details=_completion_tokens_details or None, |
|
prompt_tokens_details=_prompt_tokens_details or None, |
|
) |
|
|
|
|
|
if "cache_creation_input_tokens" in params and isinstance( |
|
params["cache_creation_input_tokens"], int |
|
): |
|
self._cache_creation_input_tokens = params["cache_creation_input_tokens"] |
|
|
|
if "cache_read_input_tokens" in params and isinstance( |
|
params["cache_read_input_tokens"], int |
|
): |
|
self._cache_read_input_tokens = params["cache_read_input_tokens"] |
|
|
|
|
|
if "prompt_cache_hit_tokens" in params and isinstance( |
|
params["prompt_cache_hit_tokens"], int |
|
): |
|
self._cache_read_input_tokens = params["prompt_cache_hit_tokens"] |
|
|
|
for k, v in params.items(): |
|
setattr(self, k, v) |
|
|
|
def __contains__(self, key): |
|
|
|
return hasattr(self, key) |
|
|
|
def get(self, key, default=None): |
|
|
|
return getattr(self, key, default) |
|
|
|
def __getitem__(self, key): |
|
|
|
return getattr(self, key) |
|
|
|
def __setitem__(self, key, value): |
|
|
|
setattr(self, key, value) |
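
# Illustrative usage (a sketch): provider cache-token params are mapped onto the
# private attributes and `prompt_tokens_details`.
#
#     u = Usage(prompt_tokens=10, completion_tokens=5, total_tokens=15,
#               cache_read_input_tokens=3)
#     u._cache_read_input_tokens             # -> 3
#     u.prompt_tokens_details.cached_tokens  # -> 3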
|
|
|
|
|
class StreamingChoices(OpenAIObject): |
|
def __init__( |
|
self, |
|
finish_reason=None, |
|
index=0, |
|
delta: Optional[Delta] = None, |
|
logprobs=None, |
|
enhancements=None, |
|
**params, |
|
): |
|
super(StreamingChoices, self).__init__(**params) |
|
if finish_reason: |
|
self.finish_reason = map_finish_reason(finish_reason) |
|
else: |
|
self.finish_reason = None |
|
self.index = index |
|
if delta is not None: |
|
if isinstance(delta, Delta): |
|
self.delta = delta |
|
elif isinstance(delta, dict): |
|
self.delta = Delta(**delta) |
|
else: |
|
self.delta = Delta() |
|
if enhancements is not None: |
|
self.enhancements = enhancements |
|
|
|
if logprobs is not None and isinstance(logprobs, dict): |
|
self.logprobs = ChoiceLogprobs(**logprobs) |
|
else: |
|
self.logprobs = logprobs |
|
|
|
def __contains__(self, key): |
|
|
|
return hasattr(self, key) |
|
|
|
def get(self, key, default=None): |
|
|
|
return getattr(self, key, default) |
|
|
|
def __getitem__(self, key): |
|
|
|
return getattr(self, key) |
|
|
|
def __setitem__(self, key, value): |
|
|
|
setattr(self, key, value) |
|
|
|
|
|
class StreamingChatCompletionChunk(OpenAIChatCompletionChunk): |
|
def __init__(self, **kwargs): |
|
|
|
new_choices = [] |
|
for choice in kwargs["choices"]: |
|
new_choice = StreamingChoices(**choice).model_dump() |
|
new_choices.append(new_choice) |
|
kwargs["choices"] = new_choices |
|
|
|
super().__init__(**kwargs) |
|
|
|
|
|
from openai.types.chat import ChatCompletionChunk |
|
|
|
|
|
class ModelResponseBase(OpenAIObject): |
|
id: str |
|
"""A unique identifier for the completion.""" |
|
|
|
created: int |
|
"""The Unix timestamp (in seconds) of when the completion was created.""" |
|
|
|
model: Optional[str] = None |
|
"""The model used for completion.""" |
|
|
|
object: str |
|
"""The object type, which is always "text_completion" """ |
|
|
|
system_fingerprint: Optional[str] = None |
|
"""This fingerprint represents the backend configuration that the model runs with. |
|
|
|
Can be used in conjunction with the `seed` request parameter to understand when |
|
backend changes have been made that might impact determinism. |
|
""" |
|
|
|
_hidden_params: dict = {} |
|
|
|
_response_headers: Optional[dict] = None |
|
|
|
|
|
class ModelResponseStream(ModelResponseBase): |
|
choices: List[StreamingChoices] |
|
|
|
def __init__( |
|
self, |
|
choices: Optional[List[Union[StreamingChoices, dict, BaseModel]]] = None, |
|
id: Optional[str] = None, |
|
created: Optional[int] = None, |
|
**kwargs, |
|
): |
|
if choices is not None and isinstance(choices, list): |
|
new_choices = [] |
|
for choice in choices: |
|
_new_choice = None |
|
if isinstance(choice, StreamingChoices): |
|
_new_choice = choice |
|
elif isinstance(choice, dict): |
|
_new_choice = StreamingChoices(**choice) |
|
elif isinstance(choice, BaseModel): |
|
_new_choice = StreamingChoices(**choice.model_dump()) |
|
new_choices.append(_new_choice) |
|
kwargs["choices"] = new_choices |
|
else: |
|
kwargs["choices"] = [StreamingChoices()] |
|
|
|
        if id is None:
            id = _generate_id()
        if created is None:
            created = int(time.time())
|
|
|
if ( |
|
"usage" in kwargs |
|
and kwargs["usage"] is not None |
|
and isinstance(kwargs["usage"], dict) |
|
): |
|
kwargs["usage"] = Usage(**kwargs["usage"]) |
|
|
|
kwargs["id"] = id |
|
kwargs["created"] = created |
|
kwargs["object"] = "chat.completion.chunk" |
|
|
|
super().__init__(**kwargs) |
|
|
|
def __contains__(self, key): |
|
|
|
return hasattr(self, key) |
|
|
|
def get(self, key, default=None): |
|
|
|
return getattr(self, key, default) |
|
|
|
def __getitem__(self, key): |
|
|
|
return getattr(self, key) |
|
|
|
def json(self, **kwargs): |
|
try: |
|
return self.model_dump() |
|
except Exception: |
|
|
|
return self.dict() |
|
|
|
|
|
class ModelResponse(ModelResponseBase): |
|
choices: List[Union[Choices, StreamingChoices]] |
|
"""The list of completion choices the model generated for the input prompt.""" |
|
|
|
def __init__( |
|
self, |
|
id=None, |
|
choices=None, |
|
created=None, |
|
model=None, |
|
object=None, |
|
system_fingerprint=None, |
|
usage=None, |
|
stream=None, |
|
stream_options=None, |
|
response_ms=None, |
|
hidden_params=None, |
|
_response_headers=None, |
|
**params, |
|
) -> None: |
|
if stream is not None and stream is True: |
|
object = "chat.completion.chunk" |
|
if choices is not None and isinstance(choices, list): |
|
new_choices = [] |
|
for choice in choices: |
|
_new_choice = None |
|
if isinstance(choice, StreamingChoices): |
|
_new_choice = choice |
|
elif isinstance(choice, dict): |
|
_new_choice = StreamingChoices(**choice) |
|
elif isinstance(choice, BaseModel): |
|
_new_choice = StreamingChoices(**choice.model_dump()) |
|
new_choices.append(_new_choice) |
|
choices = new_choices |
|
else: |
|
choices = [StreamingChoices()] |
|
else: |
|
object = "chat.completion" |
|
if choices is not None and isinstance(choices, list): |
|
new_choices = [] |
|
for choice in choices: |
|
if isinstance(choice, Choices): |
|
_new_choice = choice |
|
elif isinstance(choice, dict): |
|
_new_choice = Choices(**choice) |
|
else: |
|
_new_choice = choice |
|
new_choices.append(_new_choice) |
|
choices = new_choices |
|
else: |
|
choices = [Choices()] |
|
        if id is None:
            id = _generate_id()
        if created is None:
            created = int(time.time())
        if usage is not None:
            if isinstance(usage, dict):
                usage = Usage(**usage)
        elif stream is None or stream is False:
            usage = Usage()
|
if hidden_params: |
|
self._hidden_params = hidden_params |
|
|
|
if _response_headers: |
|
self._response_headers = _response_headers |
|
|
|
init_values = { |
|
"id": id, |
|
"choices": choices, |
|
"created": created, |
|
"model": model, |
|
"object": object, |
|
"system_fingerprint": system_fingerprint, |
|
} |
|
|
|
if usage is not None: |
|
init_values["usage"] = usage |
|
|
|
super().__init__( |
|
**init_values, |
|
**params, |
|
) |
|
|
|
def __contains__(self, key): |
|
|
|
return hasattr(self, key) |
|
|
|
def get(self, key, default=None): |
|
|
|
return getattr(self, key, default) |
|
|
|
def __getitem__(self, key): |
|
|
|
return getattr(self, key) |
|
|
|
def json(self, **kwargs): |
|
try: |
|
return self.model_dump() |
|
except Exception: |
|
|
|
return self.dict() |
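
# Illustrative usage (a sketch): dict choices are coerced into `Choices` (or
# `StreamingChoices` when stream=True), and id/created/usage are defaulted.
#
#     resp = ModelResponse(
#         model="my-model",
#         choices=[{"index": 0, "finish_reason": "stop",
#                   "message": {"role": "assistant", "content": "hi"}}],
#     )
#     resp.choices[0].message.content  # -> "hi"
#     resp.object                      # -> "chat.completion"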
|
|
|
|
|
class Embedding(OpenAIObject): |
|
embedding: Union[list, str] = [] |
|
index: int |
|
object: Literal["embedding"] |
|
|
|
def get(self, key, default=None): |
|
|
|
return getattr(self, key, default) |
|
|
|
def __getitem__(self, key): |
|
|
|
return getattr(self, key) |
|
|
|
def __setitem__(self, key, value): |
|
|
|
setattr(self, key, value) |
|
|
|
|
|
class EmbeddingResponse(OpenAIObject): |
|
model: Optional[str] = None |
|
"""The model used for embedding.""" |
|
|
|
data: List |
|
"""The actual embedding value""" |
|
|
|
object: Literal["list"] |
|
"""The object type, which is always "list" """ |
|
|
|
usage: Optional[Usage] = None |
|
"""Usage statistics for the embedding request.""" |
|
|
|
_hidden_params: dict = {} |
|
_response_headers: Optional[Dict] = None |
|
_response_ms: Optional[float] = None |
|
|
|
def __init__( |
|
self, |
|
model: Optional[str] = None, |
|
usage: Optional[Usage] = None, |
|
response_ms=None, |
|
data: Optional[Union[List, List[Embedding]]] = None, |
|
hidden_params=None, |
|
_response_headers=None, |
|
**params, |
|
): |
|
object = "list" |
|
if response_ms: |
|
_response_ms = response_ms |
|
else: |
|
_response_ms = None |
|
if data: |
|
data = data |
|
else: |
|
data = [] |
|
|
|
if usage: |
|
usage = usage |
|
else: |
|
usage = Usage() |
|
|
|
if _response_headers: |
|
self._response_headers = _response_headers |
|
|
|
model = model |
|
super().__init__(model=model, object=object, data=data, usage=usage) |
|
|
|
def __contains__(self, key): |
|
|
|
return hasattr(self, key) |
|
|
|
def get(self, key, default=None): |
|
|
|
return getattr(self, key, default) |
|
|
|
def __getitem__(self, key): |
|
|
|
return getattr(self, key) |
|
|
|
def __setitem__(self, key, value): |
|
|
|
setattr(self, key, value) |
|
|
|
def json(self, **kwargs): |
|
try: |
|
return self.model_dump() |
|
except Exception: |
|
|
|
return self.dict() |
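
# Illustrative usage (a sketch): `object` is always "list", and `data`/`usage`
# are defaulted when omitted.
#
#     er = EmbeddingResponse(
#         model="my-embedding-model",
#         data=[{"embedding": [0.1, 0.2], "index": 0, "object": "embedding"}],
#     )
#     er.object  # -> "list"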
|
|
|
|
|
class Logprobs(OpenAIObject): |
|
text_offset: Optional[List[int]] |
|
token_logprobs: Optional[List[Union[float, None]]] |
|
tokens: Optional[List[str]] |
|
top_logprobs: Optional[List[Union[Dict[str, float], None]]] |
|
|
|
|
|
class TextChoices(OpenAIObject): |
|
def __init__(self, finish_reason=None, index=0, text=None, logprobs=None, **params): |
|
super(TextChoices, self).__init__(**params) |
|
if finish_reason: |
|
self.finish_reason = map_finish_reason(finish_reason) |
|
else: |
|
self.finish_reason = None |
|
self.index = index |
|
        self.text = text
        if logprobs is not None and isinstance(logprobs, dict):
            self.logprobs = Logprobs(**logprobs)
        else:
            self.logprobs = logprobs
|
|
|
def __contains__(self, key): |
|
|
|
return hasattr(self, key) |
|
|
|
def get(self, key, default=None): |
|
|
|
return getattr(self, key, default) |
|
|
|
def __getitem__(self, key): |
|
|
|
return getattr(self, key) |
|
|
|
def __setitem__(self, key, value): |
|
|
|
setattr(self, key, value) |
|
|
|
def json(self, **kwargs): |
|
try: |
|
return self.model_dump() |
|
except Exception: |
|
|
|
return self.dict() |
|
|
|
|
|
class TextCompletionResponse(OpenAIObject): |
|
""" |
|
{ |
|
"id": response["id"], |
|
"object": "text_completion", |
|
"created": response["created"], |
|
"model": response["model"], |
|
"choices": [ |
|
{ |
|
"text": response["choices"][0]["message"]["content"], |
|
"index": response["choices"][0]["index"], |
|
"logprobs": transformed_logprobs, |
|
"finish_reason": response["choices"][0]["finish_reason"] |
|
} |
|
], |
|
"usage": response["usage"] |
|
} |
|
""" |
|
|
|
id: str |
|
object: str |
|
created: int |
|
model: Optional[str] |
|
choices: List[TextChoices] |
|
usage: Optional[Usage] |
|
_response_ms: Optional[int] = None |
|
_hidden_params: HiddenParams |
|
|
|
def __init__( |
|
self, |
|
id=None, |
|
choices=None, |
|
created=None, |
|
model=None, |
|
usage=None, |
|
stream=False, |
|
response_ms=None, |
|
object=None, |
|
**params, |
|
): |
|
if stream: |
|
object = "text_completion.chunk" |
|
choices = [TextChoices()] |
|
else: |
|
object = "text_completion" |
|
if choices is not None and isinstance(choices, list): |
|
new_choices = [] |
|
for choice in choices: |
|
_new_choice = None |
|
if isinstance(choice, TextChoices): |
|
_new_choice = choice |
|
elif isinstance(choice, dict): |
|
_new_choice = TextChoices(**choice) |
|
new_choices.append(_new_choice) |
|
choices = new_choices |
|
else: |
|
choices = [TextChoices()] |
|
        if id is None:
            id = _generate_id()
        if created is None:
            created = int(time.time())

        if not usage:
            usage = Usage()
|
|
|
super(TextCompletionResponse, self).__init__( |
|
id=id, |
|
object=object, |
|
created=created, |
|
model=model, |
|
choices=choices, |
|
usage=usage, |
|
**params, |
|
) |
|
|
|
if response_ms: |
|
self._response_ms = response_ms |
|
else: |
|
self._response_ms = None |
|
self._hidden_params = HiddenParams() |
|
|
|
def __contains__(self, key): |
|
|
|
return hasattr(self, key) |
|
|
|
def get(self, key, default=None): |
|
|
|
return getattr(self, key, default) |
|
|
|
def __getitem__(self, key): |
|
|
|
return getattr(self, key) |
|
|
|
def __setitem__(self, key, value): |
|
|
|
setattr(self, key, value) |
|
|
|
|
|
from openai.types.images_response import Image as OpenAIImage |
|
|
|
|
|
class ImageObject(OpenAIImage): |
|
""" |
|
    Represents the URL or the content of an image generated by the OpenAI API.
|
|
|
Attributes: |
|
b64_json: The base64-encoded JSON of the generated image, if response_format is b64_json. |
|
url: The URL of the generated image, if response_format is url (default). |
|
revised_prompt: The prompt that was used to generate the image, if there was any revision to the prompt. |
|
|
|
https://platform.openai.com/docs/api-reference/images/object |
|
""" |
|
|
|
b64_json: Optional[str] = None |
|
url: Optional[str] = None |
|
revised_prompt: Optional[str] = None |
|
|
|
def __init__(self, b64_json=None, url=None, revised_prompt=None, **kwargs): |
|
super().__init__(b64_json=b64_json, url=url, revised_prompt=revised_prompt) |
|
|
|
def __contains__(self, key): |
|
|
|
return hasattr(self, key) |
|
|
|
def get(self, key, default=None): |
|
|
|
return getattr(self, key, default) |
|
|
|
def __getitem__(self, key): |
|
|
|
return getattr(self, key) |
|
|
|
def __setitem__(self, key, value): |
|
|
|
setattr(self, key, value) |
|
|
|
def json(self, **kwargs): |
|
try: |
|
return self.model_dump() |
|
except Exception: |
|
|
|
return self.dict() |
|
|
|
|
|
from openai.types.images_response import ImagesResponse as OpenAIImageResponse |
|
|
|
|
|
class ImageResponse(OpenAIImageResponse): |
|
_hidden_params: dict = {} |
|
usage: Usage |
|
|
|
def __init__( |
|
self, |
|
created: Optional[int] = None, |
|
data: Optional[List[ImageObject]] = None, |
|
response_ms=None, |
|
usage: Optional[Usage] = None, |
|
hidden_params: Optional[dict] = None, |
|
): |
|
        # response_ms is accepted for API compatibility but is not stored on this object
        if not data:
            data = []

        if not created:
            created = int(time.time())
|
|
|
_data: List[OpenAIImage] = [] |
|
for d in data: |
|
if isinstance(d, dict): |
|
_data.append(ImageObject(**d)) |
|
elif isinstance(d, BaseModel): |
|
_data.append(ImageObject(**d.model_dump())) |
|
_usage = usage or Usage( |
|
prompt_tokens=0, |
|
completion_tokens=0, |
|
total_tokens=0, |
|
) |
|
super().__init__(created=created, data=_data, usage=_usage) |
|
self._hidden_params = hidden_params or {} |
|
|
|
def __contains__(self, key): |
|
|
|
return hasattr(self, key) |
|
|
|
def get(self, key, default=None): |
|
|
|
return getattr(self, key, default) |
|
|
|
def __getitem__(self, key): |
|
|
|
return getattr(self, key) |
|
|
|
def __setitem__(self, key, value): |
|
|
|
setattr(self, key, value) |
|
|
|
def json(self, **kwargs): |
|
try: |
|
return self.model_dump() |
|
except Exception: |
|
|
|
return self.dict() |
|
|
|
|
|
class TranscriptionResponse(OpenAIObject): |
|
text: Optional[str] = None |
|
|
|
_hidden_params: dict = {} |
|
_response_headers: Optional[dict] = None |
|
|
|
def __init__(self, text=None): |
|
super().__init__(text=text) |
|
|
|
def __contains__(self, key): |
|
|
|
return hasattr(self, key) |
|
|
|
def get(self, key, default=None): |
|
|
|
return getattr(self, key, default) |
|
|
|
def __getitem__(self, key): |
|
|
|
return getattr(self, key) |
|
|
|
def __setitem__(self, key, value): |
|
|
|
setattr(self, key, value) |
|
|
|
def json(self, **kwargs): |
|
try: |
|
return self.model_dump() |
|
except Exception: |
|
|
|
return self.dict() |
|
|
|
|
|
class GenericImageParsingChunk(TypedDict): |
|
type: str |
|
media_type: str |
|
data: str |
|
|
|
|
|
class ResponseFormatChunk(TypedDict, total=False): |
|
type: Required[Literal["json_object", "text"]] |
|
response_schema: dict |
|
|
|
|
|
class LoggedLiteLLMParams(TypedDict, total=False): |
|
force_timeout: Optional[float] |
|
custom_llm_provider: Optional[str] |
|
api_base: Optional[str] |
|
litellm_call_id: Optional[str] |
|
model_alias_map: Optional[dict] |
|
metadata: Optional[dict] |
|
model_info: Optional[dict] |
|
proxy_server_request: Optional[dict] |
|
acompletion: Optional[bool] |
|
preset_cache_key: Optional[str] |
|
no_log: Optional[bool] |
|
input_cost_per_second: Optional[float] |
|
input_cost_per_token: Optional[float] |
|
output_cost_per_token: Optional[float] |
|
output_cost_per_second: Optional[float] |
|
cooldown_time: Optional[float] |
|
|
|
|
|
class AdapterCompletionStreamWrapper: |
|
def __init__(self, completion_stream): |
|
self.completion_stream = completion_stream |
|
|
|
def __iter__(self): |
|
return self |
|
|
|
def __aiter__(self): |
|
return self |
|
|
|
    def __next__(self):
        try:
            for chunk in self.completion_stream:
                if chunk == "None" or chunk is None:
                    raise Exception
                return chunk
            raise StopIteration
        except StopIteration:
            raise StopIteration
        except Exception as e:
            print(f"AdapterCompletionStreamWrapper - {e}")
            # stop cleanly rather than implicitly returning None forever
            raise StopIteration
|
|
|
async def __anext__(self): |
|
try: |
|
async for chunk in self.completion_stream: |
|
if chunk == "None" or chunk is None: |
|
raise Exception |
|
return chunk |
|
raise StopIteration |
|
except StopIteration: |
|
raise StopAsyncIteration |
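
# Illustrative usage (a sketch): wraps any (a)sync iterable of chunks, stopping
# on None sentinels.
#
#     wrapper = AdapterCompletionStreamWrapper(iter(["chunk-1", "chunk-2"]))
#     list(wrapper)  # -> ["chunk-1", "chunk-2"]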
|
|
|
|
|
class StandardLoggingUserAPIKeyMetadata(TypedDict): |
|
user_api_key_hash: Optional[str] |
|
user_api_key_alias: Optional[str] |
|
user_api_key_org_id: Optional[str] |
|
user_api_key_team_id: Optional[str] |
|
user_api_key_user_id: Optional[str] |
|
user_api_key_team_alias: Optional[str] |
|
user_api_key_end_user_id: Optional[str] |
|
|
|
|
|
class StandardLoggingPromptManagementMetadata(TypedDict): |
|
prompt_id: str |
|
prompt_variables: Optional[dict] |
|
prompt_integration: str |
|
|
|
|
|
class StandardLoggingMetadata(StandardLoggingUserAPIKeyMetadata): |
|
""" |
|
    Specific metadata key/value pairs logged to integrations for easier cost tracking and prompt management.
|
""" |
|
|
|
    spend_logs_metadata: Optional[dict]
|
requester_ip_address: Optional[str] |
|
requester_metadata: Optional[dict] |
|
prompt_management_metadata: Optional[StandardLoggingPromptManagementMetadata] |
|
|
|
|
|
class StandardLoggingAdditionalHeaders(TypedDict, total=False): |
|
x_ratelimit_limit_requests: int |
|
x_ratelimit_limit_tokens: int |
|
x_ratelimit_remaining_requests: int |
|
x_ratelimit_remaining_tokens: int |
|
|
|
|
|
class StandardLoggingHiddenParams(TypedDict): |
|
model_id: Optional[str] |
|
cache_key: Optional[str] |
|
api_base: Optional[str] |
|
response_cost: Optional[str] |
|
litellm_overhead_time_ms: Optional[float] |
|
additional_headers: Optional[StandardLoggingAdditionalHeaders] |
|
|
|
|
|
class StandardLoggingModelInformation(TypedDict): |
|
model_map_key: str |
|
model_map_value: Optional[ModelInfo] |
|
|
|
|
|
class StandardLoggingModelCostFailureDebugInformation(TypedDict, total=False): |
|
""" |
|
Debug information, if cost tracking fails. |
|
|
|
Avoid logging sensitive information like response or optional params |
|
""" |
|
|
|
error_str: Required[str] |
|
traceback_str: Required[str] |
|
model: str |
|
cache_hit: Optional[bool] |
|
custom_llm_provider: Optional[str] |
|
base_model: Optional[str] |
|
call_type: str |
|
custom_pricing: Optional[bool] |
|
|
|
|
|
class StandardLoggingPayloadErrorInformation(TypedDict, total=False): |
|
error_code: Optional[str] |
|
error_class: Optional[str] |
|
llm_provider: Optional[str] |
|
|
|
|
|
class StandardLoggingGuardrailInformation(TypedDict, total=False): |
|
guardrail_name: Optional[str] |
|
guardrail_mode: Optional[Union[GuardrailEventHooks, List[GuardrailEventHooks]]] |
|
guardrail_response: Optional[Union[dict, str]] |
|
guardrail_status: Literal["success", "failure"] |
|
|
|
|
|
StandardLoggingPayloadStatus = Literal["success", "failure"] |
|
|
|
|
|
class StandardLoggingPayload(TypedDict): |
|
id: str |
|
trace_id: str |
|
call_type: str |
|
stream: Optional[bool] |
|
response_cost: float |
|
response_cost_failure_debug_info: Optional[ |
|
StandardLoggingModelCostFailureDebugInformation |
|
] |
|
status: StandardLoggingPayloadStatus |
|
custom_llm_provider: Optional[str] |
|
total_tokens: int |
|
prompt_tokens: int |
|
completion_tokens: int |
|
startTime: float |
|
endTime: float |
|
completionStartTime: float |
|
response_time: float |
|
model_map_information: StandardLoggingModelInformation |
|
model: str |
|
model_id: Optional[str] |
|
model_group: Optional[str] |
|
api_base: str |
|
metadata: StandardLoggingMetadata |
|
cache_hit: Optional[bool] |
|
cache_key: Optional[str] |
|
saved_cache_cost: float |
|
request_tags: list |
|
end_user: Optional[str] |
|
requester_ip_address: Optional[str] |
|
messages: Optional[Union[str, list, dict]] |
|
response: Optional[Union[str, list, dict]] |
|
error_str: Optional[str] |
|
error_information: Optional[StandardLoggingPayloadErrorInformation] |
|
model_parameters: dict |
|
hidden_params: StandardLoggingHiddenParams |
|
guardrail_information: Optional[StandardLoggingGuardrailInformation] |
|
|
|
|
|
from typing import AsyncIterator, Iterator |
|
|
|
|
|
class CustomStreamingDecoder: |
|
async def aiter_bytes( |
|
self, iterator: AsyncIterator[bytes] |
|
) -> AsyncIterator[ |
|
Optional[Union[GenericStreamingChunk, StreamingChatCompletionChunk]] |
|
]: |
|
raise NotImplementedError |
|
|
|
def iter_bytes( |
|
self, iterator: Iterator[bytes] |
|
) -> Iterator[Optional[Union[GenericStreamingChunk, StreamingChatCompletionChunk]]]: |
|
raise NotImplementedError |
|
|
|
|
|
class StandardPassThroughResponseObject(TypedDict): |
|
response: str |
|
|
|
|
|
OPENAI_RESPONSE_HEADERS = [ |
|
"x-ratelimit-remaining-requests", |
|
"x-ratelimit-remaining-tokens", |
|
"x-ratelimit-limit-requests", |
|
"x-ratelimit-limit-tokens", |
|
"x-ratelimit-reset-requests", |
|
"x-ratelimit-reset-tokens", |
|
] |
|
|
|
|
|
class StandardCallbackDynamicParams(TypedDict, total=False): |
|
|
|
langfuse_public_key: Optional[str] |
|
langfuse_secret: Optional[str] |
|
langfuse_secret_key: Optional[str] |
|
langfuse_host: Optional[str] |
|
|
|
|
|
gcs_bucket_name: Optional[str] |
|
gcs_path_service_account: Optional[str] |
|
|
|
|
|
langsmith_api_key: Optional[str] |
|
langsmith_project: Optional[str] |
|
langsmith_base_url: Optional[str] |
|
|
|
|
|
humanloop_api_key: Optional[str] |
|
|
|
|
|
turn_off_message_logging: Optional[bool] |
|
|
|
|
|
all_litellm_params = [ |
|
"metadata", |
|
"litellm_metadata", |
|
"litellm_trace_id", |
|
"tags", |
|
"acompletion", |
|
"aimg_generation", |
|
"atext_completion", |
|
"text_completion", |
|
"caching", |
|
"mock_response", |
|
"mock_timeout", |
|
"disable_add_transform_inline_image_block", |
|
"api_key", |
|
"api_version", |
|
"prompt_id", |
|
"provider_specific_header", |
|
"prompt_variables", |
|
"api_base", |
|
"force_timeout", |
|
"logger_fn", |
|
"verbose", |
|
"custom_llm_provider", |
|
"litellm_logging_obj", |
|
"litellm_call_id", |
|
"use_client", |
|
"id", |
|
"fallbacks", |
|
"azure", |
|
"headers", |
|
"model_list", |
|
"num_retries", |
|
"context_window_fallback_dict", |
|
"retry_policy", |
|
"retry_strategy", |
|
"roles", |
|
"final_prompt_value", |
|
"bos_token", |
|
"eos_token", |
|
"request_timeout", |
|
"complete_response", |
|
"self", |
|
"client", |
|
"rpm", |
|
"tpm", |
|
"max_parallel_requests", |
|
"input_cost_per_token", |
|
"output_cost_per_token", |
|
"input_cost_per_second", |
|
"output_cost_per_second", |
|
"hf_model_name", |
|
"model_info", |
|
"proxy_server_request", |
|
"preset_cache_key", |
|
"caching_groups", |
|
"ttl", |
|
"cache", |
|
"no-log", |
|
"base_model", |
|
"stream_timeout", |
|
"supports_system_message", |
|
"region_name", |
|
"allowed_model_region", |
|
"model_config", |
|
"fastest_response", |
|
"cooldown_time", |
|
"cache_key", |
|
"max_retries", |
|
"azure_ad_token_provider", |
|
"tenant_id", |
|
"client_id", |
|
"azure_username", |
|
"azure_password", |
|
"client_secret", |
|
"user_continue_message", |
|
"configurable_clientside_auth_params", |
|
"weight", |
|
"ensure_alternating_roles", |
|
"assistant_continue_message", |
|
"user_continue_message", |
|
"fallback_depth", |
|
"max_fallbacks", |
|
"max_budget", |
|
"budget_duration", |
|
"use_in_pass_through", |
|
] + list(StandardCallbackDynamicParams.__annotations__.keys()) |
|
|
|
|
|
class KeyGenerationConfig(TypedDict, total=False): |
|
    required_params: List[str]
|
|
|
|
|
class TeamUIKeyGenerationConfig(KeyGenerationConfig): |
|
allowed_team_member_roles: List[str] |
|
|
|
|
|
class PersonalUIKeyGenerationConfig(KeyGenerationConfig): |
|
allowed_user_roles: List[str] |
|
|
|
|
|
class StandardKeyGenerationConfig(TypedDict, total=False): |
|
team_key_generation: TeamUIKeyGenerationConfig |
|
personal_key_generation: PersonalUIKeyGenerationConfig |
|
|
|
|
|
class BudgetConfig(BaseModel): |
|
max_budget: Optional[float] = None |
|
budget_duration: Optional[str] = None |
|
tpm_limit: Optional[int] = None |
|
rpm_limit: Optional[int] = None |
|
|
|
def __init__(self, **data: Any) -> None: |
|
|
|
if "time_period" in data: |
|
data["budget_duration"] = data.pop("time_period") |
|
|
|
|
|
if "budget_limit" in data: |
|
data["max_budget"] = data.pop("budget_limit") |
|
|
|
super().__init__(**data) |
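
# Illustrative usage (a sketch): alternate key names are remapped on init.
#
#     cfg = BudgetConfig(time_period="30d", budget_limit=100.0)
#     cfg.budget_duration  # -> "30d"
#     cfg.max_budget       # -> 100.0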
|
|
|
|
|
GenericBudgetConfigType = Dict[str, BudgetConfig] |
|
|
|
|
|
class LlmProviders(str, Enum): |
|
OPENAI = "openai" |
|
OPENAI_LIKE = "openai_like" |
|
JINA_AI = "jina_ai" |
|
XAI = "xai" |
|
CUSTOM_OPENAI = "custom_openai" |
|
TEXT_COMPLETION_OPENAI = "text-completion-openai" |
|
COHERE = "cohere" |
|
COHERE_CHAT = "cohere_chat" |
|
CLARIFAI = "clarifai" |
|
ANTHROPIC = "anthropic" |
|
ANTHROPIC_TEXT = "anthropic_text" |
|
REPLICATE = "replicate" |
|
HUGGINGFACE = "huggingface" |
|
TOGETHER_AI = "together_ai" |
|
OPENROUTER = "openrouter" |
|
VERTEX_AI = "vertex_ai" |
|
VERTEX_AI_BETA = "vertex_ai_beta" |
|
GEMINI = "gemini" |
|
AI21 = "ai21" |
|
BASETEN = "baseten" |
|
AZURE = "azure" |
|
AZURE_TEXT = "azure_text" |
|
AZURE_AI = "azure_ai" |
|
SAGEMAKER = "sagemaker" |
|
SAGEMAKER_CHAT = "sagemaker_chat" |
|
BEDROCK = "bedrock" |
|
VLLM = "vllm" |
|
NLP_CLOUD = "nlp_cloud" |
|
PETALS = "petals" |
|
OOBABOOGA = "oobabooga" |
|
OLLAMA = "ollama" |
|
OLLAMA_CHAT = "ollama_chat" |
|
DEEPINFRA = "deepinfra" |
|
PERPLEXITY = "perplexity" |
|
MISTRAL = "mistral" |
|
GROQ = "groq" |
|
NVIDIA_NIM = "nvidia_nim" |
|
CEREBRAS = "cerebras" |
|
AI21_CHAT = "ai21_chat" |
|
VOLCENGINE = "volcengine" |
|
CODESTRAL = "codestral" |
|
TEXT_COMPLETION_CODESTRAL = "text-completion-codestral" |
|
DEEPSEEK = "deepseek" |
|
SAMBANOVA = "sambanova" |
|
MARITALK = "maritalk" |
|
VOYAGE = "voyage" |
|
CLOUDFLARE = "cloudflare" |
|
XINFERENCE = "xinference" |
|
FIREWORKS_AI = "fireworks_ai" |
|
FRIENDLIAI = "friendliai" |
|
WATSONX = "watsonx" |
|
WATSONX_TEXT = "watsonx_text" |
|
TRITON = "triton" |
|
PREDIBASE = "predibase" |
|
DATABRICKS = "databricks" |
|
EMPOWER = "empower" |
|
GITHUB = "github" |
|
CUSTOM = "custom" |
|
LITELLM_PROXY = "litellm_proxy" |
|
HOSTED_VLLM = "hosted_vllm" |
|
LM_STUDIO = "lm_studio" |
|
GALADRIEL = "galadriel" |
|
INFINITY = "infinity" |
|
DEEPGRAM = "deepgram" |
|
AIOHTTP_OPENAI = "aiohttp_openai" |
|
LANGFUSE = "langfuse" |
|
HUMANLOOP = "humanloop" |
|
TOPAZ = "topaz" |
|
|
|
|
|
|
|
LlmProvidersSet = {provider.value for provider in LlmProviders} |
|
|
|
|
|
class LiteLLMLoggingBaseClass: |
|
""" |
|
Base class for logging pre and post call |
|
|
|
Meant to simplify type checking for logging obj. |
|
""" |
|
|
|
def pre_call(self, input, api_key, model=None, additional_args={}): |
|
pass |
|
|
|
def post_call( |
|
self, original_response, input=None, api_key=None, additional_args={} |
|
): |
|
pass |
|
|
|
|
|
class CustomHuggingfaceTokenizer(TypedDict): |
|
identifier: str |
|
revision: str |
|
auth_token: Optional[str] |
|
|
|
|
|
class LITELLM_IMAGE_VARIATION_PROVIDERS(Enum): |
|
""" |
|
    Enum of providers supported per endpoint; makes it easier to track which provider supports which endpoint.
|
""" |
|
|
|
OPENAI = LlmProviders.OPENAI.value |
|
TOPAZ = LlmProviders.TOPAZ.value |
|
|
|
|
|
class HttpHandlerRequestFields(TypedDict, total=False): |
|
data: dict |
|
params: dict |
|
files: dict |
|
content: Any |
|
|
|
|
|
class ProviderSpecificHeader(TypedDict): |
|
custom_llm_provider: str |
|
extra_headers: dict |
|
|
|
|
|
class SelectTokenizerResponse(TypedDict): |
|
type: Literal["openai_tokenizer", "huggingface_tokenizer"] |
|
tokenizer: Any |
|
|