import asyncio |
|
import contextvars |
|
import datetime |
|
import inspect |
|
import json |
|
import os |
|
import random |
|
import sys |
|
import time |
|
import traceback |
|
import uuid |
|
from concurrent import futures |
|
from concurrent.futures import FIRST_COMPLETED, ThreadPoolExecutor, wait |
|
from copy import deepcopy |
|
from functools import partial |
|
from typing import ( |
|
Any, |
|
Callable, |
|
Dict, |
|
List, |
|
Literal, |
|
Mapping, |
|
Optional, |
|
Type, |
|
Union, |
|
cast, |
|
) |
|
|
|
import dotenv |
|
import httpx |
|
import openai |
|
import tiktoken |
|
from pydantic import BaseModel |
|
from typing_extensions import overload |
|
|
|
import litellm |
|
from litellm import ( |
|
Logging, |
|
client, |
|
exception_type, |
|
get_litellm_params, |
|
get_optional_params, |
|
) |
|
from litellm.integrations.custom_logger import CustomLogger |
|
from litellm.litellm_core_utils.audio_utils.utils import get_audio_file_for_health_check |
|
from litellm.litellm_core_utils.health_check_utils import ( |
|
_create_health_check_response, |
|
_filter_model_params, |
|
) |
|
from litellm.litellm_core_utils.litellm_logging import Logging as LiteLLMLoggingObj |
|
from litellm.litellm_core_utils.llm_request_utils import ( |
|
pick_cheapest_chat_models_from_llm_provider, |
|
) |
|
from litellm.litellm_core_utils.mock_functions import ( |
|
mock_embedding, |
|
mock_image_generation, |
|
) |
|
from litellm.litellm_core_utils.prompt_templates.common_utils import ( |
|
get_content_from_model_response, |
|
) |
|
from litellm.llms.base_llm.chat.transformation import BaseConfig |
|
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler |
|
from litellm.realtime_api.main import _realtime_health_check |
|
from litellm.secret_managers.main import get_secret_str |
|
from litellm.types.router import GenericLiteLLMParams |
|
from litellm.utils import ( |
|
CustomStreamWrapper, |
|
ProviderConfigManager, |
|
Usage, |
|
add_openai_metadata, |
|
async_mock_completion_streaming_obj, |
|
convert_to_model_response_object, |
|
create_pretrained_tokenizer, |
|
create_tokenizer, |
|
get_api_key, |
|
get_llm_provider, |
|
get_non_default_completion_params, |
|
get_optional_params_embeddings, |
|
get_optional_params_image_gen, |
|
get_optional_params_transcription, |
|
get_secret, |
|
    mock_completion_streaming_obj,
    print_verbose,
|
read_config_args, |
|
supports_httpx_timeout, |
|
token_counter, |
|
validate_chat_completion_messages, |
|
validate_chat_completion_tool_choice, |
|
) |
|
|
|
from ._logging import verbose_logger |
|
from .caching.caching import disable_cache, enable_cache, update_cache |
|
from .litellm_core_utils.fallback_utils import ( |
|
async_completion_with_fallbacks, |
|
completion_with_fallbacks, |
|
) |
|
from .litellm_core_utils.prompt_templates.common_utils import get_completion_messages |
|
from .litellm_core_utils.prompt_templates.factory import ( |
|
custom_prompt, |
|
function_call_prompt, |
|
map_system_message_pt, |
|
ollama_pt, |
|
prompt_factory, |
|
stringify_json_tool_call_content, |
|
) |
|
from .litellm_core_utils.streaming_chunk_builder_utils import ChunkProcessor |
|
from .llms import baseten, maritalk, ollama_chat |
|
from .llms.anthropic.chat import AnthropicChatCompletion |
|
from .llms.azure.audio_transcriptions import AzureAudioTranscription |
|
from .llms.azure.azure import AzureChatCompletion, _check_dynamic_azure_params |
|
from .llms.azure.chat.o_series_handler import AzureOpenAIO1ChatCompletion |
|
from .llms.azure.completion.handler import AzureTextCompletion |
|
from .llms.azure_ai.embed import AzureAIEmbedding |
|
from .llms.bedrock.chat import BedrockConverseLLM, BedrockLLM |
|
from .llms.bedrock.embed.embedding import BedrockEmbedding |
|
from .llms.bedrock.image.image_handler import BedrockImageGeneration |
|
from .llms.codestral.completion.handler import CodestralTextCompletion |
|
from .llms.cohere.embed import handler as cohere_embed |
|
from .llms.custom_httpx.aiohttp_handler import BaseLLMAIOHTTPHandler |
|
from .llms.custom_httpx.llm_http_handler import BaseLLMHTTPHandler |
|
from .llms.custom_llm import CustomLLM, custom_chat_llm_router |
|
from .llms.databricks.chat.handler import DatabricksChatCompletion |
|
from .llms.databricks.embed.handler import DatabricksEmbeddingHandler |
|
from .llms.deprecated_providers import aleph_alpha, palm |
|
from .llms.groq.chat.handler import GroqChatCompletion |
|
from .llms.huggingface.chat.handler import Huggingface |
|
from .llms.nlp_cloud.chat.handler import completion as nlp_cloud_chat_completion |
|
from .llms.ollama.completion import handler as ollama |
|
from .llms.oobabooga.chat import oobabooga |
|
from .llms.openai.completion.handler import OpenAITextCompletion |
|
from .llms.openai.image_variations.handler import OpenAIImageVariationsHandler |
|
from .llms.openai.openai import OpenAIChatCompletion |
|
from .llms.openai.transcriptions.handler import OpenAIAudioTranscription |
|
from .llms.openai_like.chat.handler import OpenAILikeChatHandler |
|
from .llms.openai_like.embedding.handler import OpenAILikeEmbeddingHandler |
|
from .llms.petals.completion import handler as petals_handler |
|
from .llms.predibase.chat.handler import PredibaseChatCompletion |
|
from .llms.replicate.chat.handler import completion as replicate_chat_completion |
|
from .llms.sagemaker.chat.handler import SagemakerChatHandler |
|
from .llms.sagemaker.completion.handler import SagemakerLLM |
|
from .llms.vertex_ai import vertex_ai_non_gemini |
|
from .llms.vertex_ai.gemini.vertex_and_google_ai_studio_gemini import VertexLLM |
|
from .llms.vertex_ai.gemini_embeddings.batch_embed_content_handler import ( |
|
GoogleBatchEmbeddings, |
|
) |
|
from .llms.vertex_ai.image_generation.image_generation_handler import ( |
|
VertexImageGeneration, |
|
) |
|
from .llms.vertex_ai.multimodal_embeddings.embedding_handler import ( |
|
VertexMultimodalEmbedding, |
|
) |
|
from .llms.vertex_ai.text_to_speech.text_to_speech_handler import VertexTextToSpeechAPI |
|
from .llms.vertex_ai.vertex_ai_partner_models.main import VertexAIPartnerModels |
|
from .llms.vertex_ai.vertex_embeddings.embedding_handler import VertexEmbedding |
|
from .llms.vertex_ai.vertex_model_garden.main import VertexAIModelGardenModels |
|
from .llms.vllm.completion import handler as vllm_handler |
|
from .llms.watsonx.chat.handler import WatsonXChatHandler |
|
from .llms.watsonx.common_utils import IBMWatsonXMixin |
|
from .types.llms.openai import ( |
|
ChatCompletionAssistantMessage, |
|
ChatCompletionAudioParam, |
|
ChatCompletionModality, |
|
ChatCompletionPredictionContentParam, |
|
ChatCompletionUserMessage, |
|
HttpxBinaryResponseContent, |
|
) |
|
from .types.utils import ( |
|
LITELLM_IMAGE_VARIATION_PROVIDERS, |
|
AdapterCompletionStreamWrapper, |
|
ChatCompletionMessageToolCall, |
|
CompletionTokensDetails, |
|
FileTypes, |
|
HiddenParams, |
|
LlmProviders, |
|
PromptTokensDetails, |
|
ProviderSpecificHeader, |
|
all_litellm_params, |
|
) |
|
|
|
encoding = tiktoken.get_encoding("cl100k_base") |
|
from litellm.utils import ( |
|
Choices, |
|
EmbeddingResponse, |
|
ImageResponse, |
|
Message, |
|
ModelResponse, |
|
TextChoices, |
|
TextCompletionResponse, |
|
TextCompletionStreamWrapper, |
|
TranscriptionResponse, |
|
) |
|
|
|
|
|
openai_chat_completions = OpenAIChatCompletion() |
|
openai_text_completions = OpenAITextCompletion() |
|
openai_audio_transcriptions = OpenAIAudioTranscription() |
|
openai_image_variations = OpenAIImageVariationsHandler() |
|
databricks_chat_completions = DatabricksChatCompletion() |
|
groq_chat_completions = GroqChatCompletion() |
|
azure_ai_embedding = AzureAIEmbedding() |
|
anthropic_chat_completions = AnthropicChatCompletion() |
|
azure_chat_completions = AzureChatCompletion() |
|
azure_o1_chat_completions = AzureOpenAIO1ChatCompletion() |
|
azure_text_completions = AzureTextCompletion() |
|
azure_audio_transcriptions = AzureAudioTranscription() |
|
huggingface = Huggingface() |
|
predibase_chat_completions = PredibaseChatCompletion() |
|
codestral_text_completions = CodestralTextCompletion() |
|
bedrock_chat_completion = BedrockLLM() |
|
bedrock_converse_chat_completion = BedrockConverseLLM() |
|
bedrock_embedding = BedrockEmbedding() |
|
bedrock_image_generation = BedrockImageGeneration() |
|
vertex_chat_completion = VertexLLM() |
|
vertex_embedding = VertexEmbedding() |
|
vertex_multimodal_embedding = VertexMultimodalEmbedding() |
|
vertex_image_generation = VertexImageGeneration() |
|
google_batch_embeddings = GoogleBatchEmbeddings() |
|
vertex_partner_models_chat_completion = VertexAIPartnerModels() |
|
vertex_model_garden_chat_completion = VertexAIModelGardenModels() |
|
vertex_text_to_speech = VertexTextToSpeechAPI() |
|
sagemaker_llm = SagemakerLLM() |
|
watsonx_chat_completion = WatsonXChatHandler() |
|
openai_like_embedding = OpenAILikeEmbeddingHandler() |
|
openai_like_chat_completion = OpenAILikeChatHandler() |
|
databricks_embedding = DatabricksEmbeddingHandler() |
|
base_llm_http_handler = BaseLLMHTTPHandler() |
|
base_llm_aiohttp_handler = BaseLLMAIOHTTPHandler() |
|
sagemaker_chat_completion = SagemakerChatHandler() |
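
# The objects above are module-level handler singletons; completion(), acompletion(),
# and the other entrypoints in this module dispatch to them based on the resolved
# custom_llm_provider for each request.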
|
|
|
|
|
|
|
class LiteLLM: |
|
def __init__( |
|
self, |
|
*, |
|
api_key=None, |
|
organization: Optional[str] = None, |
|
base_url: Optional[str] = None, |
|
timeout: Optional[float] = 600, |
|
max_retries: Optional[int] = litellm.num_retries, |
|
default_headers: Optional[Mapping[str, str]] = None, |
|
): |
|
self.params = locals() |
|
self.chat = Chat(self.params, router_obj=None) |
|
|
|
|
|
class Chat: |
|
def __init__(self, params, router_obj: Optional[Any]): |
|
self.params = params |
|
if self.params.get("acompletion", False) is True: |
|
self.params.pop("acompletion") |
|
self.completions: Union[AsyncCompletions, Completions] = AsyncCompletions( |
|
self.params, router_obj=router_obj |
|
) |
|
else: |
|
self.completions = Completions(self.params, router_obj=router_obj) |
|
|
|
|
|
class Completions: |
|
def __init__(self, params, router_obj: Optional[Any]): |
|
self.params = params |
|
self.router_obj = router_obj |
|
|
|
def create(self, messages, model=None, **kwargs): |
|
for k, v in kwargs.items(): |
|
self.params[k] = v |
|
model = model or self.params.get("model") |
|
if self.router_obj is not None: |
|
response = self.router_obj.completion( |
|
model=model, messages=messages, **self.params |
|
) |
|
else: |
|
response = completion(model=model, messages=messages, **self.params) |
|
return response |
|
|
|
|
|
class AsyncCompletions: |
|
def __init__(self, params, router_obj: Optional[Any]): |
|
self.params = params |
|
self.router_obj = router_obj |
|
|
|
async def create(self, messages, model=None, **kwargs): |
|
for k, v in kwargs.items(): |
|
self.params[k] = v |
|
model = model or self.params.get("model") |
|
if self.router_obj is not None: |
|
response = await self.router_obj.acompletion( |
|
model=model, messages=messages, **self.params |
|
) |
|
else: |
|
response = await acompletion(model=model, messages=messages, **self.params) |
|
return response |
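

# Example usage of the LiteLLM / Chat / Completions wrappers above (illustrative
# sketch; assumes a valid OPENAI_API_KEY is set in the environment and that
# "gpt-3.5-turbo" is an available model):
#
#   client_obj = LiteLLM()
#   resp = client_obj.chat.completions.create(
#       model="gpt-3.5-turbo",
#       messages=[{"role": "user", "content": "Hello!"}],
#   )
#   print(resp.choices[0].message.content)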
|
|
|
|
|
@client |
|
async def acompletion( |
|
model: str, |
|
|
|
messages: List = [], |
|
functions: Optional[List] = None, |
|
function_call: Optional[str] = None, |
|
timeout: Optional[Union[float, int]] = None, |
|
temperature: Optional[float] = None, |
|
top_p: Optional[float] = None, |
|
n: Optional[int] = None, |
|
stream: Optional[bool] = None, |
|
stream_options: Optional[dict] = None, |
|
stop=None, |
|
max_tokens: Optional[int] = None, |
|
max_completion_tokens: Optional[int] = None, |
|
modalities: Optional[List[ChatCompletionModality]] = None, |
|
prediction: Optional[ChatCompletionPredictionContentParam] = None, |
|
audio: Optional[ChatCompletionAudioParam] = None, |
|
presence_penalty: Optional[float] = None, |
|
frequency_penalty: Optional[float] = None, |
|
logit_bias: Optional[dict] = None, |
|
user: Optional[str] = None, |
|
|
|
response_format: Optional[Union[dict, Type[BaseModel]]] = None, |
|
seed: Optional[int] = None, |
|
tools: Optional[List] = None, |
|
tool_choice: Optional[str] = None, |
|
parallel_tool_calls: Optional[bool] = None, |
|
logprobs: Optional[bool] = None, |
|
top_logprobs: Optional[int] = None, |
|
deployment_id=None, |
|
reasoning_effort: Optional[Literal["low", "medium", "high"]] = None, |
|
|
|
base_url: Optional[str] = None, |
|
api_version: Optional[str] = None, |
|
api_key: Optional[str] = None, |
|
model_list: Optional[list] = None, |
|
extra_headers: Optional[dict] = None, |
|
|
|
**kwargs, |
|
) -> Union[ModelResponse, CustomStreamWrapper]: |
|
""" |
|
    Asynchronously executes a litellm.completion() call for any of the LLMs supported by LiteLLM (e.g. gpt-4, gpt-3.5-turbo, claude-2, command-nightly).
|
|
|
Parameters: |
|
model (str): The name of the language model to use for text completion. see all supported LLMs: https://docs.litellm.ai/docs/providers/ |
|
messages (List): A list of message objects representing the conversation context (default is an empty list). |
|
|
|
OPTIONAL PARAMS |
|
functions (List, optional): A list of functions to apply to the conversation messages (default is an empty list). |
|
function_call (str, optional): The name of the function to call within the conversation (default is an empty string). |
|
temperature (float, optional): The temperature parameter for controlling the randomness of the output (default is 1.0). |
|
top_p (float, optional): The top-p parameter for nucleus sampling (default is 1.0). |
|
n (int, optional): The number of completions to generate (default is 1). |
|
stream (bool, optional): If True, return a streaming response (default is False). |
|
stream_options (dict, optional): A dictionary containing options for the streaming response. Only use this if stream is True. |
|
        stop (string/list, optional): Up to 4 sequences where the LLM API will stop generating further tokens.
|
max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity). |
|
max_completion_tokens (integer, optional): An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens. |
|
modalities (List[ChatCompletionModality], optional): Output types that you would like the model to generate for this request. You can use `["text", "audio"]` |
|
prediction (ChatCompletionPredictionContentParam, optional): Configuration for a Predicted Output, which can greatly improve response times when large parts of the model response are known ahead of time. This is most common when you are regenerating a file with only minor changes to most of the content. |
|
audio (ChatCompletionAudioParam, optional): Parameters for audio output. Required when audio output is requested with modalities: ["audio"] |
|
        presence_penalty (float, optional): Penalizes new tokens based on whether they already appear in the text so far.
        frequency_penalty (float, optional): Penalizes new tokens based on how frequently they appear in the text so far.
|
logit_bias (dict, optional): Used to modify the probability of specific tokens appearing in the completion. |
|
user (str, optional): A unique identifier representing your end-user. This can help the LLM provider to monitor and detect abuse. |
|
metadata (dict, optional): Pass in additional metadata to tag your completion calls - eg. prompt version, details, etc. |
|
api_base (str, optional): Base URL for the API (default is None). |
|
api_version (str, optional): API version (default is None). |
|
api_key (str, optional): API key (default is None). |
|
model_list (list, optional): List of api base, version, keys |
|
timeout (float, optional): The maximum execution time in seconds for the completion request. |
|
|
|
LITELLM Specific Params |
|
mock_response (str, optional): If provided, return a mock completion response for testing or debugging purposes (default is None). |
|
custom_llm_provider (str, optional): Used for Non-OpenAI LLMs, Example usage for bedrock, set model="amazon.titan-tg1-large" and custom_llm_provider="bedrock" |
|
Returns: |
|
ModelResponse: A response object containing the generated completion and associated metadata. |
|
|
|
Notes: |
|
- This function is an asynchronous version of the `completion` function. |
|
        - The synchronous `completion` function is executed in a thread pool via `run_in_executor`, so it does not block the event loop.
|
- If `stream` is True, the function returns an async generator that yields completion lines. |
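
    Example:
        A minimal illustrative sketch; it assumes a valid OPENAI_API_KEY is set in the
        environment and that "gpt-3.5-turbo" is an available model.

        import asyncio
        import litellm

        async def main():
            response = await litellm.acompletion(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": "Hey, how's it going?"}],
            )
            print(response.choices[0].message.content)

        asyncio.run(main())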
|
""" |
|
fallbacks = kwargs.get("fallbacks", None) |
|
mock_timeout = kwargs.get("mock_timeout", None) |
|
|
|
if mock_timeout is True: |
|
await _handle_mock_timeout_async(mock_timeout, timeout, model) |
|
|
|
loop = asyncio.get_event_loop() |
|
custom_llm_provider = kwargs.get("custom_llm_provider", None) |
|
|
|
completion_kwargs = { |
|
"model": model, |
|
"messages": messages, |
|
"functions": functions, |
|
"function_call": function_call, |
|
"timeout": timeout, |
|
"temperature": temperature, |
|
"top_p": top_p, |
|
"n": n, |
|
"stream": stream, |
|
"stream_options": stream_options, |
|
"stop": stop, |
|
"max_tokens": max_tokens, |
|
"max_completion_tokens": max_completion_tokens, |
|
"modalities": modalities, |
|
"prediction": prediction, |
|
"audio": audio, |
|
"presence_penalty": presence_penalty, |
|
"frequency_penalty": frequency_penalty, |
|
"logit_bias": logit_bias, |
|
"user": user, |
|
"response_format": response_format, |
|
"seed": seed, |
|
"tools": tools, |
|
"tool_choice": tool_choice, |
|
"parallel_tool_calls": parallel_tool_calls, |
|
"logprobs": logprobs, |
|
"top_logprobs": top_logprobs, |
|
"deployment_id": deployment_id, |
|
"base_url": base_url, |
|
"api_version": api_version, |
|
"api_key": api_key, |
|
"model_list": model_list, |
|
"reasoning_effort": reasoning_effort, |
|
"extra_headers": extra_headers, |
|
"acompletion": True, |
|
} |
|
if custom_llm_provider is None: |
|
_, custom_llm_provider, _, _ = get_llm_provider( |
|
model=model, api_base=completion_kwargs.get("base_url", None) |
|
) |
|
|
|
fallbacks = fallbacks or litellm.model_fallbacks |
|
if fallbacks is not None: |
|
response = await async_completion_with_fallbacks( |
|
**completion_kwargs, kwargs={"fallbacks": fallbacks} |
|
) |
|
if response is None: |
|
raise Exception( |
|
"No response from fallbacks. Got none. Turn on `litellm.set_verbose=True` to see more details." |
|
) |
|
return response |
|
|
|
try: |
|
|
|
func = partial(completion, **completion_kwargs, **kwargs) |
|
|
|
|
|
ctx = contextvars.copy_context() |
|
func_with_context = partial(ctx.run, func) |
|
|
|
init_response = await loop.run_in_executor(None, func_with_context) |
|
        if isinstance(init_response, dict) or isinstance(
            init_response, ModelResponse
        ):  # completion() returned a fully-materialized response (e.g. cache hit or mock)
            if isinstance(init_response, dict):
                response = ModelResponse(**init_response)
            else:
                response = init_response
|
elif asyncio.iscoroutine(init_response): |
|
response = await init_response |
|
else: |
|
response = init_response |
|
|
|
if ( |
|
custom_llm_provider == "text-completion-openai" |
|
or custom_llm_provider == "text-completion-codestral" |
|
) and isinstance(response, TextCompletionResponse): |
|
response = litellm.OpenAITextCompletionConfig().convert_to_chat_model_response_object( |
|
response_object=response, |
|
model_response_object=litellm.ModelResponse(), |
|
) |
|
if isinstance(response, CustomStreamWrapper): |
|
response.set_logging_event_loop( |
|
loop=loop |
|
) |
|
return response |
|
except Exception as e: |
|
custom_llm_provider = custom_llm_provider or "openai" |
|
raise exception_type( |
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
original_exception=e, |
|
completion_kwargs=completion_kwargs, |
|
extra_kwargs=kwargs, |
|
) |
|
|
|
|
|
async def _async_streaming(response, model, custom_llm_provider, args): |
|
try: |
|
print_verbose(f"received response in _async_streaming: {response}") |
|
if asyncio.iscoroutine(response): |
|
response = await response |
|
async for line in response: |
|
print_verbose(f"line in async streaming: {line}") |
|
yield line |
|
except Exception as e: |
|
custom_llm_provider = custom_llm_provider or "openai" |
|
raise exception_type( |
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
original_exception=e, |
|
) |
|
|
|
|
|
def _handle_mock_potential_exceptions( |
|
mock_response: Union[str, Exception, dict], |
|
model: str, |
|
custom_llm_provider: Optional[str] = None, |
|
): |
|
if isinstance(mock_response, Exception): |
|
if isinstance(mock_response, openai.APIError): |
|
raise mock_response |
|
raise litellm.MockException( |
|
status_code=getattr(mock_response, "status_code", 500), |
|
message=getattr(mock_response, "text", str(mock_response)), |
|
llm_provider=getattr( |
|
mock_response, "llm_provider", custom_llm_provider or "openai" |
|
), |
|
model=model, |
|
request=httpx.Request(method="POST", url="https://api.openai.com/v1/"), |
|
) |
|
elif isinstance(mock_response, str) and mock_response == "litellm.RateLimitError": |
|
raise litellm.RateLimitError( |
|
message="this is a mock rate limit error", |
|
llm_provider=getattr( |
|
mock_response, "llm_provider", custom_llm_provider or "openai" |
|
), |
|
model=model, |
|
) |
|
elif ( |
|
isinstance(mock_response, str) |
|
and mock_response == "litellm.ContextWindowExceededError" |
|
): |
|
raise litellm.ContextWindowExceededError( |
|
message="this is a mock context window exceeded error", |
|
llm_provider=getattr( |
|
mock_response, "llm_provider", custom_llm_provider or "openai" |
|
), |
|
model=model, |
|
) |
|
elif ( |
|
isinstance(mock_response, str) |
|
and mock_response == "litellm.InternalServerError" |
|
): |
|
raise litellm.InternalServerError( |
|
message="this is a mock internal server error", |
|
llm_provider=getattr( |
|
mock_response, "llm_provider", custom_llm_provider or "openai" |
|
), |
|
model=model, |
|
) |
|
elif isinstance(mock_response, str) and mock_response.startswith( |
|
"Exception: content_filter_policy" |
|
): |
|
raise litellm.MockException( |
|
status_code=400, |
|
message=mock_response, |
|
llm_provider="azure", |
|
model=model, |
|
request=httpx.Request(method="POST", url="https://api.openai.com/v1/"), |
|
) |
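

# Illustrative note (not exhaustive): passing one of the string tokens handled above as
# `mock_response` lets callers exercise error-handling paths without a real provider,
# e.g.
#
#   completion(
#       model="gpt-3.5-turbo",
#       messages=[{"role": "user", "content": "hi"}],
#       mock_response="litellm.RateLimitError",
#   )
#
# raises litellm.RateLimitError via this helper.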
|
|
|
|
|
def _handle_mock_timeout( |
|
mock_timeout: Optional[bool], |
|
timeout: Optional[Union[float, str, httpx.Timeout]], |
|
model: str, |
|
): |
|
if mock_timeout is True and timeout is not None: |
|
_sleep_for_timeout(timeout) |
|
raise litellm.Timeout( |
|
message="This is a mock timeout error", |
|
llm_provider="openai", |
|
model=model, |
|
) |
|
|
|
|
|
async def _handle_mock_timeout_async( |
|
mock_timeout: Optional[bool], |
|
timeout: Optional[Union[float, str, httpx.Timeout]], |
|
model: str, |
|
): |
|
if mock_timeout is True and timeout is not None: |
|
await _sleep_for_timeout_async(timeout) |
|
raise litellm.Timeout( |
|
message="This is a mock timeout error", |
|
llm_provider="openai", |
|
model=model, |
|
) |
|
|
|
|
|
def _sleep_for_timeout(timeout: Union[float, str, httpx.Timeout]): |
|
if isinstance(timeout, float): |
|
time.sleep(timeout) |
|
elif isinstance(timeout, str): |
|
time.sleep(float(timeout)) |
|
elif isinstance(timeout, httpx.Timeout) and timeout.connect is not None: |
|
time.sleep(timeout.connect) |
|
|
|
|
|
async def _sleep_for_timeout_async(timeout: Union[float, str, httpx.Timeout]): |
|
if isinstance(timeout, float): |
|
await asyncio.sleep(timeout) |
|
elif isinstance(timeout, str): |
|
await asyncio.sleep(float(timeout)) |
|
elif isinstance(timeout, httpx.Timeout) and timeout.connect is not None: |
|
await asyncio.sleep(timeout.connect) |
|
|
|
|
|
def mock_completion( |
|
model: str, |
|
messages: List, |
|
stream: Optional[bool] = False, |
|
n: Optional[int] = None, |
|
mock_response: Union[str, Exception, dict] = "This is a mock request", |
|
mock_tool_calls: Optional[List] = None, |
|
mock_timeout: Optional[bool] = False, |
|
logging=None, |
|
custom_llm_provider=None, |
|
timeout: Optional[Union[float, str, httpx.Timeout]] = None, |
|
**kwargs, |
|
): |
|
""" |
|
Generate a mock completion response for testing or debugging purposes. |
|
|
|
This is a helper function that simulates the response structure of the OpenAI completion API. |
|
|
|
Parameters: |
|
model (str): The name of the language model for which the mock response is generated. |
|
messages (List): A list of message objects representing the conversation context. |
|
stream (bool, optional): If True, returns a mock streaming response (default is False). |
|
        mock_response (str | Exception | dict, optional): The content of the mock response, or an Exception / error-token string used to simulate a failure (default is "This is a mock request").
|
mock_timeout (bool, optional): If True, the mock response will be a timeout error (default is False). |
|
timeout (float, optional): The timeout value to use for the mock response (default is None). |
|
**kwargs: Additional keyword arguments that can be used but are not required. |
|
|
|
Returns: |
|
litellm.ModelResponse: A ModelResponse simulating a completion response with the specified model, messages, and mock response. |
|
|
|
Raises: |
|
Exception: If an error occurs during the generation of the mock completion response. |
|
Note: |
|
- This function is intended for testing or debugging purposes to generate mock completion responses. |
|
- If 'stream' is True, it returns a response that mimics the behavior of a streaming completion. |
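
    Example:
        A minimal illustrative sketch; no API key or network call is needed because the
        response is mocked locally.

        response = mock_completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "ping"}],
            mock_response="pong",
        )
        print(response.choices[0].message.content)  # -> "pong"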
|
""" |
|
try: |
|
if mock_response is None: |
|
mock_response = "This is a mock request" |
|
|
|
_handle_mock_timeout(mock_timeout=mock_timeout, timeout=timeout, model=model) |
|
|
|
|
|
if logging is not None: |
|
logging.pre_call( |
|
input=messages, |
|
api_key="mock-key", |
|
) |
|
|
|
_handle_mock_potential_exceptions( |
|
mock_response=mock_response, |
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
) |
|
|
|
mock_response = cast( |
|
Union[str, dict], mock_response |
|
) |
|
if isinstance(mock_response, str) and mock_response.startswith( |
|
"Exception: mock_streaming_error" |
|
): |
|
mock_response = litellm.MockException( |
|
message="This is a mock error raised mid-stream", |
|
llm_provider="anthropic", |
|
model=model, |
|
status_code=529, |
|
) |
|
time_delay = kwargs.get("mock_delay", None) |
|
if time_delay is not None: |
|
time.sleep(time_delay) |
|
|
|
if isinstance(mock_response, dict): |
|
return ModelResponse(**mock_response) |
|
|
|
model_response = ModelResponse(stream=stream) |
|
if stream is True: |
|
|
|
if kwargs.get("acompletion", False) is True: |
|
return CustomStreamWrapper( |
|
completion_stream=async_mock_completion_streaming_obj( |
|
model_response, mock_response=mock_response, model=model, n=n |
|
), |
|
model=model, |
|
custom_llm_provider="openai", |
|
logging_obj=logging, |
|
) |
|
return CustomStreamWrapper( |
|
completion_stream=mock_completion_streaming_obj( |
|
model_response, mock_response=mock_response, model=model, n=n |
|
), |
|
model=model, |
|
custom_llm_provider="openai", |
|
logging_obj=logging, |
|
) |
|
if isinstance(mock_response, litellm.MockException): |
|
raise mock_response |
|
if n is None: |
|
model_response.choices[0].message.content = mock_response |
|
else: |
|
_all_choices = [] |
|
for i in range(n): |
|
_choice = litellm.utils.Choices( |
|
index=i, |
|
message=litellm.utils.Message( |
|
content=mock_response, role="assistant" |
|
), |
|
) |
|
_all_choices.append(_choice) |
|
model_response.choices = _all_choices |
|
model_response.created = int(time.time()) |
|
model_response.model = model |
|
|
|
if mock_tool_calls: |
|
model_response.choices[0].message.tool_calls = [ |
|
ChatCompletionMessageToolCall(**tool_call) |
|
for tool_call in mock_tool_calls |
|
] |
|
|
|
setattr( |
|
model_response, |
|
"usage", |
|
Usage(prompt_tokens=10, completion_tokens=20, total_tokens=30), |
|
) |
|
|
|
try: |
|
_, custom_llm_provider, _, _ = litellm.utils.get_llm_provider(model=model) |
|
model_response._hidden_params["custom_llm_provider"] = custom_llm_provider |
|
except Exception: |
|
|
|
pass |
|
|
|
if logging is not None: |
|
logging.post_call( |
|
input=messages, |
|
api_key="my-secret-key", |
|
original_response="my-original-response", |
|
) |
|
return model_response |
|
|
|
except Exception as e: |
|
if isinstance(e, openai.APIError): |
|
raise e |
|
raise Exception("Mock completion response failed - {}".format(e)) |
|
|
|
|
|
@client |
|
def completion( |
|
model: str, |
|
|
|
messages: List = [], |
|
timeout: Optional[Union[float, str, httpx.Timeout]] = None, |
|
temperature: Optional[float] = None, |
|
top_p: Optional[float] = None, |
|
n: Optional[int] = None, |
|
stream: Optional[bool] = None, |
|
stream_options: Optional[dict] = None, |
|
stop=None, |
|
max_completion_tokens: Optional[int] = None, |
|
max_tokens: Optional[int] = None, |
|
modalities: Optional[List[ChatCompletionModality]] = None, |
|
prediction: Optional[ChatCompletionPredictionContentParam] = None, |
|
audio: Optional[ChatCompletionAudioParam] = None, |
|
presence_penalty: Optional[float] = None, |
|
frequency_penalty: Optional[float] = None, |
|
logit_bias: Optional[dict] = None, |
|
user: Optional[str] = None, |
|
|
|
reasoning_effort: Optional[Literal["low", "medium", "high"]] = None, |
|
response_format: Optional[Union[dict, Type[BaseModel]]] = None, |
|
seed: Optional[int] = None, |
|
tools: Optional[List] = None, |
|
tool_choice: Optional[Union[str, dict]] = None, |
|
logprobs: Optional[bool] = None, |
|
top_logprobs: Optional[int] = None, |
|
parallel_tool_calls: Optional[bool] = None, |
|
deployment_id=None, |
|
extra_headers: Optional[dict] = None, |
|
|
|
functions: Optional[List] = None, |
|
function_call: Optional[str] = None, |
|
|
|
base_url: Optional[str] = None, |
|
api_version: Optional[str] = None, |
|
api_key: Optional[str] = None, |
|
model_list: Optional[list] = None, |
|
|
|
**kwargs, |
|
) -> Union[ModelResponse, CustomStreamWrapper]: |
|
""" |
|
    Perform a completion() call using any of the LLMs supported by LiteLLM (e.g. gpt-4, gpt-3.5-turbo, claude-2, command-nightly)
|
Parameters: |
|
model (str): The name of the language model to use for text completion. see all supported LLMs: https://docs.litellm.ai/docs/providers/ |
|
messages (List): A list of message objects representing the conversation context (default is an empty list). |
|
|
|
OPTIONAL PARAMS |
|
functions (List, optional): A list of functions to apply to the conversation messages (default is an empty list). |
|
function_call (str, optional): The name of the function to call within the conversation (default is an empty string). |
|
temperature (float, optional): The temperature parameter for controlling the randomness of the output (default is 1.0). |
|
top_p (float, optional): The top-p parameter for nucleus sampling (default is 1.0). |
|
n (int, optional): The number of completions to generate (default is 1). |
|
stream (bool, optional): If True, return a streaming response (default is False). |
|
stream_options (dict, optional): A dictionary containing options for the streaming response. Only set this when you set stream: true. |
|
        stop (string/list, optional): Up to 4 sequences where the LLM API will stop generating further tokens.
|
max_tokens (integer, optional): The maximum number of tokens in the generated completion (default is infinity). |
|
max_completion_tokens (integer, optional): An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens. |
|
        modalities (List[ChatCompletionModality], optional): Output types that you would like the model to generate for this request. You can use `["text", "audio"]`
|
prediction (ChatCompletionPredictionContentParam, optional): Configuration for a Predicted Output, which can greatly improve response times when large parts of the model response are known ahead of time. This is most common when you are regenerating a file with only minor changes to most of the content. |
|
audio (ChatCompletionAudioParam, optional): Parameters for audio output. Required when audio output is requested with modalities: ["audio"] |
|
        presence_penalty (float, optional): Penalizes new tokens based on whether they already appear in the text so far.
        frequency_penalty (float, optional): Penalizes new tokens based on how frequently they appear in the text so far.
|
logit_bias (dict, optional): Used to modify the probability of specific tokens appearing in the completion. |
|
user (str, optional): A unique identifier representing your end-user. This can help the LLM provider to monitor and detect abuse. |
|
logprobs (bool, optional): Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message |
|
top_logprobs (int, optional): An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used. |
|
metadata (dict, optional): Pass in additional metadata to tag your completion calls - eg. prompt version, details, etc. |
|
api_base (str, optional): Base URL for the API (default is None). |
|
api_version (str, optional): API version (default is None). |
|
api_key (str, optional): API key (default is None). |
|
model_list (list, optional): List of api base, version, keys |
|
extra_headers (dict, optional): Additional headers to include in the request. |
|
|
|
LITELLM Specific Params |
|
mock_response (str, optional): If provided, return a mock completion response for testing or debugging purposes (default is None). |
|
custom_llm_provider (str, optional): Used for Non-OpenAI LLMs, Example usage for bedrock, set model="amazon.titan-tg1-large" and custom_llm_provider="bedrock" |
|
max_retries (int, optional): The number of retries to attempt (default is 0). |
|
Returns: |
|
ModelResponse: A response object containing the generated completion and associated metadata. |
|
|
|
Note: |
|
- This function is used to perform completions() using the specified language model. |
|
- It supports various optional parameters for customizing the completion behavior. |
|
- If 'mock_response' is provided, a mock completion response is returned for testing or debugging. |
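
    Example:
        A minimal illustrative sketch; it assumes a valid OPENAI_API_KEY is set in the
        environment and that "gpt-3.5-turbo" is an available model.

        import litellm

        response = litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hey, how's it going?"}],
        )
        print(response.choices[0].message.content)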
|
""" |
|
|
|
if model is None: |
|
raise ValueError("model param not passed in.") |
|
|
|
messages = validate_chat_completion_messages(messages=messages) |
|
|
|
tool_choice = validate_chat_completion_tool_choice(tool_choice=tool_choice) |
|
|
|
args = locals() |
|
api_base = kwargs.get("api_base", None) |
|
mock_response = kwargs.get("mock_response", None) |
|
mock_tool_calls = kwargs.get("mock_tool_calls", None) |
|
mock_timeout = cast(Optional[bool], kwargs.get("mock_timeout", None)) |
|
force_timeout = kwargs.get("force_timeout", 600) |
|
logger_fn = kwargs.get("logger_fn", None) |
|
verbose = kwargs.get("verbose", False) |
|
custom_llm_provider = kwargs.get("custom_llm_provider", None) |
|
litellm_logging_obj = kwargs.get("litellm_logging_obj", None) |
|
id = kwargs.get("id", None) |
|
metadata = kwargs.get("metadata", None) |
|
model_info = kwargs.get("model_info", None) |
|
proxy_server_request = kwargs.get("proxy_server_request", None) |
|
fallbacks = kwargs.get("fallbacks", None) |
|
provider_specific_header = cast( |
|
Optional[ProviderSpecificHeader], kwargs.get("provider_specific_header", None) |
|
) |
|
headers = kwargs.get("headers", None) or extra_headers |
|
|
|
ensure_alternating_roles: Optional[bool] = kwargs.get( |
|
"ensure_alternating_roles", None |
|
) |
|
user_continue_message: Optional[ChatCompletionUserMessage] = kwargs.get( |
|
"user_continue_message", None |
|
) |
|
assistant_continue_message: Optional[ChatCompletionAssistantMessage] = kwargs.get( |
|
"assistant_continue_message", None |
|
) |
|
if headers is None: |
|
headers = {} |
|
if extra_headers is not None: |
|
headers.update(extra_headers) |
|
num_retries = kwargs.get( |
|
"num_retries", None |
|
) |
|
max_retries = kwargs.get("max_retries", None) |
|
cooldown_time = kwargs.get("cooldown_time", None) |
|
context_window_fallback_dict = kwargs.get("context_window_fallback_dict", None) |
|
organization = kwargs.get("organization", None) |
|
|
|
ssl_verify = kwargs.get("ssl_verify", None) |
|
|
|
input_cost_per_token = kwargs.get("input_cost_per_token", None) |
|
output_cost_per_token = kwargs.get("output_cost_per_token", None) |
|
input_cost_per_second = kwargs.get("input_cost_per_second", None) |
|
output_cost_per_second = kwargs.get("output_cost_per_second", None) |
|
|
|
initial_prompt_value = kwargs.get("initial_prompt_value", None) |
|
roles = kwargs.get("roles", None) |
|
final_prompt_value = kwargs.get("final_prompt_value", None) |
|
bos_token = kwargs.get("bos_token", None) |
|
eos_token = kwargs.get("eos_token", None) |
|
preset_cache_key = kwargs.get("preset_cache_key", None) |
|
hf_model_name = kwargs.get("hf_model_name", None) |
|
supports_system_message = kwargs.get("supports_system_message", None) |
|
base_model = kwargs.get("base_model", None) |
|
|
|
disable_add_transform_inline_image_block = kwargs.get( |
|
"disable_add_transform_inline_image_block", None |
|
) |
|
|
|
text_completion = kwargs.get("text_completion", False) |
|
atext_completion = kwargs.get("atext_completion", False) |
|
|
|
acompletion = kwargs.get("acompletion", False) |
|
client = kwargs.get("client", None) |
|
|
|
no_log = kwargs.get("no-log", False) |
|
|
|
prompt_id = cast(Optional[str], kwargs.get("prompt_id", None)) |
|
prompt_variables = cast(Optional[dict], kwargs.get("prompt_variables", None)) |
|
|
|
messages = get_completion_messages( |
|
messages=messages, |
|
ensure_alternating_roles=ensure_alternating_roles or False, |
|
user_continue_message=user_continue_message, |
|
assistant_continue_message=assistant_continue_message, |
|
) |
|
|
|
non_default_params = get_non_default_completion_params(kwargs=kwargs) |
|
litellm_params = {} |
|
|
|
|
|
if isinstance(litellm_logging_obj, LiteLLMLoggingObj) and prompt_id is not None: |
|
model, messages, optional_params = ( |
|
litellm_logging_obj.get_chat_completion_prompt( |
|
model=model, |
|
messages=messages, |
|
non_default_params=non_default_params, |
|
prompt_id=prompt_id, |
|
prompt_variables=prompt_variables, |
|
) |
|
) |
|
|
|
try: |
|
if base_url is not None: |
|
api_base = base_url |
|
if num_retries is not None: |
|
max_retries = num_retries |
|
logging = litellm_logging_obj |
|
fallbacks = fallbacks or litellm.model_fallbacks |
|
if fallbacks is not None: |
|
return completion_with_fallbacks(**args) |
|
if model_list is not None: |
|
deployments = [ |
|
m["litellm_params"] for m in model_list if m["model_name"] == model |
|
] |
|
return litellm.batch_completion_models(deployments=deployments, **args) |
|
if litellm.model_alias_map and model in litellm.model_alias_map: |
|
model = litellm.model_alias_map[ |
|
model |
|
] |
|
model_response = ModelResponse() |
|
setattr(model_response, "usage", litellm.Usage()) |
|
if ( |
|
kwargs.get("azure", False) is True |
|
): |
|
custom_llm_provider = "azure" |
|
if deployment_id is not None: |
|
model = deployment_id |
|
custom_llm_provider = "azure" |
|
model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider( |
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
api_base=api_base, |
|
api_key=api_key, |
|
) |
|
|
|
if ( |
|
provider_specific_header is not None |
|
and provider_specific_header["custom_llm_provider"] == custom_llm_provider |
|
): |
|
headers.update(provider_specific_header["extra_headers"]) |
|
|
|
if model_response is not None and hasattr(model_response, "_hidden_params"): |
|
model_response._hidden_params["custom_llm_provider"] = custom_llm_provider |
|
model_response._hidden_params["region_name"] = kwargs.get( |
|
"aws_region_name", None |
|
) |
|
|
|
|
|
timeout = timeout or kwargs.get("request_timeout", 600) or 600 |
|
|
|
if isinstance(timeout, httpx.Timeout) and not supports_httpx_timeout( |
|
custom_llm_provider |
|
): |
|
timeout = timeout.read or 600 |
|
elif not isinstance(timeout, httpx.Timeout): |
|
timeout = float(timeout) |
|
|
|
|
|
if input_cost_per_token is not None and output_cost_per_token is not None: |
|
litellm.register_model( |
|
{ |
|
f"{custom_llm_provider}/{model}": { |
|
"input_cost_per_token": input_cost_per_token, |
|
"output_cost_per_token": output_cost_per_token, |
|
"litellm_provider": custom_llm_provider, |
|
} |
|
} |
|
) |
|
        elif input_cost_per_second is not None:
            # time-based pricing only requires the input cost; output_cost_per_second may be None
|
litellm.register_model( |
|
{ |
|
f"{custom_llm_provider}/{model}": { |
|
"input_cost_per_second": input_cost_per_second, |
|
"output_cost_per_second": output_cost_per_second, |
|
"litellm_provider": custom_llm_provider, |
|
} |
|
} |
|
) |
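
        # Illustrative note: supplying input_cost_per_token / output_cost_per_token (or the
        # *_per_second variants) to completion() registers custom pricing for
        # f"{custom_llm_provider}/{model}" in litellm's model cost map, e.g.
        #
        #   completion(
        #       model="my-finetuned-model",       # hypothetical model name
        #       messages=[{"role": "user", "content": "hi"}],
        #       custom_llm_provider="openai",
        #       input_cost_per_token=1e-6,
        #       output_cost_per_token=2e-6,
        #   )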
|
|
|
custom_prompt_dict = {} |
|
if ( |
|
initial_prompt_value |
|
or roles |
|
or final_prompt_value |
|
or bos_token |
|
or eos_token |
|
): |
|
custom_prompt_dict = {model: {}} |
|
if initial_prompt_value: |
|
custom_prompt_dict[model]["initial_prompt_value"] = initial_prompt_value |
|
if roles: |
|
custom_prompt_dict[model]["roles"] = roles |
|
if final_prompt_value: |
|
custom_prompt_dict[model]["final_prompt_value"] = final_prompt_value |
|
if bos_token: |
|
custom_prompt_dict[model]["bos_token"] = bos_token |
|
if eos_token: |
|
custom_prompt_dict[model]["eos_token"] = eos_token |
|
|
|
provider_config: Optional[BaseConfig] = None |
|
if custom_llm_provider is not None and custom_llm_provider in [ |
|
provider.value for provider in LlmProviders |
|
]: |
|
provider_config = ProviderConfigManager.get_provider_chat_config( |
|
model=model, provider=LlmProviders(custom_llm_provider) |
|
) |
|
|
|
if provider_config is not None: |
|
messages = provider_config.translate_developer_role_to_system_role( |
|
messages=messages |
|
) |
|
|
|
if ( |
|
supports_system_message is not None |
|
and isinstance(supports_system_message, bool) |
|
and supports_system_message is False |
|
): |
|
messages = map_system_message_pt(messages=messages) |
|
|
|
if dynamic_api_key is not None: |
|
api_key = dynamic_api_key |
|
|
|
optional_params = get_optional_params( |
|
functions=functions, |
|
function_call=function_call, |
|
temperature=temperature, |
|
top_p=top_p, |
|
n=n, |
|
stream=stream, |
|
stream_options=stream_options, |
|
stop=stop, |
|
max_tokens=max_tokens, |
|
max_completion_tokens=max_completion_tokens, |
|
modalities=modalities, |
|
prediction=prediction, |
|
audio=audio, |
|
presence_penalty=presence_penalty, |
|
frequency_penalty=frequency_penalty, |
|
logit_bias=logit_bias, |
|
user=user, |
|
|
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
response_format=response_format, |
|
seed=seed, |
|
tools=tools, |
|
tool_choice=tool_choice, |
|
max_retries=max_retries, |
|
logprobs=logprobs, |
|
top_logprobs=top_logprobs, |
|
api_version=api_version, |
|
parallel_tool_calls=parallel_tool_calls, |
|
messages=messages, |
|
reasoning_effort=reasoning_effort, |
|
**non_default_params, |
|
) |
|
|
|
if litellm.add_function_to_prompt and optional_params.get( |
|
"functions_unsupported_model", None |
|
): |
|
functions_unsupported_model = optional_params.pop( |
|
"functions_unsupported_model" |
|
) |
|
messages = function_call_prompt( |
|
messages=messages, functions=functions_unsupported_model |
|
) |
|
|
|
|
|
litellm_params = get_litellm_params( |
|
acompletion=acompletion, |
|
api_key=api_key, |
|
force_timeout=force_timeout, |
|
logger_fn=logger_fn, |
|
verbose=verbose, |
|
custom_llm_provider=custom_llm_provider, |
|
api_base=api_base, |
|
litellm_call_id=kwargs.get("litellm_call_id", None), |
|
model_alias_map=litellm.model_alias_map, |
|
completion_call_id=id, |
|
metadata=metadata, |
|
model_info=model_info, |
|
proxy_server_request=proxy_server_request, |
|
preset_cache_key=preset_cache_key, |
|
no_log=no_log, |
|
input_cost_per_second=input_cost_per_second, |
|
input_cost_per_token=input_cost_per_token, |
|
output_cost_per_second=output_cost_per_second, |
|
output_cost_per_token=output_cost_per_token, |
|
cooldown_time=cooldown_time, |
|
text_completion=kwargs.get("text_completion"), |
|
azure_ad_token_provider=kwargs.get("azure_ad_token_provider"), |
|
user_continue_message=kwargs.get("user_continue_message"), |
|
base_model=base_model, |
|
litellm_trace_id=kwargs.get("litellm_trace_id"), |
|
hf_model_name=hf_model_name, |
|
custom_prompt_dict=custom_prompt_dict, |
|
litellm_metadata=kwargs.get("litellm_metadata"), |
|
disable_add_transform_inline_image_block=disable_add_transform_inline_image_block, |
|
drop_params=kwargs.get("drop_params"), |
|
prompt_id=prompt_id, |
|
prompt_variables=prompt_variables, |
|
ssl_verify=ssl_verify, |
|
) |
|
logging.update_environment_variables( |
|
model=model, |
|
user=user, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
custom_llm_provider=custom_llm_provider, |
|
) |
|
if mock_response or mock_tool_calls or mock_timeout: |
|
kwargs.pop("mock_timeout", None) |
|
return mock_completion( |
|
model, |
|
messages, |
|
stream=stream, |
|
n=n, |
|
mock_response=mock_response, |
|
mock_tool_calls=mock_tool_calls, |
|
logging=logging, |
|
acompletion=acompletion, |
|
mock_delay=kwargs.get("mock_delay", None), |
|
custom_llm_provider=custom_llm_provider, |
|
mock_timeout=mock_timeout, |
|
timeout=timeout, |
|
) |
|
|
|
if custom_llm_provider == "azure": |
|
|
|
|
|
dynamic_params = False |
|
if client is not None and ( |
|
isinstance(client, openai.AzureOpenAI) |
|
or isinstance(client, openai.AsyncAzureOpenAI) |
|
): |
|
dynamic_params = _check_dynamic_azure_params( |
|
azure_client_params={"api_version": api_version}, |
|
azure_client=client, |
|
) |
|
|
|
api_type = get_secret("AZURE_API_TYPE") or "azure" |
|
|
|
api_base = api_base or litellm.api_base or get_secret("AZURE_API_BASE") |
|
|
|
api_version = ( |
|
api_version |
|
or litellm.api_version |
|
or get_secret("AZURE_API_VERSION") |
|
or litellm.AZURE_DEFAULT_API_VERSION |
|
) |
|
|
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or litellm.azure_key |
|
or get_secret("AZURE_OPENAI_API_KEY") |
|
or get_secret("AZURE_API_KEY") |
|
) |
|
|
|
azure_ad_token = optional_params.get("extra_body", {}).pop( |
|
"azure_ad_token", None |
|
) or get_secret("AZURE_AD_TOKEN") |
|
|
|
azure_ad_token_provider = litellm_params.get( |
|
"azure_ad_token_provider", None |
|
) |
|
|
|
headers = headers or litellm.headers |
|
|
|
if extra_headers is not None: |
|
optional_params["extra_headers"] = extra_headers |
|
|
|
if litellm.AzureOpenAIO1Config().is_o_series_model(model=model): |
|
|
|
|
|
config = litellm.AzureOpenAIO1Config.get_config() |
|
for k, v in config.items(): |
|
if ( |
|
k not in optional_params |
|
): |
|
optional_params[k] = v |
|
|
|
response = azure_o1_chat_completions.completion( |
|
model=model, |
|
messages=messages, |
|
headers=headers, |
|
api_key=api_key, |
|
api_base=api_base, |
|
api_version=api_version, |
|
dynamic_params=dynamic_params, |
|
azure_ad_token=azure_ad_token, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
logging_obj=logging, |
|
acompletion=acompletion, |
|
timeout=timeout, |
|
client=client, |
|
custom_llm_provider=custom_llm_provider, |
|
) |
|
else: |
|
|
|
config = litellm.AzureOpenAIConfig.get_config() |
|
for k, v in config.items(): |
|
if ( |
|
k not in optional_params |
|
): |
|
optional_params[k] = v |
|
|
|
|
|
response = azure_chat_completions.completion( |
|
model=model, |
|
messages=messages, |
|
headers=headers, |
|
api_key=api_key, |
|
api_base=api_base, |
|
api_version=api_version, |
|
api_type=api_type, |
|
dynamic_params=dynamic_params, |
|
azure_ad_token=azure_ad_token, |
|
azure_ad_token_provider=azure_ad_token_provider, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
logging_obj=logging, |
|
acompletion=acompletion, |
|
timeout=timeout, |
|
client=client, |
|
) |
|
|
|
if optional_params.get("stream", False): |
|
|
|
logging.post_call( |
|
input=messages, |
|
api_key=api_key, |
|
original_response=response, |
|
additional_args={ |
|
"headers": headers, |
|
"api_version": api_version, |
|
"api_base": api_base, |
|
}, |
|
) |
|
elif custom_llm_provider == "azure_text": |
|
|
|
api_type = get_secret("AZURE_API_TYPE") or "azure" |
|
|
|
api_base = api_base or litellm.api_base or get_secret("AZURE_API_BASE") |
|
|
|
api_version = ( |
|
api_version or litellm.api_version or get_secret("AZURE_API_VERSION") |
|
) |
|
|
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or litellm.azure_key |
|
or get_secret("AZURE_OPENAI_API_KEY") |
|
or get_secret("AZURE_API_KEY") |
|
) |
|
|
|
azure_ad_token = optional_params.get("extra_body", {}).pop( |
|
"azure_ad_token", None |
|
) or get_secret("AZURE_AD_TOKEN") |
|
|
|
azure_ad_token_provider = litellm_params.get( |
|
"azure_ad_token_provider", None |
|
) |
|
|
|
headers = headers or litellm.headers |
|
|
|
if extra_headers is not None: |
|
optional_params["extra_headers"] = extra_headers |
|
|
|
|
|
config = litellm.AzureOpenAIConfig.get_config() |
|
for k, v in config.items(): |
|
if ( |
|
k not in optional_params |
|
): |
|
optional_params[k] = v |
|
|
|
|
|
response = azure_text_completions.completion( |
|
model=model, |
|
messages=messages, |
|
headers=headers, |
|
api_key=api_key, |
|
api_base=api_base, |
|
api_version=api_version, |
|
api_type=api_type, |
|
azure_ad_token=azure_ad_token, |
|
azure_ad_token_provider=azure_ad_token_provider, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
logging_obj=logging, |
|
acompletion=acompletion, |
|
timeout=timeout, |
|
client=client, |
|
) |
|
|
|
if optional_params.get("stream", False) or acompletion is True: |
|
|
|
logging.post_call( |
|
input=messages, |
|
api_key=api_key, |
|
original_response=response, |
|
additional_args={ |
|
"headers": headers, |
|
"api_version": api_version, |
|
"api_base": api_base, |
|
}, |
|
) |
|
elif custom_llm_provider == "deepseek": |
|
|
|
try: |
|
response = base_llm_http_handler.completion( |
|
model=model, |
|
messages=messages, |
|
headers=headers, |
|
model_response=model_response, |
|
api_key=api_key, |
|
api_base=api_base, |
|
acompletion=acompletion, |
|
logging_obj=logging, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
timeout=timeout, |
|
client=client, |
|
custom_llm_provider=custom_llm_provider, |
|
encoding=encoding, |
|
stream=stream, |
|
) |
|
except Exception as e: |
|
|
|
logging.post_call( |
|
input=messages, |
|
api_key=api_key, |
|
original_response=str(e), |
|
additional_args={"headers": headers}, |
|
) |
|
raise e |
|
|
|
elif custom_llm_provider == "azure_ai": |
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or get_secret("AZURE_AI_API_BASE") |
|
) |
|
|
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or litellm.openai_key |
|
or get_secret("AZURE_AI_API_KEY") |
|
) |
|
|
|
headers = headers or litellm.headers |
|
|
|
if extra_headers is not None: |
|
optional_params["extra_headers"] = extra_headers |
|
|
|
|
|
if "command-r" in model: |
|
messages = stringify_json_tool_call_content(messages=messages) |
|
|
|
|
|
try: |
|
response = base_llm_http_handler.completion( |
|
model=model, |
|
messages=messages, |
|
headers=headers, |
|
model_response=model_response, |
|
api_key=api_key, |
|
api_base=api_base, |
|
acompletion=acompletion, |
|
logging_obj=logging, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
timeout=timeout, |
|
client=client, |
|
custom_llm_provider=custom_llm_provider, |
|
encoding=encoding, |
|
stream=stream, |
|
) |
|
except Exception as e: |
|
|
|
logging.post_call( |
|
input=messages, |
|
api_key=api_key, |
|
original_response=str(e), |
|
additional_args={"headers": headers}, |
|
) |
|
raise e |
|
|
|
if optional_params.get("stream", False): |
|
|
|
logging.post_call( |
|
input=messages, |
|
api_key=api_key, |
|
original_response=response, |
|
additional_args={"headers": headers}, |
|
) |
|
        elif (
            custom_llm_provider == "text-completion-openai"
            or "ft:babbage-002" in model
            or "ft:davinci-002" in model
            or (
                custom_llm_provider
                in litellm.openai_text_completion_compatible_providers
                and kwargs.get("text_completion") is True
            )
        ):
|
openai.api_type = "openai" |
|
|
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or get_secret("OPENAI_API_BASE") |
|
or "https://api.openai.com/v1" |
|
) |
|
|
|
openai.api_version = None |
|
|
|
|
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or litellm.openai_key |
|
or get_secret("OPENAI_API_KEY") |
|
) |
|
|
|
headers = headers or litellm.headers |
|
|
|
if extra_headers is not None: |
|
optional_params["extra_headers"] = extra_headers |
|
|
|
|
|
config = litellm.OpenAITextCompletionConfig.get_config() |
|
for k, v in config.items(): |
|
if ( |
|
k not in optional_params |
|
): |
|
optional_params[k] = v |
|
if litellm.organization: |
|
openai.organization = litellm.organization |
|
|
|
if ( |
|
len(messages) > 0 |
|
and "content" in messages[0] |
|
and isinstance(messages[0]["content"], list) |
|
): |
|
|
|
|
|
prompt = messages[0]["content"] |
|
else: |
|
prompt = " ".join([message["content"] for message in messages]) |
|
|
|
|
|
_response = openai_text_completions.completion( |
|
model=model, |
|
messages=messages, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
api_key=api_key, |
|
custom_llm_provider=custom_llm_provider, |
|
api_base=api_base, |
|
acompletion=acompletion, |
|
client=client, |
|
logging_obj=logging, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
timeout=timeout, |
|
) |
|
|
|
if ( |
|
optional_params.get("stream", False) is False |
|
and acompletion is False |
|
and text_completion is False |
|
): |
|
|
|
_response = litellm.OpenAITextCompletionConfig().convert_to_chat_model_response_object( |
|
response_object=_response, model_response_object=model_response |
|
) |
|
|
|
if optional_params.get("stream", False) or acompletion is True: |
|
|
|
logging.post_call( |
|
input=messages, |
|
api_key=api_key, |
|
original_response=_response, |
|
additional_args={"headers": headers}, |
|
) |
|
response = _response |
|
elif custom_llm_provider == "groq": |
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or get_secret("GROQ_API_BASE") |
|
or "https://api.groq.com/openai/v1" |
|
) |
|
|
|
|
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or litellm.groq_key |
|
or get_secret("GROQ_API_KEY") |
|
) |
|
|
|
headers = headers or litellm.headers |
|
|
|
|
|
config = litellm.GroqChatConfig.get_config() |
|
for k, v in config.items(): |
|
if ( |
|
k not in optional_params |
|
): |
|
optional_params[k] = v |
|
|
|
response = groq_chat_completions.completion( |
|
model=model, |
|
messages=messages, |
|
headers=headers, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
api_key=api_key, |
|
api_base=api_base, |
|
acompletion=acompletion, |
|
logging_obj=logging, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
timeout=timeout, |
|
custom_prompt_dict=custom_prompt_dict, |
|
client=client, |
|
custom_llm_provider=custom_llm_provider, |
|
encoding=encoding, |
|
) |
|
elif custom_llm_provider == "aiohttp_openai": |
|
|
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or get_secret("OPENAI_API_BASE") |
|
or "https://api.openai.com/v1" |
|
) |
|
|
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or litellm.openai_key |
|
or get_secret("OPENAI_API_KEY") |
|
) |
|
|
|
headers = headers or litellm.headers |
|
|
|
if extra_headers is not None: |
|
optional_params["extra_headers"] = extra_headers |
|
response = base_llm_aiohttp_handler.completion( |
|
model=model, |
|
messages=messages, |
|
headers=headers, |
|
model_response=model_response, |
|
api_key=api_key, |
|
api_base=api_base, |
|
acompletion=acompletion, |
|
logging_obj=logging, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
timeout=timeout, |
|
client=client, |
|
custom_llm_provider=custom_llm_provider, |
|
encoding=encoding, |
|
stream=stream, |
|
) |
|
elif ( |
|
model in litellm.open_ai_chat_completion_models |
|
or custom_llm_provider == "custom_openai" |
|
or custom_llm_provider == "deepinfra" |
|
or custom_llm_provider == "perplexity" |
|
or custom_llm_provider == "nvidia_nim" |
|
or custom_llm_provider == "cerebras" |
|
or custom_llm_provider == "sambanova" |
|
or custom_llm_provider == "volcengine" |
|
or custom_llm_provider == "anyscale" |
|
or custom_llm_provider == "mistral" |
|
or custom_llm_provider == "openai" |
|
or custom_llm_provider == "together_ai" |
|
or custom_llm_provider in litellm.openai_compatible_providers |
|
or "ft:gpt-3.5-turbo" in model |
|
): |
|
|
|
|
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or get_secret("OPENAI_API_BASE") |
|
or "https://api.openai.com/v1" |
|
) |
|
organization = ( |
|
organization |
|
or litellm.organization |
|
or get_secret("OPENAI_ORGANIZATION") |
|
or None |
|
) |
|
openai.organization = organization |
|
|
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or litellm.openai_key |
|
or get_secret("OPENAI_API_KEY") |
|
) |
|
|
|
headers = headers or litellm.headers |
|
|
|
if extra_headers is not None: |
|
optional_params["extra_headers"] = extra_headers |
|
|
|
if ( |
|
litellm.enable_preview_features and metadata is not None |
|
): |
|
optional_params["metadata"] = add_openai_metadata(metadata) |
|
|
|
|
|
config = litellm.OpenAIConfig.get_config() |
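# merge OpenAIConfig defaults; user-supplied optional params take precedence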
|
for k, v in config.items(): |
|
if ( |
|
k not in optional_params |
|
): |
|
optional_params[k] = v |
|
|
|
|
|
try: |
|
response = openai_chat_completions.completion( |
|
model=model, |
|
messages=messages, |
|
headers=headers, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
api_key=api_key, |
|
api_base=api_base, |
|
acompletion=acompletion, |
|
logging_obj=logging, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
timeout=timeout, |
|
custom_prompt_dict=custom_prompt_dict, |
|
client=client, |
|
organization=organization, |
|
custom_llm_provider=custom_llm_provider, |
|
) |
|
except Exception as e: |
|
|
|
logging.post_call( |
|
input=messages, |
|
api_key=api_key, |
|
original_response=str(e), |
|
additional_args={"headers": headers}, |
|
) |
|
raise e |
|
|
|
if optional_params.get("stream", False): |
|
|
|
logging.post_call( |
|
input=messages, |
|
api_key=api_key, |
|
original_response=response, |
|
additional_args={"headers": headers}, |
|
) |
|
|
|
elif ( |
|
"replicate" in model |
|
or custom_llm_provider == "replicate" |
|
or model in litellm.replicate_models |
|
): |
|
|
|
replicate_key = ( |
|
api_key |
|
or litellm.replicate_key |
|
or litellm.api_key |
|
or get_secret("REPLICATE_API_KEY") |
|
or get_secret("REPLICATE_API_TOKEN") |
|
) |
|
|
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or get_secret("REPLICATE_API_BASE") |
|
or "https://api.replicate.com/v1" |
|
) |
|
|
|
custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict |
|
|
|
model_response = replicate_chat_completion( |
|
model=model, |
|
messages=messages, |
|
api_base=api_base, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
api_key=replicate_key, |
|
logging_obj=logging, |
|
custom_prompt_dict=custom_prompt_dict, |
|
acompletion=acompletion, |
|
headers=headers, |
|
) |
|
|
|
if optional_params.get("stream", False) is True: |
|
|
|
logging.post_call( |
|
input=messages, |
|
api_key=replicate_key, |
|
original_response=model_response, |
|
) |
|
|
|
response = model_response |
|
elif ( |
|
"clarifai" in model |
|
or custom_llm_provider == "clarifai" |
|
or model in litellm.clarifai_models |
|
): |
|
clarifai_key = ( |
|
api_key |
|
or litellm.clarifai_key |
|
or litellm.api_key |
|
or get_secret("CLARIFAI_API_KEY") |
|
or get_secret("CLARIFAI_API_TOKEN") |
|
) |
|
|
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or get_secret("CLARIFAI_API_BASE") |
|
or "https://api.clarifai.com/v2" |
|
) |
|
api_base = litellm.ClarifaiConfig()._convert_model_to_url(model, api_base) |
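# fake_stream=True: the Clarifai response is returned whole and converted into stream chunks when streaming is requested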
|
response = base_llm_http_handler.completion( |
|
model=model, |
|
stream=stream, |
|
fake_stream=True, |
|
messages=messages, |
|
acompletion=acompletion, |
|
api_base=api_base, |
|
model_response=model_response, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
custom_llm_provider="clarifai", |
|
timeout=timeout, |
|
headers=headers, |
|
encoding=encoding, |
|
api_key=clarifai_key, |
|
logging_obj=logging, |
|
) |
|
elif custom_llm_provider == "anthropic_text": |
|
api_key = ( |
|
api_key |
|
or litellm.anthropic_key |
|
or litellm.api_key |
|
or os.environ.get("ANTHROPIC_API_KEY") |
|
) |
|
custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict |
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or get_secret("ANTHROPIC_API_BASE") |
|
or get_secret("ANTHROPIC_BASE_URL") |
|
or "https://api.anthropic.com/v1/complete" |
|
) |
|
|
|
if api_base is not None and not api_base.endswith("/v1/complete"): |
|
api_base += "/v1/complete" |
|
|
|
response = base_llm_http_handler.completion( |
|
model=model, |
|
stream=stream, |
|
messages=messages, |
|
acompletion=acompletion, |
|
api_base=api_base, |
|
model_response=model_response, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
custom_llm_provider="anthropic_text", |
|
timeout=timeout, |
|
headers=headers, |
|
encoding=encoding, |
|
api_key=api_key, |
|
logging_obj=logging, |
|
) |
|
elif custom_llm_provider == "anthropic": |
|
api_key = ( |
|
api_key |
|
or litellm.anthropic_key |
|
or litellm.api_key |
|
or os.environ.get("ANTHROPIC_API_KEY") |
|
) |
|
custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict |
|
|
|
|
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or get_secret("ANTHROPIC_API_BASE") |
|
or get_secret("ANTHROPIC_BASE_URL") |
|
or "https://api.anthropic.com/v1/messages" |
|
) |
|
|
|
if api_base is not None and not api_base.endswith("/v1/messages"): |
|
api_base += "/v1/messages" |
|
|
|
response = anthropic_chat_completions.completion( |
|
model=model, |
|
messages=messages, |
|
api_base=api_base, |
|
acompletion=acompletion, |
|
custom_prompt_dict=litellm.custom_prompt_dict, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
api_key=api_key, |
|
logging_obj=logging, |
|
headers=headers, |
|
timeout=timeout, |
|
client=client, |
|
custom_llm_provider=custom_llm_provider, |
|
) |
|
if optional_params.get("stream", False) or acompletion is True: |
|
|
|
logging.post_call( |
|
input=messages, |
|
api_key=api_key, |
|
original_response=response, |
|
) |
|
|
elif custom_llm_provider == "nlp_cloud": |
|
nlp_cloud_key = ( |
|
api_key |
|
or litellm.nlp_cloud_key |
|
or get_secret("NLP_CLOUD_API_KEY") |
|
or litellm.api_key |
|
) |
|
|
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or get_secret("NLP_CLOUD_API_BASE") |
|
or "https://api.nlpcloud.io/v1/gpu/" |
|
) |
|
|
|
response = nlp_cloud_chat_completion( |
|
model=model, |
|
messages=messages, |
|
api_base=api_base, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
api_key=nlp_cloud_key, |
|
logging_obj=logging, |
|
) |
|
|
|
if "stream" in optional_params and optional_params["stream"] is True: |
|
|
|
response = CustomStreamWrapper( |
|
response, |
|
model, |
|
custom_llm_provider="nlp_cloud", |
|
logging_obj=logging, |
|
) |
|
|
|
if optional_params.get("stream", False) or acompletion is True: |
|
|
|
logging.post_call( |
|
input=messages, |
|
api_key=api_key, |
|
original_response=response, |
|
) |
|
|
|
|
elif custom_llm_provider == "aleph_alpha": |
|
aleph_alpha_key = ( |
|
api_key |
|
or litellm.aleph_alpha_key |
|
or get_secret("ALEPH_ALPHA_API_KEY") |
|
or get_secret("ALEPHALPHA_API_KEY") |
|
or litellm.api_key |
|
) |
|
|
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or get_secret("ALEPH_ALPHA_API_BASE") |
|
or "https://api.aleph-alpha.com/complete" |
|
) |
|
|
|
model_response = aleph_alpha.completion( |
|
model=model, |
|
messages=messages, |
|
api_base=api_base, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
default_max_tokens_to_sample=litellm.max_tokens, |
|
api_key=aleph_alpha_key, |
|
logging_obj=logging, |
|
) |
|
|
|
if "stream" in optional_params and optional_params["stream"] is True: |
|
|
|
response = CustomStreamWrapper( |
|
model_response, |
|
model, |
|
custom_llm_provider="aleph_alpha", |
|
logging_obj=logging, |
|
) |
|
return response |
|
response = model_response |
|
elif custom_llm_provider == "cohere": |
|
cohere_key = ( |
|
api_key |
|
or litellm.cohere_key |
|
or get_secret("COHERE_API_KEY") |
|
or get_secret("CO_API_KEY") |
|
or litellm.api_key |
|
) |
|
|
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or get_secret("COHERE_API_BASE") |
|
or "https://api.cohere.ai/v1/generate" |
|
) |
|
|
|
headers = headers or litellm.headers or {}
|
|
|
if extra_headers is not None: |
|
headers.update(extra_headers) |
|
|
|
response = base_llm_http_handler.completion( |
|
model=model, |
|
stream=stream, |
|
messages=messages, |
|
acompletion=acompletion, |
|
api_base=api_base, |
|
model_response=model_response, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
custom_llm_provider="cohere", |
|
timeout=timeout, |
|
headers=headers, |
|
encoding=encoding, |
|
api_key=cohere_key, |
|
logging_obj=logging, |
|
client=client, |
|
) |
|
elif custom_llm_provider == "cohere_chat": |
|
cohere_key = ( |
|
api_key |
|
or litellm.cohere_key |
|
or get_secret_str("COHERE_API_KEY") |
|
or get_secret_str("CO_API_KEY") |
|
or litellm.api_key |
|
) |
|
|
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or get_secret_str("COHERE_API_BASE") |
|
or "https://api.cohere.ai/v1/chat" |
|
) |
|
|
|
headers = headers or litellm.headers or {}
|
|
|
if extra_headers is not None: |
|
headers.update(extra_headers) |
|
|
|
response = base_llm_http_handler.completion( |
|
model=model, |
|
stream=stream, |
|
messages=messages, |
|
acompletion=acompletion, |
|
api_base=api_base, |
|
model_response=model_response, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
custom_llm_provider="cohere_chat", |
|
timeout=timeout, |
|
headers=headers, |
|
encoding=encoding, |
|
api_key=cohere_key, |
|
logging_obj=logging, |
|
) |
|
elif custom_llm_provider == "maritalk": |
|
maritalk_key = ( |
|
api_key |
|
or litellm.maritalk_key |
|
or get_secret("MARITALK_API_KEY") |
|
or litellm.api_key |
|
) |
|
|
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or get_secret("MARITALK_API_BASE") |
|
or "https://chat.maritaca.ai/api" |
|
) |
|
|
|
model_response = openai_like_chat_completion.completion( |
|
model=model, |
|
messages=messages, |
|
api_base=api_base, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
api_key=maritalk_key, |
|
logging_obj=logging, |
|
custom_llm_provider="maritalk", |
|
custom_prompt_dict=custom_prompt_dict, |
|
) |
|
|
|
response = model_response |
|
elif custom_llm_provider == "huggingface": |
|
custom_llm_provider = "huggingface" |
|
huggingface_key = ( |
|
api_key |
|
or litellm.huggingface_key |
|
or os.environ.get("HF_TOKEN") |
|
or os.environ.get("HUGGINGFACE_API_KEY") |
|
or litellm.api_key |
|
) |
|
hf_headers = headers or litellm.headers |
|
|
|
custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict |
|
model_response = huggingface.completion( |
|
model=model, |
|
messages=messages, |
|
api_base=api_base, |
|
headers=hf_headers or {}, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
api_key=huggingface_key, |
|
acompletion=acompletion, |
|
logging_obj=logging, |
|
custom_prompt_dict=custom_prompt_dict, |
|
timeout=timeout, |
|
client=client, |
|
) |
|
if ( |
|
"stream" in optional_params |
|
and optional_params["stream"] is True |
|
and acompletion is False |
|
): |
|
|
|
response = CustomStreamWrapper( |
|
model_response, |
|
model, |
|
custom_llm_provider="huggingface", |
|
logging_obj=logging, |
|
) |
|
return response |
|
response = model_response |
|
elif custom_llm_provider == "oobabooga": |
|
custom_llm_provider = "oobabooga" |
|
model_response = oobabooga.completion( |
|
model=model, |
|
messages=messages, |
|
model_response=model_response, |
|
api_base=api_base, |
|
print_verbose=print_verbose, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
api_key=None, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
logging_obj=logging, |
|
) |
|
if "stream" in optional_params and optional_params["stream"] is True: |
|
|
|
response = CustomStreamWrapper( |
|
model_response, |
|
model, |
|
custom_llm_provider="oobabooga", |
|
logging_obj=logging, |
|
) |
|
return response |
|
response = model_response |
|
elif custom_llm_provider == "databricks": |
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or os.getenv("DATABRICKS_API_BASE") |
|
) |
|
|
|
|
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or litellm.databricks_key |
|
or get_secret("DATABRICKS_API_KEY") |
|
) |
|
|
|
headers = headers or litellm.headers |
|
|
|
|
|
try: |
|
response = databricks_chat_completions.completion( |
|
model=model, |
|
messages=messages, |
|
headers=headers, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
api_key=api_key, |
|
api_base=api_base, |
|
acompletion=acompletion, |
|
logging_obj=logging, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
timeout=timeout, |
|
custom_prompt_dict=custom_prompt_dict, |
|
client=client, |
|
encoding=encoding, |
|
custom_llm_provider="databricks", |
|
) |
|
except Exception as e: |
|
|
|
logging.post_call( |
|
input=messages, |
|
api_key=api_key, |
|
original_response=str(e), |
|
additional_args={"headers": headers}, |
|
) |
|
raise e |
|
|
|
if optional_params.get("stream", False): |
|
|
|
logging.post_call( |
|
input=messages, |
|
api_key=api_key, |
|
original_response=response, |
|
additional_args={"headers": headers}, |
|
) |
|
elif custom_llm_provider == "openrouter": |
|
api_base = api_base or litellm.api_base or "https://openrouter.ai/api/v1" |
|
|
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or litellm.openrouter_key |
|
or get_secret("OPENROUTER_API_KEY") |
|
or get_secret("OR_API_KEY") |
|
) |
|
|
|
openrouter_site_url = get_secret("OR_SITE_URL") or "https://litellm.ai" |
|
openrouter_app_name = get_secret("OR_APP_NAME") or "liteLLM" |
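# OpenRouter attribution headers identifying the calling site/app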
|
|
|
openrouter_headers = { |
|
"HTTP-Referer": openrouter_site_url, |
|
"X-Title": openrouter_app_name, |
|
} |
|
|
|
_headers = headers or litellm.headers |
|
if _headers: |
|
openrouter_headers.update(_headers) |
|
|
|
headers = openrouter_headers |
|
|
|
|
|
config = litellm.OpenrouterConfig.get_config() |
|
for k, v in config.items(): |
|
if k == "extra_body": |
|
|
|
if "extra_body" in optional_params: |
|
optional_params[k].update(v) |
|
else: |
|
optional_params[k] = v |
|
elif k not in optional_params: |
|
optional_params[k] = v |
|
|
|
data = {"model": model, "messages": messages, **optional_params} |
|
|
|
|
|
response = openai_like_chat_completion.completion( |
|
model=model, |
|
messages=messages, |
|
headers=headers, |
|
api_key=api_key, |
|
api_base=api_base, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
logging_obj=logging, |
|
acompletion=acompletion, |
|
timeout=timeout, |
|
custom_llm_provider="openrouter", |
|
custom_prompt_dict=custom_prompt_dict, |
|
encoding=encoding, |
|
) |
|
|
|
logging.post_call( |
|
input=messages, api_key=openai.api_key, original_response=response |
|
) |
|
elif ( |
|
custom_llm_provider == "together_ai" |
|
or ("togethercomputer" in model) |
|
or (model in litellm.together_ai_models) |
|
): |
|
""" |
|
Deprecated. Together AI calls are now routed through the OpenAI-compatible client - https://docs.together.ai/docs/openai-api-compatibility
|
""" |
|
pass |
|
elif custom_llm_provider == "palm": |
|
raise ValueError( |
|
"Palm was decommisioned on October 2024. Please use the `gemini/` route for Gemini Google AI Studio Models. Announcement: https://ai.google.dev/palm_docs/palm?hl=en" |
|
) |
|
elif custom_llm_provider == "vertex_ai_beta" or custom_llm_provider == "gemini": |
|
vertex_ai_project = ( |
|
optional_params.pop("vertex_project", None) |
|
or optional_params.pop("vertex_ai_project", None) |
|
or litellm.vertex_project |
|
or get_secret("VERTEXAI_PROJECT") |
|
) |
|
vertex_ai_location = ( |
|
optional_params.pop("vertex_location", None) |
|
or optional_params.pop("vertex_ai_location", None) |
|
or litellm.vertex_location |
|
or get_secret("VERTEXAI_LOCATION") |
|
) |
|
vertex_credentials = ( |
|
optional_params.pop("vertex_credentials", None) |
|
or optional_params.pop("vertex_ai_credentials", None) |
|
or get_secret("VERTEXAI_CREDENTIALS") |
|
) |
|
|
|
gemini_api_key = ( |
|
api_key |
|
or get_secret("GEMINI_API_KEY") |
|
or get_secret("PALM_API_KEY") |
|
or litellm.api_key |
|
) |
|
|
|
new_params = deepcopy(optional_params) |
|
response = vertex_chat_completion.completion( |
|
model=model, |
|
messages=messages, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=new_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
vertex_location=vertex_ai_location, |
|
vertex_project=vertex_ai_project, |
|
vertex_credentials=vertex_credentials, |
|
gemini_api_key=gemini_api_key, |
|
logging_obj=logging, |
|
acompletion=acompletion, |
|
timeout=timeout, |
|
custom_llm_provider=custom_llm_provider, |
|
client=client, |
|
api_base=api_base, |
|
extra_headers=extra_headers, |
|
) |
|
|
|
elif custom_llm_provider == "vertex_ai": |
|
vertex_ai_project = ( |
|
optional_params.pop("vertex_project", None) |
|
or optional_params.pop("vertex_ai_project", None) |
|
or litellm.vertex_project |
|
or get_secret("VERTEXAI_PROJECT") |
|
) |
|
vertex_ai_location = ( |
|
optional_params.pop("vertex_location", None) |
|
or optional_params.pop("vertex_ai_location", None) |
|
or litellm.vertex_location |
|
or get_secret("VERTEXAI_LOCATION") |
|
) |
|
vertex_credentials = ( |
|
optional_params.pop("vertex_credentials", None) |
|
or optional_params.pop("vertex_ai_credentials", None) |
|
or get_secret("VERTEXAI_CREDENTIALS") |
|
) |
|
|
|
new_params = deepcopy(optional_params) |
|
if ( |
|
model.startswith("meta/") |
|
or model.startswith("mistral") |
|
or model.startswith("codestral") |
|
or model.startswith("jamba") |
|
or model.startswith("claude") |
|
): |
|
model_response = vertex_partner_models_chat_completion.completion( |
|
model=model, |
|
messages=messages, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=new_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
api_base=api_base, |
|
vertex_location=vertex_ai_location, |
|
vertex_project=vertex_ai_project, |
|
vertex_credentials=vertex_credentials, |
|
logging_obj=logging, |
|
acompletion=acompletion, |
|
headers=headers, |
|
custom_prompt_dict=custom_prompt_dict, |
|
timeout=timeout, |
|
client=client, |
|
) |
|
elif "gemini" in model or ( |
|
litellm_params.get("base_model") is not None |
|
and "gemini" in litellm_params["base_model"] |
|
): |
|
model_response = vertex_chat_completion.completion( |
|
model=model, |
|
messages=messages, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=new_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
vertex_location=vertex_ai_location, |
|
vertex_project=vertex_ai_project, |
|
vertex_credentials=vertex_credentials, |
|
gemini_api_key=None, |
|
logging_obj=logging, |
|
acompletion=acompletion, |
|
timeout=timeout, |
|
custom_llm_provider=custom_llm_provider, |
|
client=client, |
|
api_base=api_base, |
|
extra_headers=extra_headers, |
|
) |
|
elif "openai" in model: |
|
|
|
model_response = vertex_model_garden_chat_completion.completion( |
|
model=model, |
|
messages=messages, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=new_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
api_base=api_base, |
|
vertex_location=vertex_ai_location, |
|
vertex_project=vertex_ai_project, |
|
vertex_credentials=vertex_credentials, |
|
logging_obj=logging, |
|
acompletion=acompletion, |
|
headers=headers, |
|
custom_prompt_dict=custom_prompt_dict, |
|
timeout=timeout, |
|
client=client, |
|
) |
|
else: |
|
model_response = vertex_ai_non_gemini.completion( |
|
model=model, |
|
messages=messages, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=new_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
vertex_location=vertex_ai_location, |
|
vertex_project=vertex_ai_project, |
|
vertex_credentials=vertex_credentials, |
|
logging_obj=logging, |
|
acompletion=acompletion, |
|
) |
|
|
|
if ( |
|
"stream" in optional_params |
|
and optional_params["stream"] is True |
|
and acompletion is False |
|
): |
|
response = CustomStreamWrapper( |
|
model_response, |
|
model, |
|
custom_llm_provider="vertex_ai", |
|
logging_obj=logging, |
|
) |
|
return response |
|
response = model_response |
|
elif custom_llm_provider == "predibase": |
|
tenant_id = ( |
|
optional_params.pop("tenant_id", None) |
|
or optional_params.pop("predibase_tenant_id", None) |
|
or litellm.predibase_tenant_id |
|
or get_secret("PREDIBASE_TENANT_ID") |
|
) |
|
|
|
if tenant_id is None: |
|
raise ValueError( |
|
"Missing Predibase Tenant ID - Required for making the request. Set dynamically (e.g. `completion(..tenant_id=<MY-ID>)`) or in env - `PREDIBASE_TENANT_ID`." |
|
) |
|
|
|
api_base = ( |
|
api_base |
|
or optional_params.pop("api_base", None) |
|
or optional_params.pop("base_url", None) |
|
or litellm.api_base |
|
or get_secret("PREDIBASE_API_BASE") |
|
) |
|
|
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or litellm.predibase_key |
|
or get_secret("PREDIBASE_API_KEY") |
|
) |
|
|
|
_model_response = predibase_chat_completions.completion( |
|
model=model, |
|
messages=messages, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
logging_obj=logging, |
|
acompletion=acompletion, |
|
api_base=api_base, |
|
custom_prompt_dict=custom_prompt_dict, |
|
api_key=api_key, |
|
tenant_id=tenant_id, |
|
timeout=timeout, |
|
) |
|
|
|
if ( |
|
"stream" in optional_params |
|
and optional_params["stream"] is True |
|
and acompletion is False |
|
): |
|
return _model_response |
|
response = _model_response |
|
elif custom_llm_provider == "text-completion-codestral": |
|
api_base = ( |
|
api_base |
|
or optional_params.pop("api_base", None) |
|
or optional_params.pop("base_url", None) |
|
or litellm.api_base |
|
or "https://codestral.mistral.ai/v1/fim/completions" |
|
) |
|
|
|
api_key = api_key or litellm.api_key or get_secret("CODESTRAL_API_KEY") |
|
|
|
text_completion_model_response = litellm.TextCompletionResponse( |
|
stream=stream |
|
) |
|
|
|
_model_response = codestral_text_completions.completion( |
|
model=model, |
|
messages=messages, |
|
model_response=text_completion_model_response, |
|
print_verbose=print_verbose, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
logging_obj=logging, |
|
acompletion=acompletion, |
|
api_base=api_base, |
|
custom_prompt_dict=custom_prompt_dict, |
|
api_key=api_key, |
|
timeout=timeout, |
|
) |
|
|
|
if ( |
|
"stream" in optional_params |
|
and optional_params["stream"] is True |
|
and acompletion is False |
|
): |
|
return _model_response |
|
response = _model_response |
|
elif custom_llm_provider == "sagemaker_chat": |
|
|
|
model_response = sagemaker_chat_completion.completion( |
|
model=model, |
|
messages=messages, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
custom_prompt_dict=custom_prompt_dict, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
logging_obj=logging, |
|
acompletion=acompletion, |
|
) |
|
|
|
|
|
response = model_response |
|
elif custom_llm_provider == "sagemaker": |
|
|
|
model_response = sagemaker_llm.completion( |
|
model=model, |
|
messages=messages, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
custom_prompt_dict=custom_prompt_dict, |
|
hf_model_name=hf_model_name, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
logging_obj=logging, |
|
acompletion=acompletion, |
|
) |
|
|
|
|
|
response = model_response |
|
elif custom_llm_provider == "bedrock": |
|
|
|
custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict |
|
|
|
if "aws_bedrock_client" in optional_params: |
|
verbose_logger.warning( |
|
"'aws_bedrock_client' is a deprecated param. Please move to another auth method - https://docs.litellm.ai/docs/providers/bedrock#boto3---authentication." |
|
) |
|
|
|
aws_bedrock_client = optional_params.pop("aws_bedrock_client") |
|
creds = aws_bedrock_client._get_credentials().get_frozen_credentials() |
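# map the boto3 client's frozen credentials onto explicit aws_* params for the supported auth path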
|
|
|
if creds.access_key: |
|
optional_params["aws_access_key_id"] = creds.access_key |
|
if creds.secret_key: |
|
optional_params["aws_secret_access_key"] = creds.secret_key |
|
if creds.token: |
|
optional_params["aws_session_token"] = creds.token |
|
if ( |
|
"aws_region_name" not in optional_params |
|
or optional_params["aws_region_name"] is None |
|
): |
|
optional_params["aws_region_name"] = ( |
|
aws_bedrock_client.meta.region_name |
|
) |
|
|
|
base_model = litellm.AmazonConverseConfig()._get_base_model(model) |
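# route by API: Converse models (or a 'converse/' prefix) use the Converse chat handler, 'converse_like/' models go through the generic HTTP handler, everything else falls back to the legacy invoke handler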
|
|
|
if base_model in litellm.bedrock_converse_models or model.startswith( |
|
"converse/" |
|
): |
|
model = model.replace("converse/", "") |
|
response = bedrock_converse_chat_completion.completion( |
|
model=model, |
|
messages=messages, |
|
custom_prompt_dict=custom_prompt_dict, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
logging_obj=logging, |
|
extra_headers=extra_headers, |
|
timeout=timeout, |
|
acompletion=acompletion, |
|
client=client, |
|
api_base=api_base, |
|
) |
|
elif "converse_like" in model: |
|
model = model.replace("converse_like/", "") |
|
response = base_llm_http_handler.completion( |
|
model=model, |
|
stream=stream, |
|
messages=messages, |
|
acompletion=acompletion, |
|
api_base=api_base, |
|
model_response=model_response, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
custom_llm_provider="bedrock", |
|
timeout=timeout, |
|
headers=headers, |
|
encoding=encoding, |
|
api_key=api_key, |
|
logging_obj=logging, |
|
client=client, |
|
) |
|
else: |
|
model = model.replace("invoke/", "") |
|
response = bedrock_chat_completion.completion( |
|
model=model, |
|
messages=messages, |
|
custom_prompt_dict=custom_prompt_dict, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
logging_obj=logging, |
|
extra_headers=extra_headers, |
|
timeout=timeout, |
|
acompletion=acompletion, |
|
client=client, |
|
api_base=api_base, |
|
) |
|
|
|
if optional_params.get("stream", False): |
|
|
|
logging.post_call( |
|
input=messages, |
|
api_key=None, |
|
original_response=response, |
|
) |
|
|
|
|
|
|
elif custom_llm_provider == "watsonx": |
|
response = watsonx_chat_completion.completion( |
|
model=model, |
|
messages=messages, |
|
headers=headers, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
api_key=api_key, |
|
api_base=api_base, |
|
acompletion=acompletion, |
|
logging_obj=logging, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
timeout=timeout, |
|
custom_prompt_dict=custom_prompt_dict, |
|
client=client, |
|
encoding=encoding, |
|
custom_llm_provider="watsonx", |
|
) |
|
elif custom_llm_provider == "watsonx_text": |
|
api_key = ( |
|
api_key |
|
or optional_params.pop("apikey", None) |
|
or get_secret_str("WATSONX_APIKEY") |
|
or get_secret_str("WATSONX_API_KEY") |
|
or get_secret_str("WX_API_KEY") |
|
) |
|
|
|
api_base = ( |
|
api_base |
|
or optional_params.pop( |
|
"url", |
|
optional_params.pop( |
|
"api_base", optional_params.pop("base_url", None) |
|
), |
|
) |
|
or get_secret_str("WATSONX_API_BASE") |
|
or get_secret_str("WATSONX_URL") |
|
or get_secret_str("WX_URL") |
|
or get_secret_str("WML_URL") |
|
) |
|
|
|
wx_credentials = optional_params.pop( |
|
"wx_credentials", |
|
optional_params.pop( |
|
"watsonx_credentials", None |
|
), |
|
) |
|
|
|
token: Optional[str] = None |
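# values in wx_credentials (url / apikey / token) take precedence over individually resolved settings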
|
if wx_credentials is not None: |
|
api_base = wx_credentials.get("url", api_base) |
|
api_key = wx_credentials.get( |
|
"apikey", wx_credentials.get("api_key", api_key) |
|
) |
|
token = wx_credentials.get( |
|
"token", |
|
wx_credentials.get( |
|
"watsonx_token", None |
|
), |
|
) |
|
|
|
if token is not None: |
|
optional_params["token"] = token |
|
|
|
response = base_llm_http_handler.completion( |
|
model=model, |
|
stream=stream, |
|
messages=messages, |
|
acompletion=acompletion, |
|
api_base=api_base, |
|
model_response=model_response, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
custom_llm_provider="watsonx_text", |
|
timeout=timeout, |
|
headers=headers, |
|
encoding=encoding, |
|
api_key=api_key, |
|
logging_obj=logging, |
|
client=client, |
|
) |
|
elif custom_llm_provider == "vllm": |
|
custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict |
|
model_response = vllm_handler.completion( |
|
model=model, |
|
messages=messages, |
|
custom_prompt_dict=custom_prompt_dict, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
logging_obj=logging, |
|
) |
|
|
|
if ( |
|
"stream" in optional_params and optional_params["stream"] is True |
|
): |
|
|
|
response = CustomStreamWrapper( |
|
model_response, |
|
model, |
|
custom_llm_provider="vllm", |
|
logging_obj=logging, |
|
) |
|
return response |
|
|
|
|
|
response = model_response |
|
elif custom_llm_provider == "ollama": |
|
api_base = ( |
|
litellm.api_base |
|
or api_base |
|
or get_secret("OLLAMA_API_BASE") |
|
or "http://localhost:11434" |
|
) |
|
response = base_llm_http_handler.completion( |
|
model=model, |
|
stream=stream, |
|
messages=messages, |
|
acompletion=acompletion, |
|
api_base=api_base, |
|
model_response=model_response, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
custom_llm_provider="ollama", |
|
timeout=timeout, |
|
headers=headers, |
|
encoding=encoding, |
|
api_key=api_key, |
|
logging_obj=logging, |
|
client=client, |
|
) |
|
|
|
elif custom_llm_provider == "ollama_chat": |
|
api_base = ( |
|
litellm.api_base |
|
or api_base |
|
or get_secret("OLLAMA_API_BASE") |
|
or "http://localhost:11434" |
|
) |
|
|
|
api_key = ( |
|
api_key |
|
or litellm.ollama_key |
|
or os.environ.get("OLLAMA_API_KEY") |
|
or litellm.api_key |
|
) |
|
|
|
generator = ollama_chat.get_ollama_response( |
|
api_base=api_base, |
|
api_key=api_key, |
|
model=model, |
|
messages=messages, |
|
optional_params=optional_params, |
|
logging_obj=logging, |
|
acompletion=acompletion, |
|
model_response=model_response, |
|
encoding=encoding, |
|
) |
|
if acompletion is True or optional_params.get("stream", False) is True: |
|
return generator |
|
|
|
response = generator |
|
|
|
elif custom_llm_provider == "triton": |
|
api_base = litellm.api_base or api_base |
|
response = base_llm_http_handler.completion( |
|
model=model, |
|
stream=stream, |
|
messages=messages, |
|
acompletion=acompletion, |
|
api_base=api_base, |
|
model_response=model_response, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
custom_llm_provider=custom_llm_provider, |
|
timeout=timeout, |
|
headers=headers, |
|
encoding=encoding, |
|
api_key=api_key, |
|
logging_obj=logging, |
|
) |
|
elif custom_llm_provider == "cloudflare": |
|
api_key = ( |
|
api_key |
|
or litellm.cloudflare_api_key |
|
or litellm.api_key |
|
or get_secret("CLOUDFLARE_API_KEY") |
|
) |
|
account_id = get_secret("CLOUDFLARE_ACCOUNT_ID") |
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or get_secret("CLOUDFLARE_API_BASE") |
|
or f"https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run/" |
|
) |
|
|
|
custom_prompt_dict = custom_prompt_dict or litellm.custom_prompt_dict |
|
response = base_llm_http_handler.completion( |
|
model=model, |
|
stream=stream, |
|
messages=messages, |
|
acompletion=acompletion, |
|
api_base=api_base, |
|
model_response=model_response, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
custom_llm_provider="cloudflare", |
|
timeout=timeout, |
|
headers=headers, |
|
encoding=encoding, |
|
api_key=api_key, |
|
logging_obj=logging, |
|
) |
|
elif ( |
|
custom_llm_provider == "baseten" |
|
or litellm.api_base == "https://app.baseten.co" |
|
): |
|
custom_llm_provider = "baseten" |
|
baseten_key = ( |
|
api_key |
|
or litellm.baseten_key |
|
or os.environ.get("BASETEN_API_KEY") |
|
or litellm.api_key |
|
) |
|
|
|
model_response = baseten.completion( |
|
model=model, |
|
messages=messages, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
api_key=baseten_key, |
|
logging_obj=logging, |
|
) |
|
if inspect.isgenerator(model_response) or ( |
|
"stream" in optional_params and optional_params["stream"] is True |
|
): |
|
|
|
response = CustomStreamWrapper( |
|
model_response, |
|
model, |
|
custom_llm_provider="baseten", |
|
logging_obj=logging, |
|
) |
|
return response |
|
response = model_response |
|
elif custom_llm_provider == "petals" or model in litellm.petals_models: |
|
api_base = api_base or litellm.api_base |
|
|
|
custom_llm_provider = "petals" |
|
stream = optional_params.pop("stream", False) |
|
model_response = petals_handler.completion( |
|
model=model, |
|
messages=messages, |
|
api_base=api_base, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
encoding=encoding, |
|
logging_obj=logging, |
|
client=client, |
|
) |
|
if stream is True: |
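# Petals returns the full completion; wrap the response text so it can be consumed as a stream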
|
|
|
resp_string = model_response["choices"][0]["message"]["content"] |
|
response = CustomStreamWrapper( |
|
resp_string, |
|
model, |
|
custom_llm_provider="petals", |
|
logging_obj=logging, |
|
) |
|
return response |
|
response = model_response |
|
elif custom_llm_provider == "custom": |
|
url = litellm.api_base or api_base or "" |
|
if not url:
|
raise ValueError( |
|
"api_base not set. Set api_base or litellm.api_base for custom endpoints" |
|
) |
|
|
|
""" |
|
assume input to custom LLM api bases follow this format: |
|
resp = litellm.module_level_client.post( |
|
api_base, |
|
json={ |
|
'model': 'meta-llama/Llama-2-13b-hf', # model name |
|
'params': { |
|
'prompt': ["The capital of France is P"], |
|
'max_tokens': 32, |
|
'temperature': 0.7, |
|
'top_p': 1.0, |
|
'top_k': 40, |
|
} |
|
} |
|
) |
|
|
|
""" |
|
prompt = " ".join([message["content"] for message in messages]) |
|
resp = litellm.module_level_client.post( |
|
url, |
|
json={ |
|
"model": model, |
|
"params": { |
|
"prompt": [prompt], |
|
"max_tokens": max_tokens, |
|
"temperature": temperature, |
|
"top_p": top_p, |
|
"top_k": kwargs.get("top_k", 40), |
|
}, |
|
}, |
|
) |
|
response_json = resp.json() |
|
""" |
|
assume all responses from custom api_bases of this format: |
|
{ |
|
'data': [ |
|
{ |
|
'prompt': 'The capital of France is P', |
|
'output': ['The capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France is PARIS.\nThe capital of France'], |
|
'params': {'temperature': 0.7, 'top_k': 40, 'top_p': 1}}], |
|
'message': 'ok' |
|
} |
|
] |
|
} |
|
""" |
|
string_response = response_json["data"][0]["output"][0] |
|
|
|
model_response.choices[0].message.content = string_response |
|
model_response.created = int(time.time()) |
|
model_response.model = model |
|
response = model_response |
|
elif ( |
|
custom_llm_provider in litellm._custom_providers |
|
): |
|
|
|
custom_handler: Optional[CustomLLM] = None |
|
for item in litellm.custom_provider_map: |
|
if item["provider"] == custom_llm_provider: |
|
custom_handler = item["custom_handler"] |
|
|
|
if custom_handler is None: |
|
raise ValueError( |
|
f"Unable to map your input to a model. Check your input - {args}" |
|
) |
|
|
|
|
|
handler_fn = custom_chat_llm_router( |
|
async_fn=acompletion, stream=stream, custom_llm=custom_handler |
|
) |
|
|
|
headers = headers or litellm.headers |
|
|
|
|
|
response = handler_fn( |
|
model=model, |
|
messages=messages, |
|
headers=headers, |
|
model_response=model_response, |
|
print_verbose=print_verbose, |
|
api_key=api_key, |
|
api_base=api_base, |
|
acompletion=acompletion, |
|
logging_obj=logging, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params, |
|
logger_fn=logger_fn, |
|
timeout=timeout, |
|
custom_prompt_dict=custom_prompt_dict, |
|
client=client, |
|
encoding=encoding, |
|
) |
|
if stream is True: |
|
return CustomStreamWrapper( |
|
completion_stream=response, |
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
logging_obj=logging, |
|
) |
|
|
|
else: |
|
raise ValueError( |
|
f"Unable to map your input to a model. Check your input - {args}" |
|
) |
|
return response |
|
except Exception as e: |
|
|
|
raise exception_type( |
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
original_exception=e, |
|
completion_kwargs=args, |
|
extra_kwargs=kwargs, |
|
) |
|
|
|
|
|
def completion_with_retries(*args, **kwargs): |
|
""" |
|
Executes a litellm.completion() call with tenacity-based retries (up to `num_retries` attempts, default 3).
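
Example (illustrative; assumes provider credentials such as OPENAI_API_KEY are set):
    response = completion_with_retries(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "Hello!"}],
        retry_strategy="exponential_backoff_retry",
        num_retries=3,
    )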
|
""" |
|
try: |
|
import tenacity |
|
except Exception as e: |
|
raise Exception( |
|
f"tenacity import failed please run `pip install tenacity`. Error{e}" |
|
) |
|
|
|
num_retries = kwargs.pop("num_retries", 3) |
|
|
|
kwargs["max_retries"] = 0 |
|
kwargs["num_retries"] = 0 |
|
retry_strategy: Literal["exponential_backoff_retry", "constant_retry"] = kwargs.pop( |
|
"retry_strategy", "constant_retry" |
|
) |
|
original_function = kwargs.pop("original_function", completion) |
|
if retry_strategy == "exponential_backoff_retry": |
|
retryer = tenacity.Retrying( |
|
wait=tenacity.wait_exponential(multiplier=1, max=10), |
|
stop=tenacity.stop_after_attempt(num_retries), |
|
reraise=True, |
|
) |
|
else: |
|
retryer = tenacity.Retrying( |
|
stop=tenacity.stop_after_attempt(num_retries), reraise=True |
|
) |
|
return retryer(original_function, *args, **kwargs) |
|
|
|
|
|
async def acompletion_with_retries(*args, **kwargs): |
|
""" |
|
[DEPRECATED]. Use 'acompletion' or router.acompletion instead! |
|
Executes a litellm.completion() with 3 retries |
|
""" |
|
try: |
|
import tenacity |
|
except Exception as e: |
|
raise Exception( |
|
f"tenacity import failed please run `pip install tenacity`. Error{e}" |
|
) |
|
|
|
num_retries = kwargs.pop("num_retries", 3) |
|
kwargs["max_retries"] = 0 |
|
kwargs["num_retries"] = 0 |
|
retry_strategy = kwargs.pop("retry_strategy", "constant_retry") |
|
original_function = kwargs.pop("original_function", completion) |
|
if retry_strategy == "exponential_backoff_retry": |
|
retryer = tenacity.Retrying( |
|
wait=tenacity.wait_exponential(multiplier=1, max=10), |
|
stop=tenacity.stop_after_attempt(num_retries), |
|
reraise=True, |
|
) |
|
else: |
|
retryer = tenacity.Retrying( |
|
stop=tenacity.stop_after_attempt(num_retries), reraise=True |
|
) |
|
return await retryer(original_function, *args, **kwargs) |
|
|
|
|
|
|
|
@client |
|
async def aembedding(*args, **kwargs) -> EmbeddingResponse: |
|
""" |
|
Asynchronously calls the `embedding` function with the given arguments and keyword arguments. |
|
|
|
Parameters: |
|
- `args` (tuple): Positional arguments to be passed to the `embedding` function. |
|
- `kwargs` (dict): Keyword arguments to be passed to the `embedding` function. |
|
|
|
Returns: |
|
- `response` (EmbeddingResponse): The response returned by the `embedding` function.
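
Example (illustrative; the embedding model name is just an example):
    response = await aembedding(
        model="text-embedding-ada-002",
        input=["hello from litellm"],
    )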
|
""" |
|
loop = asyncio.get_event_loop() |
|
model = args[0] if len(args) > 0 else kwargs["model"] |
|
|
|
kwargs["aembedding"] = True |
|
custom_llm_provider = None |
|
try: |
|
|
|
func = partial(embedding, *args, **kwargs) |
|
|
|
|
|
ctx = contextvars.copy_context() |
|
func_with_context = partial(ctx.run, func) |
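# wrap the sync embedding() call so it runs with the current contextvars context when dispatched to the thread-pool executor below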
|
|
|
_, custom_llm_provider, _, _ = get_llm_provider( |
|
model=model, api_base=kwargs.get("api_base", None) |
|
) |
|
|
|
|
|
init_response = await loop.run_in_executor(None, func_with_context) |
|
|
|
response: Optional[EmbeddingResponse] = None |
|
if isinstance(init_response, dict): |
|
response = EmbeddingResponse(**init_response) |
|
elif isinstance(init_response, EmbeddingResponse): |
|
response = init_response |
|
elif asyncio.iscoroutine(init_response): |
|
response = await init_response |
|
|
|
if ( |
|
response is not None |
|
and isinstance(response, EmbeddingResponse) |
|
and hasattr(response, "_hidden_params") |
|
): |
|
response._hidden_params["custom_llm_provider"] = custom_llm_provider |
|
|
|
if response is None: |
|
raise ValueError( |
|
"Unable to get Embedding Response. Please pass a valid llm_provider." |
|
) |
|
return response |
|
except Exception as e: |
|
custom_llm_provider = custom_llm_provider or "openai" |
|
raise exception_type( |
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
original_exception=e, |
|
completion_kwargs=args, |
|
extra_kwargs=kwargs, |
|
) |
|
|
|
|
|
@client |
|
def embedding( |
|
model, |
|
input=[], |
|
|
|
dimensions: Optional[int] = None, |
|
encoding_format: Optional[str] = None, |
|
timeout=600, |
|
|
|
api_base: Optional[str] = None, |
|
api_version: Optional[str] = None, |
|
api_key: Optional[str] = None, |
|
api_type: Optional[str] = None, |
|
caching: bool = False, |
|
user: Optional[str] = None, |
|
custom_llm_provider=None, |
|
litellm_call_id=None, |
|
logger_fn=None, |
|
**kwargs, |
|
) -> EmbeddingResponse: |
|
""" |
|
Embedding function that calls an API to generate embeddings for the given input. |
|
|
|
Parameters: |
|
- model: The embedding model to use. |
|
- input: The input for which embeddings are to be generated. |
|
- encoding_format: Optional[str] The format to return the embeddings in. Can be either `float` or `base64` |
|
- dimensions: The number of dimensions the resulting output embeddings should have. Only supported in text-embedding-3 and later models. |
|
- timeout: The timeout value for the API call, default 10 mins |
|
- litellm_call_id: The call ID for litellm logging. |
|
- litellm_logging_obj: The litellm logging object. |
|
- logger_fn: The logger function. |
|
- api_base: Optional. The base URL for the API. |
|
- api_version: Optional. The version of the API. |
|
- api_key: Optional. The API key to use. |
|
- api_type: Optional. The type of the API. |
|
- caching: A boolean indicating whether to enable caching. |
|
- custom_llm_provider: The custom llm provider. |
|
|
|
Returns: |
|
- response: The response received from the API call. |
|
|
|
Raises: |
|
- exception_type: If an exception occurs during the API call. |
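
Example (illustrative; the embedding model name is just an example):
    response = embedding(
        model="text-embedding-ada-002",
        input=["good morning from litellm"],
    )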
|
""" |
|
azure = kwargs.get("azure", None) |
|
client = kwargs.pop("client", None) |
|
rpm = kwargs.pop("rpm", None) |
|
tpm = kwargs.pop("tpm", None) |
|
max_retries = kwargs.get("max_retries", None) |
|
litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj") |
|
cooldown_time = kwargs.get("cooldown_time", None) |
|
mock_response: Optional[List[float]] = kwargs.get("mock_response", None) |
|
max_parallel_requests = kwargs.pop("max_parallel_requests", None) |
|
azure_ad_token_provider = kwargs.pop("azure_ad_token_provider", None) |
|
model_info = kwargs.get("model_info", None) |
|
metadata = kwargs.get("metadata", None) |
|
proxy_server_request = kwargs.get("proxy_server_request", None) |
|
aembedding = kwargs.get("aembedding", None) |
|
extra_headers = kwargs.get("extra_headers", None) |
|
headers = kwargs.get("headers", None) |
|
|
|
input_cost_per_token = kwargs.get("input_cost_per_token", None) |
|
output_cost_per_token = kwargs.get("output_cost_per_token", None) |
|
input_cost_per_second = kwargs.get("input_cost_per_second", None) |
|
output_cost_per_second = kwargs.get("output_cost_per_second", None) |
|
openai_params = [ |
|
"user", |
|
"dimensions", |
|
"request_timeout", |
|
"api_base", |
|
"api_version", |
|
"api_key", |
|
"deployment_id", |
|
"organization", |
|
"base_url", |
|
"default_headers", |
|
"timeout", |
|
"max_retries", |
|
"encoding_format", |
|
] |
|
litellm_params = [ |
|
"aembedding", |
|
"extra_headers", |
|
] + all_litellm_params |
|
|
|
default_params = openai_params + litellm_params |
|
non_default_params = { |
|
k: v for k, v in kwargs.items() if k not in default_params |
|
} |
|
|
|
model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider( |
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
api_base=api_base, |
|
api_key=api_key, |
|
) |
|
|
|
if dynamic_api_key is not None: |
|
api_key = dynamic_api_key |
|
|
|
optional_params = get_optional_params_embeddings( |
|
model=model, |
|
user=user, |
|
dimensions=dimensions, |
|
encoding_format=encoding_format, |
|
custom_llm_provider=custom_llm_provider, |
|
**non_default_params, |
|
) |
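# optional per-call pricing overrides - register custom cost info for this model so downstream cost tracking uses it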
|
|
|
|
|
if input_cost_per_token is not None and output_cost_per_token is not None: |
|
litellm.register_model( |
|
{ |
|
f"{custom_llm_provider}/{model}": { |
|
"input_cost_per_token": input_cost_per_token, |
|
"output_cost_per_token": output_cost_per_token, |
|
"litellm_provider": custom_llm_provider, |
|
} |
|
} |
|
) |
|
if input_cost_per_second is not None: |
|
output_cost_per_second = output_cost_per_second or 0.0 |
|
litellm.register_model( |
|
{ |
|
f"{custom_llm_provider}/{model}": { |
|
"input_cost_per_second": input_cost_per_second, |
|
"output_cost_per_second": output_cost_per_second, |
|
"litellm_provider": custom_llm_provider, |
|
} |
|
} |
|
) |
|
litellm_params_dict = get_litellm_params(**kwargs) |
|
|
|
logging: Logging = litellm_logging_obj |
|
logging.update_environment_variables( |
|
model=model, |
|
user=user, |
|
optional_params=optional_params, |
|
litellm_params=litellm_params_dict, |
|
custom_llm_provider=custom_llm_provider, |
|
) |
|
|
|
if mock_response is not None: |
|
return mock_embedding(model=model, mock_response=mock_response) |
|
try: |
|
response: Optional[EmbeddingResponse] = None |
|
|
|
if azure is True or custom_llm_provider == "azure": |
|
|
|
api_type = get_secret_str("AZURE_API_TYPE") or "azure" |
|
|
|
api_base = api_base or litellm.api_base or get_secret_str("AZURE_API_BASE") |
|
|
|
api_version = ( |
|
api_version |
|
or litellm.api_version |
|
or get_secret_str("AZURE_API_VERSION") |
|
or litellm.AZURE_DEFAULT_API_VERSION |
|
) |
|
|
|
azure_ad_token = optional_params.pop( |
|
"azure_ad_token", None |
|
) or get_secret_str("AZURE_AD_TOKEN") |
|
|
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or litellm.azure_key |
|
or get_secret_str("AZURE_API_KEY") |
|
) |
|
|
|
if api_base is None: |
|
raise ValueError( |
|
"No API Base provided for Azure OpenAI LLM provider. Set 'AZURE_API_BASE' in .env" |
|
) |
|
|
|
|
|
response = azure_chat_completions.embedding( |
|
model=model, |
|
input=input, |
|
api_base=api_base, |
|
api_key=api_key, |
|
api_version=api_version, |
|
azure_ad_token=azure_ad_token, |
|
azure_ad_token_provider=azure_ad_token_provider, |
|
logging_obj=logging, |
|
timeout=timeout, |
|
model_response=EmbeddingResponse(), |
|
optional_params=optional_params, |
|
client=client, |
|
aembedding=aembedding, |
|
max_retries=max_retries, |
|
headers=headers or extra_headers, |
|
) |
|
elif ( |
|
model in litellm.open_ai_embedding_models |
|
or custom_llm_provider == "openai" |
|
or custom_llm_provider == "together_ai" |
|
or custom_llm_provider == "nvidia_nim" |
|
): |
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or get_secret_str("OPENAI_API_BASE") |
|
or "https://api.openai.com/v1" |
|
) |
|
openai.organization = ( |
|
litellm.organization |
|
or get_secret_str("OPENAI_ORGANIZATION") |
|
or None |
|
) |
|
|
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or litellm.openai_key |
|
or get_secret_str("OPENAI_API_KEY") |
|
) |
|
|
|
if extra_headers is not None: |
|
optional_params["extra_headers"] = extra_headers |
|
|
|
api_type = "openai" |
|
api_version = None |
|
|
|
|
|
response = openai_chat_completions.embedding( |
|
model=model, |
|
input=input, |
|
api_base=api_base, |
|
api_key=api_key, |
|
logging_obj=logging, |
|
timeout=timeout, |
|
model_response=EmbeddingResponse(), |
|
optional_params=optional_params, |
|
client=client, |
|
aembedding=aembedding, |
|
max_retries=max_retries, |
|
) |
|
elif custom_llm_provider == "databricks": |
|
api_base = api_base or litellm.api_base or get_secret("DATABRICKS_API_BASE") |
|
|
|
|
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or litellm.databricks_key |
|
or get_secret("DATABRICKS_API_KEY") |
|
) |
|
|
|
|
|
response = databricks_embedding.embedding( |
|
model=model, |
|
input=input, |
|
api_base=api_base, |
|
api_key=api_key, |
|
logging_obj=logging, |
|
timeout=timeout, |
|
model_response=EmbeddingResponse(), |
|
optional_params=optional_params, |
|
client=client, |
|
aembedding=aembedding, |
|
) |
|
elif ( |
|
custom_llm_provider == "openai_like" |
|
or custom_llm_provider == "jina_ai" |
|
or custom_llm_provider == "hosted_vllm" |
|
or custom_llm_provider == "lm_studio" |
|
): |
|
api_base = ( |
|
api_base or litellm.api_base or get_secret_str("OPENAI_LIKE_API_BASE") |
|
) |
|
|
|
|
|
if api_key is None: |
|
api_key = ( |
|
litellm.api_key |
|
or litellm.openai_like_key |
|
or get_secret_str("OPENAI_LIKE_API_KEY") |
|
) |
|
|
|
|
|
response = openai_like_embedding.embedding( |
|
model=model, |
|
input=input, |
|
api_base=api_base, |
|
api_key=api_key, |
|
logging_obj=logging, |
|
timeout=timeout, |
|
model_response=EmbeddingResponse(), |
|
optional_params=optional_params, |
|
client=client, |
|
aembedding=aembedding, |
|
) |
|
elif custom_llm_provider == "cohere" or custom_llm_provider == "cohere_chat": |
|
cohere_key = ( |
|
api_key |
|
or litellm.cohere_key |
|
or get_secret("COHERE_API_KEY") |
|
or get_secret("CO_API_KEY") |
|
or litellm.api_key |
|
) |
|
|
|
if extra_headers is not None and isinstance(extra_headers, dict): |
|
headers = extra_headers |
|
else: |
|
headers = {} |
|
response = cohere_embed.embedding( |
|
model=model, |
|
input=input, |
|
optional_params=optional_params, |
|
encoding=encoding, |
|
api_key=cohere_key, |
|
headers=headers, |
|
logging_obj=logging, |
|
model_response=EmbeddingResponse(), |
|
aembedding=aembedding, |
|
timeout=timeout, |
|
client=client, |
|
) |
|
elif custom_llm_provider == "huggingface": |
|
api_key = ( |
|
api_key |
|
or litellm.huggingface_key |
|
or get_secret("HUGGINGFACE_API_KEY") |
|
or litellm.api_key |
|
) |
|
response = huggingface.embedding( |
|
model=model, |
|
input=input, |
|
encoding=encoding, |
|
api_key=api_key, |
|
api_base=api_base, |
|
logging_obj=logging, |
|
model_response=EmbeddingResponse(), |
|
optional_params=optional_params, |
|
client=client, |
|
aembedding=aembedding, |
|
) |
|
elif custom_llm_provider == "bedrock": |
|
if isinstance(input, str): |
|
transformed_input = [input] |
|
else: |
|
transformed_input = input |
|
response = bedrock_embedding.embeddings( |
|
model=model, |
|
input=transformed_input, |
|
encoding=encoding, |
|
logging_obj=logging, |
|
optional_params=optional_params, |
|
model_response=EmbeddingResponse(), |
|
client=client, |
|
timeout=timeout, |
|
aembedding=aembedding, |
|
litellm_params={}, |
|
api_base=api_base, |
|
print_verbose=print_verbose, |
|
extra_headers=extra_headers, |
|
) |
|
elif custom_llm_provider == "triton": |
|
if api_base is None: |
|
raise ValueError( |
|
"api_base is required for triton. Please pass `api_base`" |
|
) |
|
response = base_llm_http_handler.embedding( |
|
model=model, |
|
input=input, |
|
custom_llm_provider=custom_llm_provider, |
|
api_base=api_base, |
|
api_key=api_key, |
|
logging_obj=logging, |
|
timeout=timeout, |
|
model_response=EmbeddingResponse(), |
|
optional_params=optional_params, |
|
client=client, |
|
aembedding=aembedding, |
|
litellm_params={}, |
|
) |
|
elif custom_llm_provider == "gemini": |
|
gemini_api_key = ( |
|
api_key or get_secret_str("GEMINI_API_KEY") or litellm.api_key |
|
) |
|
|
|
response = google_batch_embeddings.batch_embeddings( |
|
model=model, |
|
input=input, |
|
encoding=encoding, |
|
logging_obj=logging, |
|
optional_params=optional_params, |
|
model_response=EmbeddingResponse(), |
|
vertex_project=None, |
|
vertex_location=None, |
|
vertex_credentials=None, |
|
aembedding=aembedding, |
|
print_verbose=print_verbose, |
|
custom_llm_provider="gemini", |
|
api_key=gemini_api_key, |
|
) |
|
|
|
elif custom_llm_provider == "vertex_ai": |
|
vertex_ai_project = ( |
|
optional_params.pop("vertex_project", None) |
|
or optional_params.pop("vertex_ai_project", None) |
|
or litellm.vertex_project |
|
or get_secret_str("VERTEXAI_PROJECT") |
|
or get_secret_str("VERTEX_PROJECT") |
|
) |
|
vertex_ai_location = ( |
|
optional_params.pop("vertex_location", None) |
|
or optional_params.pop("vertex_ai_location", None) |
|
or litellm.vertex_location |
|
or get_secret_str("VERTEXAI_LOCATION") |
|
or get_secret_str("VERTEX_LOCATION") |
|
) |
|
vertex_credentials = ( |
|
optional_params.pop("vertex_credentials", None) |
|
or optional_params.pop("vertex_ai_credentials", None) |
|
or get_secret_str("VERTEXAI_CREDENTIALS") |
|
or get_secret_str("VERTEX_CREDENTIALS") |
|
) |
|
|
|
if ( |
|
"image" in optional_params |
|
or "video" in optional_params |
|
or model |
|
in vertex_multimodal_embedding.SUPPORTED_MULTIMODAL_EMBEDDING_MODELS |
|
): |
|
|
|
response = vertex_multimodal_embedding.multimodal_embedding( |
|
model=model, |
|
input=input, |
|
encoding=encoding, |
|
logging_obj=logging, |
|
optional_params=optional_params, |
|
model_response=EmbeddingResponse(), |
|
vertex_project=vertex_ai_project, |
|
vertex_location=vertex_ai_location, |
|
vertex_credentials=vertex_credentials, |
|
aembedding=aembedding, |
|
print_verbose=print_verbose, |
|
custom_llm_provider="vertex_ai", |
|
) |
|
else: |
|
response = vertex_embedding.embedding( |
|
model=model, |
|
input=input, |
|
encoding=encoding, |
|
logging_obj=logging, |
|
optional_params=optional_params, |
|
model_response=EmbeddingResponse(), |
|
vertex_project=vertex_ai_project, |
|
vertex_location=vertex_ai_location, |
|
vertex_credentials=vertex_credentials, |
|
custom_llm_provider="vertex_ai", |
|
timeout=timeout, |
|
aembedding=aembedding, |
|
print_verbose=print_verbose, |
|
api_key=api_key, |
|
) |
|
elif custom_llm_provider == "oobabooga": |
|
response = oobabooga.embedding( |
|
model=model, |
|
input=input, |
|
encoding=encoding, |
|
api_base=api_base, |
|
logging_obj=logging, |
|
optional_params=optional_params, |
|
model_response=EmbeddingResponse(), |
|
api_key=api_key, |
|
) |
|
elif custom_llm_provider == "ollama": |
|
api_base = ( |
|
litellm.api_base |
|
or api_base |
|
or get_secret_str("OLLAMA_API_BASE") |
|
or "http://localhost:11434" |
|
) |
|
|
|
if isinstance(input, str): |
|
input = [input] |
|
if not all(isinstance(item, str) for item in input): |
|
raise litellm.BadRequestError( |
|
message=f"Invalid input for ollama embeddings. input={input}", |
|
model=model, |
|
llm_provider="ollama", |
|
) |
|
ollama_embeddings_fn = ( |
|
ollama.ollama_aembeddings |
|
if aembedding is True |
|
else ollama.ollama_embeddings |
|
) |
|
response = ollama_embeddings_fn( |
|
api_base=api_base, |
|
model=model, |
|
prompts=input, |
|
encoding=encoding, |
|
logging_obj=logging, |
|
optional_params=optional_params, |
|
model_response=EmbeddingResponse(), |
|
) |
|
elif custom_llm_provider == "sagemaker": |
|
response = sagemaker_llm.embedding( |
|
model=model, |
|
input=input, |
|
encoding=encoding, |
|
logging_obj=logging, |
|
optional_params=optional_params, |
|
model_response=EmbeddingResponse(), |
|
print_verbose=print_verbose, |
|
) |
|
elif custom_llm_provider == "mistral": |
|
api_key = api_key or litellm.api_key or get_secret_str("MISTRAL_API_KEY") |
|
response = openai_chat_completions.embedding( |
|
model=model, |
|
input=input, |
|
api_base=api_base, |
|
api_key=api_key, |
|
logging_obj=logging, |
|
timeout=timeout, |
|
model_response=EmbeddingResponse(), |
|
optional_params=optional_params, |
|
client=client, |
|
aembedding=aembedding, |
|
) |
|
elif custom_llm_provider == "fireworks_ai": |
|
api_key = ( |
|
api_key or litellm.api_key or get_secret_str("FIREWORKS_AI_API_KEY") |
|
) |
|
response = openai_chat_completions.embedding( |
|
model=model, |
|
input=input, |
|
api_base=api_base, |
|
api_key=api_key, |
|
logging_obj=logging, |
|
timeout=timeout, |
|
model_response=EmbeddingResponse(), |
|
optional_params=optional_params, |
|
client=client, |
|
aembedding=aembedding, |
|
) |
|
elif custom_llm_provider == "voyage": |
|
response = base_llm_http_handler.embedding( |
|
model=model, |
|
input=input, |
|
custom_llm_provider=custom_llm_provider, |
|
api_base=api_base, |
|
api_key=api_key, |
|
logging_obj=logging, |
|
timeout=timeout, |
|
model_response=EmbeddingResponse(), |
|
optional_params=optional_params, |
|
client=client, |
|
aembedding=aembedding, |
|
litellm_params={}, |
|
) |
|
elif custom_llm_provider == "watsonx": |
|
credentials = IBMWatsonXMixin.get_watsonx_credentials( |
|
optional_params=optional_params, api_key=api_key, api_base=api_base |
|
) |
|
|
|
api_key = credentials["api_key"] |
|
api_base = credentials["api_base"] |
|
|
|
if "token" in credentials: |
|
optional_params["token"] = credentials["token"] |
|
|
|
response = base_llm_http_handler.embedding( |
|
model=model, |
|
input=input, |
|
custom_llm_provider=custom_llm_provider, |
|
api_base=api_base, |
|
api_key=api_key, |
|
logging_obj=logging, |
|
timeout=timeout, |
|
model_response=EmbeddingResponse(), |
|
optional_params=optional_params, |
|
litellm_params={}, |
|
client=client, |
|
aembedding=aembedding, |
|
) |
|
elif custom_llm_provider == "xinference": |
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or get_secret_str("XINFERENCE_API_KEY") |
|
or "stub-xinference-key" |
|
) |
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or get_secret_str("XINFERENCE_API_BASE") |
|
or "http://127.0.0.1:9997/v1" |
|
) |
|
response = openai_chat_completions.embedding( |
|
model=model, |
|
input=input, |
|
api_base=api_base, |
|
api_key=api_key, |
|
logging_obj=logging, |
|
timeout=timeout, |
|
model_response=EmbeddingResponse(), |
|
optional_params=optional_params, |
|
client=client, |
|
aembedding=aembedding, |
|
) |
|
elif custom_llm_provider == "azure_ai": |
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or get_secret_str("AZURE_AI_API_BASE") |
|
) |
|
|
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or litellm.openai_key |
|
or get_secret_str("AZURE_AI_API_KEY") |
|
) |
|
|
|
|
|
response = azure_ai_embedding.embedding( |
|
model=model, |
|
input=input, |
|
api_base=api_base, |
|
api_key=api_key, |
|
logging_obj=logging, |
|
timeout=timeout, |
|
model_response=EmbeddingResponse(), |
|
optional_params=optional_params, |
|
client=client, |
|
aembedding=aembedding, |
|
) |
|
else: |
|
args = locals() |
|
raise ValueError(f"No valid embedding model args passed in - {args}") |
|
if response is not None and hasattr(response, "_hidden_params"): |
|
response._hidden_params["custom_llm_provider"] = custom_llm_provider |
|
|
|
if response is None: |
|
args = locals() |
|
raise ValueError(f"No valid embedding model args passed in - {args}") |
|
return response |
|
except Exception as e: |
|
|
|
litellm_logging_obj.post_call( |
|
input=input, |
|
api_key=api_key, |
|
original_response=str(e), |
|
) |
|
|
|
raise exception_type( |
|
model=model, |
|
original_exception=e, |
|
custom_llm_provider=custom_llm_provider, |
|
extra_kwargs=kwargs, |
|
) |
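
# Illustrative usage of the embedding entrypoint above (exposed as litellm.embedding()).
# A minimal sketch, not executed here; assumes an OpenAI key is configured via OPENAI_API_KEY
# and that "text-embedding-3-small" is available on the account:
#
#     response = litellm.embedding(
#         model="text-embedding-3-small",
#         input=["hello from litellm"],
#     )
#     print(response.data[0]["embedding"][:5])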
|
|
|
|
|
|
|
@client |
|
async def atext_completion( |
|
*args, **kwargs |
|
) -> Union[TextCompletionResponse, TextCompletionStreamWrapper]: |
|
""" |
|
    Async wrapper around text_completion() - handles both streaming and non-streaming responses for the text completion endpoint.
|
""" |
|
loop = asyncio.get_event_loop() |
|
model = args[0] if len(args) > 0 else kwargs["model"] |
|
|
|
kwargs["acompletion"] = True |
|
custom_llm_provider = None |
|
try: |
|
|
|
func = partial(text_completion, *args, **kwargs) |
|
|
|
|
|
ctx = contextvars.copy_context() |
|
func_with_context = partial(ctx.run, func) |
|
|
|
_, custom_llm_provider, _, _ = get_llm_provider( |
|
model=model, api_base=kwargs.get("api_base", None) |
|
) |
|
|
|
if ( |
|
custom_llm_provider == "openai" |
|
or custom_llm_provider == "azure" |
|
or custom_llm_provider == "azure_text" |
|
or custom_llm_provider == "custom_openai" |
|
or custom_llm_provider == "anyscale" |
|
or custom_llm_provider == "mistral" |
|
or custom_llm_provider == "openrouter" |
|
or custom_llm_provider == "deepinfra" |
|
or custom_llm_provider == "perplexity" |
|
or custom_llm_provider == "groq" |
|
or custom_llm_provider == "nvidia_nim" |
|
or custom_llm_provider == "cerebras" |
|
or custom_llm_provider == "sambanova" |
|
or custom_llm_provider == "ai21_chat" |
|
or custom_llm_provider == "ai21" |
|
or custom_llm_provider == "volcengine" |
|
or custom_llm_provider == "text-completion-codestral" |
|
or custom_llm_provider == "deepseek" |
|
or custom_llm_provider == "text-completion-openai" |
|
or custom_llm_provider == "huggingface" |
|
or custom_llm_provider == "ollama" |
|
or custom_llm_provider == "vertex_ai" |
|
or custom_llm_provider in litellm.openai_compatible_providers |
|
): |
|
|
|
response = await loop.run_in_executor(None, func_with_context) |
|
if asyncio.iscoroutine(response): |
|
response = await response |
|
else: |
|
|
|
response = await loop.run_in_executor(None, func_with_context) |
|
if ( |
|
kwargs.get("stream", False) is True |
|
or isinstance(response, TextCompletionStreamWrapper) |
|
or isinstance(response, CustomStreamWrapper) |
|
): |
|
return TextCompletionStreamWrapper( |
|
completion_stream=_async_streaming( |
|
response=response, |
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
args=args, |
|
), |
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
) |
|
else: |
|
|
|
if isinstance(response, TextCompletionResponse): |
|
return response |
|
elif asyncio.iscoroutine(response): |
|
response = await response |
|
|
|
text_completion_response = TextCompletionResponse() |
|
text_completion_response = litellm.utils.LiteLLMResponseObjectHandler.convert_chat_to_text_completion( |
|
text_completion_response=text_completion_response, |
|
response=response, |
|
custom_llm_provider=custom_llm_provider, |
|
) |
|
return text_completion_response |
|
except Exception as e: |
|
custom_llm_provider = custom_llm_provider or "openai" |
|
raise exception_type( |
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
original_exception=e, |
|
completion_kwargs=args, |
|
extra_kwargs=kwargs, |
|
) |
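
# Illustrative usage of atext_completion() (a sketch, not executed here; assumes an
# OpenAI key is configured and that "gpt-3.5-turbo-instruct" is available):
#
#     import asyncio
#
#     async def _demo():
#         response = await litellm.atext_completion(
#             model="gpt-3.5-turbo-instruct",
#             prompt="Say this is a test",
#             max_tokens=10,
#         )
#         print(response.choices[0].text)
#
#     asyncio.run(_demo())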
|
|
|
|
|
@client |
|
def text_completion( |
|
    prompt: Union[str, List[Union[str, List[Union[str, List[int]]]]]],
    model: Optional[str] = None,
    best_of: Optional[int] = None,
    echo: Optional[bool] = None,
    frequency_penalty: Optional[float] = None,
    logit_bias: Optional[Dict[int, int]] = None,
    logprobs: Optional[int] = None,
    max_tokens: Optional[int] = None,
    n: Optional[int] = None,
    presence_penalty: Optional[float] = None,
    stop: Optional[Union[str, List[str]]] = None,
    stream: Optional[bool] = None,
    stream_options: Optional[dict] = None,
    suffix: Optional[str] = None,
    temperature: Optional[float] = None,
    top_p: Optional[float] = None,
    user: Optional[str] = None,
|
|
|
api_base: Optional[str] = None, |
|
api_version: Optional[str] = None, |
|
api_key: Optional[str] = None, |
|
model_list: Optional[list] = None, |
|
|
|
custom_llm_provider: Optional[str] = None, |
|
*args, |
|
**kwargs, |
|
): |
|
import copy |
|
|
|
""" |
|
Generate text completions using the OpenAI API. |
|
|
|
Args: |
|
model (str): ID of the model to use. |
|
prompt (Union[str, List[Union[str, List[Union[str, List[int]]]]]): The prompt(s) to generate completions for. |
|
best_of (Optional[int], optional): Generates best_of completions server-side. Defaults to 1. |
|
echo (Optional[bool], optional): Echo back the prompt in addition to the completion. Defaults to False. |
|
frequency_penalty (Optional[float], optional): Penalize new tokens based on their existing frequency. Defaults to 0. |
|
logit_bias (Optional[Dict[int, int]], optional): Modify the likelihood of specified tokens. Defaults to None. |
|
logprobs (Optional[int], optional): Include the log probabilities on the most likely tokens. Defaults to None. |
|
max_tokens (Optional[int], optional): The maximum number of tokens to generate in the completion. Defaults to 16. |
|
n (Optional[int], optional): How many completions to generate for each prompt. Defaults to 1. |
|
presence_penalty (Optional[float], optional): Penalize new tokens based on whether they appear in the text so far. Defaults to 0. |
|
stop (Optional[Union[str, List[str]]], optional): Sequences where the API will stop generating further tokens. Defaults to None. |
|
stream (Optional[bool], optional): Whether to stream back partial progress. Defaults to False. |
|
suffix (Optional[str], optional): The suffix that comes after a completion of inserted text. Defaults to None. |
|
temperature (Optional[float], optional): Sampling temperature to use. Defaults to 1. |
|
top_p (Optional[float], optional): Nucleus sampling parameter. Defaults to 1. |
|
user (Optional[str], optional): A unique identifier representing your end-user. |
|
Returns: |
|
TextCompletionResponse: A response object containing the generated completion and associated metadata. |
|
|
|
Example: |
|
        A minimal, illustrative call (assumes an OpenAI API key is configured):

            response = text_completion(model="gpt-3.5-turbo-instruct", prompt="Say this is a test")
            print(response.choices[0].text)
|
""" |
|
if "engine" in kwargs: |
|
_engine = kwargs["engine"] |
|
if model is None and isinstance(_engine, str): |
|
|
|
model = _engine |
|
kwargs.pop("engine") |
|
|
|
text_completion_response = TextCompletionResponse() |
|
|
|
optional_params: Dict[str, Any] = {} |
|
|
|
if best_of is not None: |
|
optional_params["best_of"] = best_of |
|
if echo is not None: |
|
optional_params["echo"] = echo |
|
if frequency_penalty is not None: |
|
optional_params["frequency_penalty"] = frequency_penalty |
|
if logit_bias is not None: |
|
optional_params["logit_bias"] = logit_bias |
|
if logprobs is not None: |
|
optional_params["logprobs"] = logprobs |
|
if max_tokens is not None: |
|
optional_params["max_tokens"] = max_tokens |
|
if n is not None: |
|
optional_params["n"] = n |
|
if presence_penalty is not None: |
|
optional_params["presence_penalty"] = presence_penalty |
|
if stop is not None: |
|
optional_params["stop"] = stop |
|
if stream is not None: |
|
optional_params["stream"] = stream |
|
if stream_options is not None: |
|
optional_params["stream_options"] = stream_options |
|
if suffix is not None: |
|
optional_params["suffix"] = suffix |
|
if temperature is not None: |
|
optional_params["temperature"] = temperature |
|
if top_p is not None: |
|
optional_params["top_p"] = top_p |
|
if user is not None: |
|
optional_params["user"] = user |
|
if api_base is not None: |
|
optional_params["api_base"] = api_base |
|
if api_version is not None: |
|
optional_params["api_version"] = api_version |
|
if api_key is not None: |
|
optional_params["api_key"] = api_key |
|
if custom_llm_provider is not None: |
|
optional_params["custom_llm_provider"] = custom_llm_provider |
|
|
|
|
|
_model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider( |
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
api_base=api_base, |
|
) |
|
|
|
if custom_llm_provider == "huggingface": |
|
|
|
if echo is True: |
|
|
|
if "top_n_tokens" not in kwargs: |
|
kwargs["top_n_tokens"] = 3 |
|
|
|
|
|
if isinstance(prompt, list): |
|
import concurrent.futures |
|
|
|
tokenizer = tiktoken.encoding_for_model("text-davinci-003") |
|
|
|
if len(prompt) > 0 and isinstance(prompt[0], list): |
|
responses = [None for x in prompt] |
|
|
|
def process_prompt(i, individual_prompt): |
|
decoded_prompt = tokenizer.decode(individual_prompt) |
|
all_params = {**kwargs, **optional_params} |
|
response: TextCompletionResponse = text_completion( |
|
model=model, |
|
prompt=decoded_prompt, |
|
num_retries=3, |
|
*args, |
|
**all_params, |
|
) |
|
|
|
text_completion_response["id"] = response.get("id", None) |
|
text_completion_response["object"] = "text_completion" |
|
text_completion_response["created"] = response.get("created", None) |
|
text_completion_response["model"] = response.get("model", None) |
|
return response["choices"][0] |
|
|
|
with concurrent.futures.ThreadPoolExecutor() as executor: |
|
completed_futures = [ |
|
executor.submit(process_prompt, i, individual_prompt) |
|
for i, individual_prompt in enumerate(prompt) |
|
] |
|
                for i, future in enumerate(completed_futures):
                    # futures were submitted in prompt order; calling .result() in
                    # submission order preserves the prompt-to-response mapping
                    # (as_completed would yield futures in completion order instead)
                    responses[i] = future.result()
|
text_completion_response.choices = responses |
|
|
|
return text_completion_response |
|
|
|
|
|
|
|
|
|
|
|
|
|
messages = [] |
|
if isinstance(prompt, list) and len(prompt) > 0 and isinstance(prompt[0], str): |
|
for p in prompt: |
|
message = {"role": "user", "content": p} |
|
messages.append(message) |
|
elif isinstance(prompt, str): |
|
messages = [{"role": "user", "content": prompt}] |
|
elif ( |
|
( |
|
custom_llm_provider == "openai" |
|
or custom_llm_provider == "azure" |
|
or custom_llm_provider == "azure_text" |
|
or custom_llm_provider == "text-completion-codestral" |
|
or custom_llm_provider == "text-completion-openai" |
|
) |
|
and isinstance(prompt, list) |
|
and len(prompt) > 0 |
|
and isinstance(prompt[0], list) |
|
): |
|
verbose_logger.warning( |
|
msg="List of lists being passed. If this is for tokens, then it might not work across all models." |
|
) |
|
messages = [{"role": "user", "content": prompt}] |
|
else: |
|
raise Exception( |
|
f"Unmapped prompt format. Your prompt is neither a list of strings nor a string. prompt={prompt}. File an issue - https://github.com/BerriAI/litellm/issues" |
|
) |
|
|
|
kwargs.pop("prompt", None) |
|
|
|
    if _model is not None and custom_llm_provider == "openai":
|
if _model not in litellm.open_ai_chat_completion_models: |
|
model = "text-completion-openai/" + _model |
|
optional_params.pop("custom_llm_provider", None) |
|
|
|
if model is None: |
|
raise ValueError("model is not set. Set either via 'model' or 'engine' param.") |
|
kwargs["text_completion"] = True |
|
response = completion( |
|
model=model, |
|
messages=messages, |
|
*args, |
|
**kwargs, |
|
**optional_params, |
|
) |
|
if kwargs.get("acompletion", False) is True: |
|
return response |
|
if ( |
|
stream is True |
|
or kwargs.get("stream", False) is True |
|
or isinstance(response, CustomStreamWrapper) |
|
): |
|
response = TextCompletionStreamWrapper( |
|
completion_stream=response, |
|
model=model, |
|
stream_options=stream_options, |
|
custom_llm_provider=custom_llm_provider, |
|
) |
|
return response |
|
elif isinstance(response, TextCompletionStreamWrapper): |
|
return response |
|
|
|
|
|
if isinstance(response, TextCompletionResponse): |
|
return response |
|
|
|
text_completion_response = ( |
|
litellm.utils.LiteLLMResponseObjectHandler.convert_chat_to_text_completion( |
|
response=response, |
|
text_completion_response=text_completion_response, |
|
) |
|
) |
|
|
|
return text_completion_response |
|
|
|
|
|
|
|
|
|
|
|
async def aadapter_completion( |
|
*, adapter_id: str, **kwargs |
|
) -> Optional[Union[BaseModel, AdapterCompletionStreamWrapper]]: |
|
""" |
|
Implemented to handle async calls for adapter_completion() |
|
""" |
|
try: |
|
translation_obj: Optional[CustomLogger] = None |
|
for item in litellm.adapters: |
|
if item["id"] == adapter_id: |
|
translation_obj = item["adapter"] |
|
|
|
if translation_obj is None: |
|
raise ValueError( |
|
"No matching adapter given. Received 'adapter_id'={}, litellm.adapters={}".format( |
|
adapter_id, litellm.adapters |
|
) |
|
) |
|
|
|
new_kwargs = translation_obj.translate_completion_input_params(kwargs=kwargs) |
|
|
|
response: Union[ModelResponse, CustomStreamWrapper] = await acompletion(**new_kwargs) |
|
translated_response: Optional[ |
|
Union[BaseModel, AdapterCompletionStreamWrapper] |
|
] = None |
|
if isinstance(response, ModelResponse): |
|
translated_response = translation_obj.translate_completion_output_params( |
|
response=response |
|
) |
|
if isinstance(response, CustomStreamWrapper): |
|
translated_response = ( |
|
translation_obj.translate_completion_output_params_streaming( |
|
completion_stream=response |
|
) |
|
) |
|
|
|
return translated_response |
|
except Exception as e: |
|
raise e |
|
|
|
|
|
def adapter_completion( |
|
*, adapter_id: str, **kwargs |
|
) -> Optional[Union[BaseModel, AdapterCompletionStreamWrapper]]: |
|
translation_obj: Optional[CustomLogger] = None |
|
for item in litellm.adapters: |
|
if item["id"] == adapter_id: |
|
translation_obj = item["adapter"] |
|
|
|
if translation_obj is None: |
|
raise ValueError( |
|
"No matching adapter given. Received 'adapter_id'={}, litellm.adapters={}".format( |
|
adapter_id, litellm.adapters |
|
) |
|
) |
|
|
|
new_kwargs = translation_obj.translate_completion_input_params(kwargs=kwargs) |
|
|
|
response: Union[ModelResponse, CustomStreamWrapper] = completion(**new_kwargs) |
|
translated_response: Optional[Union[BaseModel, AdapterCompletionStreamWrapper]] = ( |
|
None |
|
) |
|
if isinstance(response, ModelResponse): |
|
translated_response = translation_obj.translate_completion_output_params( |
|
response=response |
|
) |
|
elif isinstance(response, CustomStreamWrapper) or inspect.isgenerator(response): |
|
translated_response = ( |
|
translation_obj.translate_completion_output_params_streaming( |
|
completion_stream=response |
|
) |
|
) |
|
|
|
return translated_response |
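
# Illustrative usage of adapter_completion() (a sketch, not executed here; `MyAdapter`
# is a hypothetical CustomLogger-based adapter registered in litellm.adapters):
#
#     litellm.adapters = [{"id": "my-adapter", "adapter": MyAdapter()}]
#
#     translated = adapter_completion(
#         adapter_id="my-adapter",
#         model="gpt-4o-mini",
#         messages=[{"role": "user", "content": "hi"}],
#     )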
|
|
|
|
|
|
|
|
|
|
|
def moderation( |
|
input: str, model: Optional[str] = None, api_key: Optional[str] = None, **kwargs |
|
): |
|
|
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or litellm.openai_key |
|
or get_secret_str("OPENAI_API_KEY") |
|
) |
|
|
|
openai_client = kwargs.get("client", None) |
|
if openai_client is None: |
|
openai_client = openai.OpenAI( |
|
api_key=api_key, |
|
) |
|
|
|
if model is not None: |
|
response = openai_client.moderations.create(input=input, model=model) |
|
else: |
|
response = openai_client.moderations.create(input=input) |
|
return response |
|
|
|
|
|
@client |
|
async def amoderation( |
|
input: str, |
|
model: Optional[str] = None, |
|
api_key: Optional[str] = None, |
|
custom_llm_provider: Optional[str] = None, |
|
**kwargs, |
|
): |
|
from openai import AsyncOpenAI |
|
|
|
|
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or litellm.openai_key |
|
or get_secret_str("OPENAI_API_KEY") |
|
) |
|
openai_client = kwargs.get("client", None) |
|
if openai_client is None or not isinstance(openai_client, AsyncOpenAI): |
|
|
|
|
|
_openai_client: AsyncOpenAI = openai_chat_completions._get_openai_client( |
|
is_async=True, |
|
api_key=api_key, |
|
) |
|
else: |
|
_openai_client = openai_client |
|
|
|
optional_params = GenericLiteLLMParams(**kwargs) |
|
try: |
|
model, _custom_llm_provider, _dynamic_api_key, _dynamic_api_base = ( |
|
litellm.get_llm_provider( |
|
model=model or "", |
|
custom_llm_provider=custom_llm_provider, |
|
api_base=optional_params.api_base, |
|
api_key=optional_params.api_key, |
|
) |
|
) |
|
except litellm.BadRequestError: |
|
|
|
        # provider resolution is best-effort here - fall back to the default
        # OpenAI moderations client if the model/provider can't be inferred
        pass
|
if model is not None: |
|
response = await _openai_client.moderations.create(input=input, model=model) |
|
else: |
|
response = await _openai_client.moderations.create(input=input) |
|
return response |
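
# Illustrative usage of the moderation helpers above (a sketch, not executed here;
# assumes OPENAI_API_KEY is set - both calls proxy to the OpenAI moderations API,
# and the model name shown is only an example):
#
#     result = moderation(input="some user text", model="omni-moderation-latest")
#
#     # or, inside an async context:
#     result = await amoderation(input="some user text")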
|
|
|
|
|
|
|
@client |
|
async def aimage_generation(*args, **kwargs) -> ImageResponse: |
|
""" |
|
Asynchronously calls the `image_generation` function with the given arguments and keyword arguments. |
|
|
|
Parameters: |
|
- `args` (tuple): Positional arguments to be passed to the `image_generation` function. |
|
- `kwargs` (dict): Keyword arguments to be passed to the `image_generation` function. |
|
|
|
Returns: |
|
- `response` (Any): The response returned by the `image_generation` function. |
|
""" |
|
loop = asyncio.get_event_loop() |
|
model = args[0] if len(args) > 0 else kwargs["model"] |
|
|
|
kwargs["aimg_generation"] = True |
|
custom_llm_provider = None |
|
try: |
|
|
|
func = partial(image_generation, *args, **kwargs) |
|
|
|
|
|
ctx = contextvars.copy_context() |
|
func_with_context = partial(ctx.run, func) |
|
|
|
_, custom_llm_provider, _, _ = get_llm_provider( |
|
model=model, api_base=kwargs.get("api_base", None) |
|
) |
|
|
|
|
|
init_response = await loop.run_in_executor(None, func_with_context) |
|
if isinstance(init_response, dict) or isinstance( |
|
init_response, ImageResponse |
|
): |
|
if isinstance(init_response, dict): |
|
init_response = ImageResponse(**init_response) |
|
response = init_response |
|
elif asyncio.iscoroutine(init_response): |
|
response = await init_response |
|
else: |
|
|
|
response = await loop.run_in_executor(None, func_with_context) |
|
return response |
|
except Exception as e: |
|
custom_llm_provider = custom_llm_provider or "openai" |
|
raise exception_type( |
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
original_exception=e, |
|
completion_kwargs=args, |
|
extra_kwargs=kwargs, |
|
) |
|
|
|
|
|
@client |
|
def image_generation( |
|
prompt: str, |
|
model: Optional[str] = None, |
|
n: Optional[int] = None, |
|
quality: Optional[str] = None, |
|
response_format: Optional[str] = None, |
|
size: Optional[str] = None, |
|
style: Optional[str] = None, |
|
user: Optional[str] = None, |
|
timeout=600, |
|
api_key: Optional[str] = None, |
|
api_base: Optional[str] = None, |
|
api_version: Optional[str] = None, |
|
custom_llm_provider=None, |
|
**kwargs, |
|
) -> ImageResponse: |
|
""" |
|
Maps the https://api.openai.com/v1/images/generations endpoint. |
|
|
|
    Currently supports Azure, OpenAI, Bedrock, Vertex AI, and custom LLM providers.
|
""" |
|
try: |
|
args = locals() |
|
aimg_generation = kwargs.get("aimg_generation", False) |
|
litellm_call_id = kwargs.get("litellm_call_id", None) |
|
logger_fn = kwargs.get("logger_fn", None) |
|
mock_response: Optional[str] = kwargs.get("mock_response", None) |
|
proxy_server_request = kwargs.get("proxy_server_request", None) |
|
azure_ad_token_provider = kwargs.get("azure_ad_token_provider", None) |
|
model_info = kwargs.get("model_info", None) |
|
metadata = kwargs.get("metadata", {}) |
|
litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj") |
|
client = kwargs.get("client", None) |
|
extra_headers = kwargs.get("extra_headers", None) |
|
headers: dict = kwargs.get("headers", None) or {} |
|
if extra_headers is not None: |
|
headers.update(extra_headers) |
|
model_response: ImageResponse = litellm.utils.ImageResponse() |
|
if model is not None or custom_llm_provider is not None: |
|
model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider( |
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
api_base=api_base, |
|
) |
|
else: |
|
model = "dall-e-2" |
|
custom_llm_provider = "openai" |
|
model_response._hidden_params["model"] = model |
|
openai_params = [ |
|
"user", |
|
"request_timeout", |
|
"api_base", |
|
"api_version", |
|
"api_key", |
|
"deployment_id", |
|
"organization", |
|
"base_url", |
|
"default_headers", |
|
"timeout", |
|
"max_retries", |
|
"n", |
|
"quality", |
|
"size", |
|
"style", |
|
] |
|
litellm_params = all_litellm_params |
|
default_params = openai_params + litellm_params |
|
non_default_params = { |
|
k: v for k, v in kwargs.items() if k not in default_params |
|
} |
|
optional_params = get_optional_params_image_gen( |
|
model=model, |
|
n=n, |
|
quality=quality, |
|
response_format=response_format, |
|
size=size, |
|
style=style, |
|
user=user, |
|
custom_llm_provider=custom_llm_provider, |
|
**non_default_params, |
|
) |
|
logging: Logging = litellm_logging_obj |
|
logging.update_environment_variables( |
|
model=model, |
|
user=user, |
|
optional_params=optional_params, |
|
litellm_params={ |
|
"timeout": timeout, |
|
"azure": False, |
|
"litellm_call_id": litellm_call_id, |
|
"logger_fn": logger_fn, |
|
"proxy_server_request": proxy_server_request, |
|
"model_info": model_info, |
|
"metadata": metadata, |
|
"preset_cache_key": None, |
|
"stream_response": {}, |
|
}, |
|
custom_llm_provider=custom_llm_provider, |
|
) |
|
if "custom_llm_provider" not in logging.model_call_details: |
|
logging.model_call_details["custom_llm_provider"] = custom_llm_provider |
|
if mock_response is not None: |
|
return mock_image_generation(model=model, mock_response=mock_response) |
|
|
|
if custom_llm_provider == "azure": |
|
|
|
api_type = get_secret_str("AZURE_API_TYPE") or "azure" |
|
|
|
api_base = api_base or litellm.api_base or get_secret_str("AZURE_API_BASE") |
|
|
|
api_version = ( |
|
api_version |
|
or litellm.api_version |
|
or get_secret_str("AZURE_API_VERSION") |
|
) |
|
|
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or litellm.azure_key |
|
or get_secret_str("AZURE_OPENAI_API_KEY") |
|
or get_secret_str("AZURE_API_KEY") |
|
) |
|
|
|
azure_ad_token = optional_params.pop( |
|
"azure_ad_token", None |
|
) or get_secret_str("AZURE_AD_TOKEN") |
|
|
|
default_headers = { |
|
"Content-Type": "application/json;", |
|
"api-key": api_key, |
|
} |
|
for k, v in default_headers.items(): |
|
if k not in headers: |
|
headers[k] = v |
|
|
|
model_response = azure_chat_completions.image_generation( |
|
model=model, |
|
prompt=prompt, |
|
timeout=timeout, |
|
api_key=api_key, |
|
api_base=api_base, |
|
azure_ad_token=azure_ad_token, |
|
azure_ad_token_provider=azure_ad_token_provider, |
|
logging_obj=litellm_logging_obj, |
|
optional_params=optional_params, |
|
model_response=model_response, |
|
api_version=api_version, |
|
aimg_generation=aimg_generation, |
|
client=client, |
|
headers=headers, |
|
) |
|
elif custom_llm_provider == "openai": |
|
model_response = openai_chat_completions.image_generation( |
|
model=model, |
|
prompt=prompt, |
|
timeout=timeout, |
|
api_key=api_key, |
|
api_base=api_base, |
|
logging_obj=litellm_logging_obj, |
|
optional_params=optional_params, |
|
model_response=model_response, |
|
aimg_generation=aimg_generation, |
|
client=client, |
|
) |
|
elif custom_llm_provider == "bedrock": |
|
if model is None: |
|
raise Exception("Model needs to be set for bedrock") |
|
model_response = bedrock_image_generation.image_generation( |
|
model=model, |
|
prompt=prompt, |
|
timeout=timeout, |
|
logging_obj=litellm_logging_obj, |
|
optional_params=optional_params, |
|
model_response=model_response, |
|
aimg_generation=aimg_generation, |
|
) |
|
elif custom_llm_provider == "vertex_ai": |
|
vertex_ai_project = ( |
|
optional_params.pop("vertex_project", None) |
|
or optional_params.pop("vertex_ai_project", None) |
|
or litellm.vertex_project |
|
or get_secret_str("VERTEXAI_PROJECT") |
|
) |
|
vertex_ai_location = ( |
|
optional_params.pop("vertex_location", None) |
|
or optional_params.pop("vertex_ai_location", None) |
|
or litellm.vertex_location |
|
or get_secret_str("VERTEXAI_LOCATION") |
|
) |
|
vertex_credentials = ( |
|
optional_params.pop("vertex_credentials", None) |
|
or optional_params.pop("vertex_ai_credentials", None) |
|
or get_secret_str("VERTEXAI_CREDENTIALS") |
|
) |
|
model_response = vertex_image_generation.image_generation( |
|
model=model, |
|
prompt=prompt, |
|
timeout=timeout, |
|
logging_obj=litellm_logging_obj, |
|
optional_params=optional_params, |
|
model_response=model_response, |
|
vertex_project=vertex_ai_project, |
|
vertex_location=vertex_ai_location, |
|
vertex_credentials=vertex_credentials, |
|
aimg_generation=aimg_generation, |
|
) |
|
elif ( |
|
custom_llm_provider in litellm._custom_providers |
|
): |
|
|
|
custom_handler: Optional[CustomLLM] = None |
|
for item in litellm.custom_provider_map: |
|
if item["provider"] == custom_llm_provider: |
|
custom_handler = item["custom_handler"] |
|
|
|
if custom_handler is None: |
|
raise ValueError( |
|
f"Unable to map your input to a model. Check your input - {args}" |
|
) |
|
|
|
|
|
if aimg_generation is True: |
|
async_custom_client: Optional[AsyncHTTPHandler] = None |
|
if client is not None and isinstance(client, AsyncHTTPHandler): |
|
async_custom_client = client |
|
|
|
|
|
model_response = custom_handler.aimage_generation( |
|
model=model, |
|
prompt=prompt, |
|
api_key=api_key, |
|
api_base=api_base, |
|
model_response=model_response, |
|
optional_params=optional_params, |
|
logging_obj=litellm_logging_obj, |
|
timeout=timeout, |
|
client=async_custom_client, |
|
) |
|
else: |
|
custom_client: Optional[HTTPHandler] = None |
|
if client is not None and isinstance(client, HTTPHandler): |
|
custom_client = client |
|
|
|
|
|
model_response = custom_handler.image_generation( |
|
model=model, |
|
prompt=prompt, |
|
api_key=api_key, |
|
api_base=api_base, |
|
model_response=model_response, |
|
optional_params=optional_params, |
|
logging_obj=litellm_logging_obj, |
|
timeout=timeout, |
|
client=custom_client, |
|
) |
|
|
|
return model_response |
|
except Exception as e: |
|
|
|
raise exception_type( |
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
original_exception=e, |
|
completion_kwargs=locals(), |
|
extra_kwargs=kwargs, |
|
) |
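
# Illustrative usage of image_generation() (a sketch, not executed here; assumes an
# OpenAI key is configured and that "dall-e-3" is available on the account):
#
#     response = image_generation(
#         prompt="A watercolor painting of a lighthouse at dusk",
#         model="dall-e-3",
#         size="1024x1024",
#         n=1,
#     )
#     print(response.data[0].url)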
|
|
|
|
|
@client |
|
async def aimage_variation(*args, **kwargs) -> ImageResponse: |
|
""" |
|
Asynchronously calls the `image_variation` function with the given arguments and keyword arguments. |
|
|
|
Parameters: |
|
- `args` (tuple): Positional arguments to be passed to the `image_variation` function. |
|
- `kwargs` (dict): Keyword arguments to be passed to the `image_variation` function. |
|
|
|
Returns: |
|
- `response` (Any): The response returned by the `image_variation` function. |
|
""" |
|
loop = asyncio.get_event_loop() |
|
model = kwargs.get("model", None) |
|
custom_llm_provider = kwargs.get("custom_llm_provider", None) |
|
|
|
kwargs["async_call"] = True |
|
try: |
|
|
|
func = partial(image_variation, *args, **kwargs) |
|
|
|
|
|
ctx = contextvars.copy_context() |
|
func_with_context = partial(ctx.run, func) |
|
|
|
if custom_llm_provider is None and model is not None: |
|
_, custom_llm_provider, _, _ = get_llm_provider( |
|
model=model, api_base=kwargs.get("api_base", None) |
|
) |
|
|
|
|
|
init_response = await loop.run_in_executor(None, func_with_context) |
|
if isinstance(init_response, dict) or isinstance( |
|
init_response, ImageResponse |
|
): |
|
if isinstance(init_response, dict): |
|
init_response = ImageResponse(**init_response) |
|
response = init_response |
|
elif asyncio.iscoroutine(init_response): |
|
response = await init_response |
|
else: |
|
|
|
response = await loop.run_in_executor(None, func_with_context) |
|
return response |
|
except Exception as e: |
|
custom_llm_provider = custom_llm_provider or "openai" |
|
raise exception_type( |
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
original_exception=e, |
|
completion_kwargs=args, |
|
extra_kwargs=kwargs, |
|
) |
|
|
|
|
|
@client |
|
def image_variation( |
|
image: FileTypes, |
|
model: str = "dall-e-2", |
|
n: int = 1, |
|
response_format: Literal["url", "b64_json"] = "url", |
|
size: Optional[str] = None, |
|
user: Optional[str] = None, |
|
**kwargs, |
|
) -> ImageResponse: |
|
|
|
client = kwargs.get("client", None) |
|
|
|
litellm_logging_obj = cast(LiteLLMLoggingObj, kwargs.get("litellm_logging_obj")) |
|
|
|
|
|
litellm_params = get_litellm_params(**kwargs) |
|
|
|
model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider( |
|
model=model, |
|
custom_llm_provider=litellm_params.get("custom_llm_provider", None), |
|
api_base=litellm_params.get("api_base", None), |
|
api_key=litellm_params.get("api_key", None), |
|
) |
|
|
|
|
|
try: |
|
llm_provider = LlmProviders(custom_llm_provider) |
|
image_variation_provider = LITELLM_IMAGE_VARIATION_PROVIDERS(llm_provider) |
|
except ValueError: |
|
raise ValueError( |
|
f"Invalid image variation provider: {custom_llm_provider}. Supported providers are: {LITELLM_IMAGE_VARIATION_PROVIDERS}" |
|
) |
|
model_response = ImageResponse() |
|
|
|
response: Optional[ImageResponse] = None |
|
|
|
provider_config = ProviderConfigManager.get_provider_model_info( |
|
model=model or "", |
|
provider=llm_provider, |
|
) |
|
|
|
if provider_config is None: |
|
raise ValueError( |
|
f"image variation provider has no known model info config - required for getting api keys, etc.: {custom_llm_provider}. Supported providers are: {LITELLM_IMAGE_VARIATION_PROVIDERS}" |
|
) |
|
|
|
api_key = provider_config.get_api_key(litellm_params.get("api_key", None)) |
|
api_base = provider_config.get_api_base(litellm_params.get("api_base", None)) |
|
|
|
if image_variation_provider == LITELLM_IMAGE_VARIATION_PROVIDERS.OPENAI: |
|
if api_key is None: |
|
raise ValueError("API key is required for OpenAI image variations") |
|
if api_base is None: |
|
raise ValueError("API base is required for OpenAI image variations") |
|
|
|
response = openai_image_variations.image_variations( |
|
model_response=model_response, |
|
api_key=api_key, |
|
api_base=api_base, |
|
model=model, |
|
image=image, |
|
timeout=litellm_params.get("timeout", None), |
|
custom_llm_provider=custom_llm_provider, |
|
logging_obj=litellm_logging_obj, |
|
optional_params={}, |
|
litellm_params=litellm_params, |
|
) |
|
elif image_variation_provider == LITELLM_IMAGE_VARIATION_PROVIDERS.TOPAZ: |
|
if api_key is None: |
|
raise ValueError("API key is required for Topaz image variations") |
|
if api_base is None: |
|
raise ValueError("API base is required for Topaz image variations") |
|
|
|
response = base_llm_aiohttp_handler.image_variations( |
|
model_response=model_response, |
|
api_key=api_key, |
|
api_base=api_base, |
|
model=model, |
|
image=image, |
|
timeout=litellm_params.get("timeout", None), |
|
custom_llm_provider=custom_llm_provider, |
|
logging_obj=litellm_logging_obj, |
|
optional_params={}, |
|
litellm_params=litellm_params, |
|
client=client, |
|
) |
|
|
|
|
|
if response is None: |
|
raise ValueError( |
|
f"Invalid image variation provider: {custom_llm_provider}. Supported providers are: {LITELLM_IMAGE_VARIATION_PROVIDERS}" |
|
) |
|
return response |
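
# Illustrative usage of image_variation() (a sketch, not executed here; assumes an
# OpenAI key is configured and an image file exists at the hypothetical path shown):
#
#     with open("otter.png", "rb") as image_file:
#         response = image_variation(
#             image=image_file,
#             model="dall-e-2",
#             n=1,
#             response_format="url",
#         )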
|
|
|
|
|
|
|
|
|
|
|
@client |
|
async def atranscription(*args, **kwargs) -> TranscriptionResponse: |
|
""" |
|
    Calls OpenAI, Azure, Groq, Fireworks AI, and Deepgram transcription endpoints.
|
|
|
Allows router to load balance between them |
|
""" |
|
loop = asyncio.get_event_loop() |
|
model = args[0] if len(args) > 0 else kwargs["model"] |
|
|
|
kwargs["atranscription"] = True |
|
custom_llm_provider = None |
|
try: |
|
|
|
func = partial(transcription, *args, **kwargs) |
|
|
|
|
|
ctx = contextvars.copy_context() |
|
func_with_context = partial(ctx.run, func) |
|
|
|
_, custom_llm_provider, _, _ = get_llm_provider( |
|
model=model, api_base=kwargs.get("api_base", None) |
|
) |
|
|
|
|
|
init_response = await loop.run_in_executor(None, func_with_context) |
|
if isinstance(init_response, dict): |
|
response = TranscriptionResponse(**init_response) |
|
elif isinstance(init_response, TranscriptionResponse): |
|
response = init_response |
|
elif asyncio.iscoroutine(init_response): |
|
response = await init_response |
|
else: |
|
|
|
response = await loop.run_in_executor(None, func_with_context) |
|
return response |
|
except Exception as e: |
|
custom_llm_provider = custom_llm_provider or "openai" |
|
raise exception_type( |
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
original_exception=e, |
|
completion_kwargs=args, |
|
extra_kwargs=kwargs, |
|
) |
|
|
|
|
|
@client |
|
def transcription( |
|
model: str, |
|
file: FileTypes, |
|
|
|
language: Optional[str] = None, |
|
prompt: Optional[str] = None, |
|
response_format: Optional[ |
|
Literal["json", "text", "srt", "verbose_json", "vtt"] |
|
] = None, |
|
timestamp_granularities: Optional[List[Literal["word", "segment"]]] = None, |
|
temperature: Optional[int] = None, |
|
|
|
user: Optional[str] = None, |
|
timeout=600, |
|
api_key: Optional[str] = None, |
|
api_base: Optional[str] = None, |
|
api_version: Optional[str] = None, |
|
max_retries: Optional[int] = None, |
|
custom_llm_provider=None, |
|
**kwargs, |
|
) -> TranscriptionResponse: |
|
""" |
|
    Calls OpenAI, Azure, Groq, Fireworks AI, and Deepgram transcription endpoints.
|
|
|
Allows router to load balance between them |
|
""" |
|
litellm_call_id = kwargs.get("litellm_call_id", None) |
|
proxy_server_request = kwargs.get("proxy_server_request", None) |
|
model_info = kwargs.get("model_info", None) |
|
metadata = kwargs.get("metadata", None) |
|
    atranscription = kwargs.get("atranscription", False)
|
litellm_logging_obj: LiteLLMLoggingObj = kwargs.get("litellm_logging_obj") |
|
extra_headers = kwargs.get("extra_headers", None) |
|
kwargs.pop("tags", []) |
|
|
|
drop_params = kwargs.get("drop_params", None) |
|
client: Optional[ |
|
Union[ |
|
openai.AsyncOpenAI, |
|
openai.OpenAI, |
|
openai.AzureOpenAI, |
|
openai.AsyncAzureOpenAI, |
|
] |
|
] = kwargs.pop("client", None) |
|
|
|
if litellm_logging_obj: |
|
litellm_logging_obj.model_call_details["client"] = str(client) |
|
|
|
if max_retries is None: |
|
max_retries = openai.DEFAULT_MAX_RETRIES |
|
|
|
model_response = litellm.utils.TranscriptionResponse() |
|
|
|
model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider( |
|
model=model, custom_llm_provider=custom_llm_provider, api_base=api_base |
|
) |
|
|
|
if dynamic_api_key is not None: |
|
api_key = dynamic_api_key |
|
|
|
optional_params = get_optional_params_transcription( |
|
model=model, |
|
language=language, |
|
prompt=prompt, |
|
response_format=response_format, |
|
timestamp_granularities=timestamp_granularities, |
|
temperature=temperature, |
|
custom_llm_provider=custom_llm_provider, |
|
drop_params=drop_params, |
|
) |
|
|
|
litellm_logging_obj.update_environment_variables( |
|
model=model, |
|
user=user, |
|
optional_params={}, |
|
litellm_params={ |
|
"litellm_call_id": litellm_call_id, |
|
"proxy_server_request": proxy_server_request, |
|
"model_info": model_info, |
|
"metadata": metadata, |
|
"preset_cache_key": None, |
|
"stream_response": {}, |
|
**kwargs, |
|
}, |
|
custom_llm_provider=custom_llm_provider, |
|
) |
|
|
|
response: Optional[TranscriptionResponse] = None |
|
if custom_llm_provider == "azure": |
|
|
|
api_base = api_base or litellm.api_base or get_secret_str("AZURE_API_BASE") |
|
|
|
api_version = ( |
|
api_version or litellm.api_version or get_secret_str("AZURE_API_VERSION") |
|
) |
|
|
|
azure_ad_token = kwargs.pop("azure_ad_token", None) or get_secret_str( |
|
"AZURE_AD_TOKEN" |
|
) |
|
|
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or litellm.azure_key |
|
or get_secret_str("AZURE_API_KEY") |
|
) |
|
|
|
optional_params["extra_headers"] = extra_headers |
|
|
|
response = azure_audio_transcriptions.audio_transcriptions( |
|
model=model, |
|
audio_file=file, |
|
optional_params=optional_params, |
|
model_response=model_response, |
|
atranscription=atranscription, |
|
client=client, |
|
timeout=timeout, |
|
logging_obj=litellm_logging_obj, |
|
api_base=api_base, |
|
api_key=api_key, |
|
api_version=api_version, |
|
azure_ad_token=azure_ad_token, |
|
max_retries=max_retries, |
|
) |
|
elif ( |
|
custom_llm_provider == "openai" |
|
or custom_llm_provider == "groq" |
|
or custom_llm_provider == "fireworks_ai" |
|
): |
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or get_secret("OPENAI_API_BASE") |
|
or "https://api.openai.com/v1" |
|
) |
|
openai.organization = ( |
|
litellm.organization |
|
or get_secret("OPENAI_ORGANIZATION") |
|
or None |
|
) |
|
|
|
api_key = api_key or litellm.api_key or litellm.openai_key or get_secret("OPENAI_API_KEY") |
|
response = openai_audio_transcriptions.audio_transcriptions( |
|
model=model, |
|
audio_file=file, |
|
optional_params=optional_params, |
|
model_response=model_response, |
|
atranscription=atranscription, |
|
client=client, |
|
timeout=timeout, |
|
logging_obj=litellm_logging_obj, |
|
max_retries=max_retries, |
|
api_base=api_base, |
|
api_key=api_key, |
|
) |
|
elif custom_llm_provider == "deepgram": |
|
response = base_llm_http_handler.audio_transcriptions( |
|
model=model, |
|
audio_file=file, |
|
optional_params=optional_params, |
|
model_response=model_response, |
|
atranscription=atranscription, |
|
client=( |
|
client |
|
if client is not None |
|
and ( |
|
isinstance(client, HTTPHandler) |
|
or isinstance(client, AsyncHTTPHandler) |
|
) |
|
else None |
|
), |
|
timeout=timeout, |
|
max_retries=max_retries, |
|
logging_obj=litellm_logging_obj, |
|
api_base=api_base, |
|
api_key=api_key, |
|
custom_llm_provider="deepgram", |
|
headers={}, |
|
) |
|
if response is None: |
|
raise ValueError("Unmapped provider passed in. Unable to get the response.") |
|
return response |
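
# Illustrative usage of transcription() (a sketch, not executed here; assumes an
# OpenAI key is configured and an audio file exists at the hypothetical path shown):
#
#     with open("meeting.mp3", "rb") as audio_file:
#         response = transcription(
#             model="whisper-1",
#             file=audio_file,
#             response_format="json",
#         )
#     print(response.text)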
|
|
|
|
|
@client |
|
async def aspeech(*args, **kwargs) -> HttpxBinaryResponseContent: |
|
""" |
|
    Calls OpenAI, Azure, and Vertex AI text-to-speech endpoints.
|
""" |
|
loop = asyncio.get_event_loop() |
|
model = args[0] if len(args) > 0 else kwargs["model"] |
|
|
|
kwargs["aspeech"] = True |
|
custom_llm_provider = kwargs.get("custom_llm_provider", None) |
|
try: |
|
|
|
func = partial(speech, *args, **kwargs) |
|
|
|
|
|
ctx = contextvars.copy_context() |
|
func_with_context = partial(ctx.run, func) |
|
|
|
_, custom_llm_provider, _, _ = get_llm_provider( |
|
model=model, api_base=kwargs.get("api_base", None) |
|
) |
|
|
|
|
|
init_response = await loop.run_in_executor(None, func_with_context) |
|
if asyncio.iscoroutine(init_response): |
|
response = await init_response |
|
else: |
|
|
|
response = await loop.run_in_executor(None, func_with_context) |
|
return response |
|
except Exception as e: |
|
custom_llm_provider = custom_llm_provider or "openai" |
|
raise exception_type( |
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
original_exception=e, |
|
completion_kwargs=args, |
|
extra_kwargs=kwargs, |
|
) |
|
|
|
|
|
@client |
|
def speech( |
|
model: str, |
|
input: str, |
|
voice: Optional[Union[str, dict]] = None, |
|
api_key: Optional[str] = None, |
|
api_base: Optional[str] = None, |
|
api_version: Optional[str] = None, |
|
organization: Optional[str] = None, |
|
project: Optional[str] = None, |
|
max_retries: Optional[int] = None, |
|
metadata: Optional[dict] = None, |
|
timeout: Optional[Union[float, httpx.Timeout]] = None, |
|
response_format: Optional[str] = None, |
|
speed: Optional[int] = None, |
|
client=None, |
|
headers: Optional[dict] = None, |
|
custom_llm_provider: Optional[str] = None, |
|
aspeech: Optional[bool] = None, |
|
**kwargs, |
|
) -> HttpxBinaryResponseContent: |
|
user = kwargs.get("user", None) |
|
litellm_call_id: Optional[str] = kwargs.get("litellm_call_id", None) |
|
proxy_server_request = kwargs.get("proxy_server_request", None) |
|
extra_headers = kwargs.get("extra_headers", None) |
|
model_info = kwargs.get("model_info", None) |
|
model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider( |
|
model=model, custom_llm_provider=custom_llm_provider, api_base=api_base |
|
) |
|
kwargs.pop("tags", []) |
|
|
|
optional_params = {} |
|
if response_format is not None: |
|
optional_params["response_format"] = response_format |
|
if speed is not None: |
|
optional_params["speed"] = speed |
|
|
|
if timeout is None: |
|
timeout = litellm.request_timeout |
|
|
|
if max_retries is None: |
|
max_retries = litellm.num_retries or openai.DEFAULT_MAX_RETRIES |
|
|
|
logging_obj = kwargs.get("litellm_logging_obj", None) |
|
logging_obj.update_environment_variables( |
|
model=model, |
|
user=user, |
|
optional_params={}, |
|
litellm_params={ |
|
"litellm_call_id": litellm_call_id, |
|
"proxy_server_request": proxy_server_request, |
|
"model_info": model_info, |
|
"metadata": metadata, |
|
"preset_cache_key": None, |
|
"stream_response": {}, |
|
**kwargs, |
|
}, |
|
custom_llm_provider=custom_llm_provider, |
|
) |
|
response: Optional[HttpxBinaryResponseContent] = None |
|
if custom_llm_provider == "openai": |
|
if voice is None or not (isinstance(voice, str)): |
|
raise litellm.BadRequestError( |
|
message="'voice' is required to be passed as a string for OpenAI TTS", |
|
model=model, |
|
llm_provider=custom_llm_provider, |
|
) |
|
api_base = ( |
|
api_base |
|
or litellm.api_base |
|
or get_secret("OPENAI_API_BASE") |
|
or "https://api.openai.com/v1" |
|
) |
|
|
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or litellm.openai_key |
|
or get_secret("OPENAI_API_KEY") |
|
) |
|
|
|
organization = ( |
|
organization |
|
or litellm.organization |
|
or get_secret("OPENAI_ORGANIZATION") |
|
or None |
|
) |
|
|
|
project = ( |
|
project |
|
or litellm.project |
|
or get_secret("OPENAI_PROJECT") |
|
or None |
|
) |
|
|
|
headers = headers or litellm.headers |
|
|
|
response = openai_chat_completions.audio_speech( |
|
model=model, |
|
input=input, |
|
voice=voice, |
|
optional_params=optional_params, |
|
api_key=api_key, |
|
api_base=api_base, |
|
organization=organization, |
|
project=project, |
|
max_retries=max_retries, |
|
timeout=timeout, |
|
client=client, |
|
aspeech=aspeech, |
|
) |
|
elif custom_llm_provider == "azure": |
|
|
|
if voice is None or not (isinstance(voice, str)): |
|
raise litellm.BadRequestError( |
|
message="'voice' is required to be passed as a string for Azure TTS", |
|
model=model, |
|
llm_provider=custom_llm_provider, |
|
) |
|
api_base = api_base or litellm.api_base or get_secret("AZURE_API_BASE") |
|
|
|
api_version = api_version or litellm.api_version or get_secret("AZURE_API_VERSION") |
|
|
|
api_key = ( |
|
api_key |
|
or litellm.api_key |
|
or litellm.azure_key |
|
or get_secret("AZURE_OPENAI_API_KEY") |
|
or get_secret("AZURE_API_KEY") |
|
) |
|
|
|
azure_ad_token: Optional[str] = optional_params.get("extra_body", {}).pop( |
|
"azure_ad_token", None |
|
) or get_secret( |
|
"AZURE_AD_TOKEN" |
|
) |
|
azure_ad_token_provider = kwargs.get("azure_ad_token_provider", None) |
|
|
|
if extra_headers: |
|
optional_params["extra_headers"] = extra_headers |
|
|
|
response = azure_chat_completions.audio_speech( |
|
model=model, |
|
input=input, |
|
voice=voice, |
|
optional_params=optional_params, |
|
api_key=api_key, |
|
api_base=api_base, |
|
api_version=api_version, |
|
azure_ad_token=azure_ad_token, |
|
azure_ad_token_provider=azure_ad_token_provider, |
|
organization=organization, |
|
max_retries=max_retries, |
|
timeout=timeout, |
|
client=client, |
|
aspeech=aspeech, |
|
) |
|
elif custom_llm_provider == "vertex_ai" or custom_llm_provider == "vertex_ai_beta": |
|
|
|
generic_optional_params = GenericLiteLLMParams(**kwargs) |
|
|
|
api_base = generic_optional_params.api_base or "" |
|
vertex_ai_project = ( |
|
generic_optional_params.vertex_project |
|
or litellm.vertex_project |
|
or get_secret_str("VERTEXAI_PROJECT") |
|
) |
|
vertex_ai_location = ( |
|
generic_optional_params.vertex_location |
|
or litellm.vertex_location |
|
or get_secret_str("VERTEXAI_LOCATION") |
|
) |
|
vertex_credentials = ( |
|
generic_optional_params.vertex_credentials |
|
or get_secret_str("VERTEXAI_CREDENTIALS") |
|
) |
|
|
|
if voice is not None and not isinstance(voice, dict): |
|
raise litellm.BadRequestError( |
|
message=f"'voice' is required to be passed as a dict for Vertex AI TTS, passed in voice={voice}", |
|
model=model, |
|
llm_provider=custom_llm_provider, |
|
) |
|
response = vertex_text_to_speech.audio_speech( |
|
_is_async=aspeech, |
|
vertex_credentials=vertex_credentials, |
|
vertex_project=vertex_ai_project, |
|
vertex_location=vertex_ai_location, |
|
timeout=timeout, |
|
api_base=api_base, |
|
model=model, |
|
input=input, |
|
voice=voice, |
|
optional_params=optional_params, |
|
kwargs=kwargs, |
|
logging_obj=logging_obj, |
|
) |
|
|
|
if response is None: |
|
raise Exception( |
|
"Unable to map the custom llm provider={} to a known provider={}.".format( |
|
custom_llm_provider, litellm.provider_list |
|
) |
|
) |
|
return response |
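
# Illustrative usage of speech() (a sketch, not executed here; assumes an OpenAI key
# is configured, and the output path is hypothetical):
#
#     audio = speech(
#         model="tts-1",
#         input="Hello from litellm",
#         voice="alloy",
#     )
#     audio.stream_to_file("hello.mp3")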
|
|
|
|
|
|
|
|
|
|
|
async def ahealth_check_wildcard_models( |
|
model: str, custom_llm_provider: str, model_params: dict |
|
) -> dict: |
|
|
|
|
|
cheapest_models = pick_cheapest_chat_models_from_llm_provider( |
|
custom_llm_provider=custom_llm_provider, n=3 |
|
) |
|
if len(cheapest_models) == 0: |
|
raise Exception( |
|
f"Unable to health check wildcard model for provider {custom_llm_provider}. Add a model on your config.yaml or contribute here - https://github.com/BerriAI/litellm/blob/main/model_prices_and_context_window.json" |
|
) |
|
if len(cheapest_models) > 1: |
|
fallback_models = cheapest_models[ |
|
1: |
|
] |
|
else: |
|
fallback_models = None |
|
model_params["model"] = cheapest_models[0] |
|
model_params["fallbacks"] = fallback_models |
|
model_params["max_tokens"] = 1 |
|
await acompletion(**model_params) |
|
return {} |
|
|
|
|
|
async def ahealth_check( |
|
model_params: dict, |
|
mode: Optional[ |
|
Literal[ |
|
"chat", |
|
"completion", |
|
"embedding", |
|
"audio_speech", |
|
"audio_transcription", |
|
"image_generation", |
|
"batch", |
|
"rerank", |
|
"realtime", |
|
] |
|
] = "chat", |
|
prompt: Optional[str] = None, |
|
input: Optional[List] = None, |
|
): |
|
""" |
|
Support health checks for different providers. Return remaining rate limit, etc. |
|
|
|
Returns: |
|
{ |
|
"x-ratelimit-remaining-requests": int, |
|
"x-ratelimit-remaining-tokens": int, |
|
"x-ms-region": str, |
|
} |
|
""" |
|
try: |
|
model: Optional[str] = model_params.get("model", None) |
|
if model is None: |
|
raise Exception("model not set") |
|
|
|
if model in litellm.model_cost and mode is None: |
|
mode = litellm.model_cost[model].get("mode") |
|
|
|
model, custom_llm_provider, _, _ = get_llm_provider(model=model) |
|
if model in litellm.model_cost and mode is None: |
|
mode = litellm.model_cost[model].get("mode") |
|
|
|
model_params["cache"] = { |
|
"no-cache": True |
|
} |
|
mode = mode or "chat" |
|
if "*" in model: |
|
return await ahealth_check_wildcard_models( |
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
model_params=model_params, |
|
) |
|
|
|
mode_handlers = { |
|
"chat": lambda: litellm.acompletion(**model_params), |
|
"completion": lambda: litellm.atext_completion( |
|
**_filter_model_params(model_params), |
|
prompt=prompt or "test", |
|
), |
|
"embedding": lambda: litellm.aembedding( |
|
**_filter_model_params(model_params), |
|
input=input or ["test"], |
|
), |
|
"audio_speech": lambda: litellm.aspeech( |
|
**_filter_model_params(model_params), |
|
input=prompt or "test", |
|
voice="alloy", |
|
), |
|
"audio_transcription": lambda: litellm.atranscription( |
|
**_filter_model_params(model_params), |
|
file=get_audio_file_for_health_check(), |
|
), |
|
"image_generation": lambda: litellm.aimage_generation( |
|
**_filter_model_params(model_params), |
|
prompt=prompt, |
|
), |
|
"rerank": lambda: litellm.arerank( |
|
**_filter_model_params(model_params), |
|
query=prompt or "", |
|
documents=["my sample text"], |
|
), |
|
"realtime": lambda: _realtime_health_check( |
|
model=model, |
|
custom_llm_provider=custom_llm_provider, |
|
api_base=model_params.get("api_base", None), |
|
api_key=model_params.get("api_key", None), |
|
api_version=model_params.get("api_version", None), |
|
), |
|
} |
|
|
|
if mode in mode_handlers: |
|
_response = await mode_handlers[mode]() |
|
|
|
_response_headers: dict = ( |
|
getattr(_response, "_hidden_params", {}).get("headers", {}) or {} |
|
) |
|
return _create_health_check_response(_response_headers) |
|
else: |
|
raise Exception( |
|
f"Mode {mode} not supported. See modes here: https://docs.litellm.ai/docs/proxy/health" |
|
) |
|
except Exception as e: |
|
stack_trace = traceback.format_exc() |
|
if isinstance(stack_trace, str): |
|
stack_trace = stack_trace[:1000] |
|
|
|
if mode is None: |
|
return { |
|
"error": f"error:{str(e)}. Missing `mode`. Set the `mode` for the model - https://docs.litellm.ai/docs/proxy/health#embedding-models \nstacktrace: {stack_trace}" |
|
} |
|
|
|
error_to_return = ( |
|
str(e) |
|
+ "\nHave you set 'mode' - https://docs.litellm.ai/docs/proxy/health#embedding-models" |
|
+ "\nstack trace: " |
|
+ stack_trace |
|
) |
|
return {"error": error_to_return} |
|
|
|
|
|
|
|
|
|
def print_verbose(print_statement): |
|
try: |
|
verbose_logger.debug(print_statement) |
|
if litellm.set_verbose: |
|
print(print_statement) |
|
except Exception: |
|
pass |
|
|
|
|
|
def config_completion(**kwargs): |
|
if litellm.config_path is not None: |
|
config_args = read_config_args(litellm.config_path) |
|
|
|
return completion(**kwargs, **config_args) |
|
else: |
|
raise ValueError( |
|
"No config path set, please set a config path using `litellm.config_path = 'path/to/config.json'`" |
|
) |
|
|
|
|
|
def stream_chunk_builder_text_completion( |
|
chunks: list, messages: Optional[List] = None |
|
) -> TextCompletionResponse: |
|
id = chunks[0]["id"] |
|
object = chunks[0]["object"] |
|
created = chunks[0]["created"] |
|
model = chunks[0]["model"] |
|
system_fingerprint = chunks[0].get("system_fingerprint", None) |
|
finish_reason = chunks[-1]["choices"][0]["finish_reason"] |
|
logprobs = chunks[-1]["choices"][0]["logprobs"] |
|
|
|
response = { |
|
"id": id, |
|
"object": object, |
|
"created": created, |
|
"model": model, |
|
"system_fingerprint": system_fingerprint, |
|
"choices": [ |
|
{ |
|
"text": None, |
|
"index": 0, |
|
"logprobs": logprobs, |
|
"finish_reason": finish_reason, |
|
} |
|
], |
|
"usage": { |
|
"prompt_tokens": None, |
|
"completion_tokens": None, |
|
"total_tokens": None, |
|
}, |
|
} |
|
content_list = [] |
|
for chunk in chunks: |
|
choices = chunk["choices"] |
|
for choice in choices: |
|
if ( |
|
choice is not None |
|
and hasattr(choice, "text") |
|
and choice.get("text") is not None |
|
): |
|
_choice = choice.get("text") |
|
content_list.append(_choice) |
|
|
|
|
|
combined_content = "".join(content_list) |
|
|
|
|
|
response["choices"][0]["text"] = combined_content |
|
|
|
|
|
|
try: |
|
response["usage"]["prompt_tokens"] = token_counter( |
|
model=model, messages=messages |
|
) |
|
except ( |
|
Exception |
|
): |
|
print_verbose("token_counter failed, assuming prompt tokens is 0") |
|
response["usage"]["prompt_tokens"] = 0 |
|
response["usage"]["completion_tokens"] = token_counter( |
|
model=model, |
|
text=combined_content, |
|
count_response_tokens=True, |
|
) |
|
response["usage"]["total_tokens"] = ( |
|
response["usage"]["prompt_tokens"] + response["usage"]["completion_tokens"] |
|
) |
|
return TextCompletionResponse(**response) |
|
|
|
|
|
def stream_chunk_builder( |
|
chunks: list, messages: Optional[list] = None, start_time=None, end_time=None |
|
) -> Optional[Union[ModelResponse, TextCompletionResponse]]: |
|
try: |
|
if chunks is None: |
|
raise litellm.APIError( |
|
status_code=500, |
|
message="Error building chunks for logging/streaming usage calculation", |
|
llm_provider="", |
|
model="", |
|
) |
|
if not chunks: |
|
return None |
|
|
|
processor = ChunkProcessor(chunks, messages) |
|
chunks = processor.chunks |
|
|
|
|
|
if len(chunks) == 0: |
|
return None |
|
|
|
if isinstance( |
|
chunks[0]["choices"][0], litellm.utils.TextChoices |
|
): |
|
return stream_chunk_builder_text_completion( |
|
chunks=chunks, messages=messages |
|
) |
|
|
|
model = chunks[0]["model"] |
|
|
|
response = processor.build_base_response(chunks) |
|
|
|
tool_call_chunks = [ |
|
chunk |
|
for chunk in chunks |
|
if len(chunk["choices"]) > 0 |
|
and "tool_calls" in chunk["choices"][0]["delta"] |
|
and chunk["choices"][0]["delta"]["tool_calls"] is not None |
|
] |
|
|
|
if len(tool_call_chunks) > 0: |
|
tool_calls_list = processor.get_combined_tool_content(tool_call_chunks) |
|
_choice = cast(Choices, response.choices[0]) |
|
_choice.message.content = None |
|
_choice.message.tool_calls = tool_calls_list |
|
|
|
function_call_chunks = [ |
|
chunk |
|
for chunk in chunks |
|
if len(chunk["choices"]) > 0 |
|
and "function_call" in chunk["choices"][0]["delta"] |
|
and chunk["choices"][0]["delta"]["function_call"] is not None |
|
] |
|
|
|
if len(function_call_chunks) > 0: |
|
_choice = cast(Choices, response.choices[0]) |
|
_choice.message.content = None |
|
_choice.message.function_call = ( |
|
processor.get_combined_function_call_content(function_call_chunks) |
|
) |
|
|
|
content_chunks = [ |
|
chunk |
|
for chunk in chunks |
|
if len(chunk["choices"]) > 0 |
|
and "content" in chunk["choices"][0]["delta"] |
|
and chunk["choices"][0]["delta"]["content"] is not None |
|
] |
|
|
|
if len(content_chunks) > 0: |
|
response["choices"][0]["message"]["content"] = ( |
|
processor.get_combined_content(content_chunks) |
|
) |
|
|
|
audio_chunks = [ |
|
chunk |
|
for chunk in chunks |
|
if len(chunk["choices"]) > 0 |
|
and "audio" in chunk["choices"][0]["delta"] |
|
and chunk["choices"][0]["delta"]["audio"] is not None |
|
] |
|
|
|
if len(audio_chunks) > 0: |
|
_choice = cast(Choices, response.choices[0]) |
|
_choice.message.audio = processor.get_combined_audio_content(audio_chunks) |
|
|
|
completion_output = get_content_from_model_response(response) |
|
|
|
usage = processor.calculate_usage( |
|
chunks=chunks, |
|
model=model, |
|
completion_output=completion_output, |
|
messages=messages, |
|
) |
|
|
|
setattr(response, "usage", usage) |
|
|
|
return response |
|
except Exception as e: |
|
verbose_logger.exception( |
|
"litellm.main.py::stream_chunk_builder() - Exception occurred - {}".format( |
|
str(e) |
|
) |
|
) |
|
raise litellm.APIError( |
|
status_code=500, |
|
message="Error building chunks for logging/streaming usage calculation", |
|
llm_provider="", |
|
model="", |
|
) |
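
# Illustrative usage of stream_chunk_builder() (a sketch, not executed here; assumes a
# configured provider key - it reassembles streamed chunks into a single response object):
#
#     messages = [{"role": "user", "content": "Tell me a short joke"}]
#     chunks = list(completion(model="gpt-4o-mini", messages=messages, stream=True))
#     full_response = stream_chunk_builder(chunks, messages=messages)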
|
|