|
from typing import List |
|
|
|
ROUTER_MAX_FALLBACKS = 5
DEFAULT_BATCH_SIZE = 512
DEFAULT_FLUSH_INTERVAL_SECONDS = 5
DEFAULT_MAX_RETRIES = 2
DEFAULT_FAILURE_THRESHOLD_PERCENT = 0.5
DEFAULT_COOLDOWN_TIME_SECONDS = 5
DEFAULT_REPLICATE_POLLING_RETRIES = 5
DEFAULT_REPLICATE_POLLING_DELAY_SECONDS = 1
DEFAULT_IMAGE_TOKEN_COUNT = 250
DEFAULT_IMAGE_WIDTH = 300
DEFAULT_IMAGE_HEIGHT = 300
SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000
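

# Illustrative sketch, not part of the library: one way a router could consume the
# retry/cooldown defaults above. The helper name and arguments are hypothetical;
# LiteLLM's actual cooldown logic lives in its Router implementation.
# DEFAULT_FAILURE_THRESHOLD_PERCENT is treated here as a fraction (0.5 == 50%).
def _should_cooldown_deployment(failed_requests: int, total_requests: int) -> bool:
    """Return True when a deployment's failure rate crosses the default threshold."""
    if total_requests == 0:
        return False
    failure_rate = failed_requests / total_requests
    return failure_rate >= DEFAULT_FAILURE_THRESHOLD_PERCENT


# e.g. 6 failures out of 10 requests -> 60% failure rate -> exceed the 0.5 threshold,
# so the deployment would be cooled down for DEFAULT_COOLDOWN_TIME_SECONDS.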
|
|
|
REPEATED_STREAMING_CHUNK_LIMIT = 100 |
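

# Illustrative sketch, not part of the library: REPEATED_STREAMING_CHUNK_LIMIT can act
# as a circuit breaker when a provider keeps returning the identical streaming chunk,
# which usually signals an upstream infinite loop. The class below is hypothetical.
class _RepeatedChunkGuard:
    def __init__(self):
        self._last_chunk = None
        self._repeats = 0

    def check(self, chunk: str) -> None:
        """Raise if the same chunk repeats more than the allowed number of times."""
        if chunk == self._last_chunk:
            self._repeats += 1
        else:
            self._last_chunk = chunk
            self._repeats = 1
        if self._repeats > REPEATED_STREAMING_CHUNK_LIMIT:
            raise RuntimeError(
                f"Received the same streaming chunk more than "
                f"{REPEATED_STREAMING_CHUNK_LIMIT} times; aborting the stream."
            )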
|
|
|
request_timeout: float = 6000  # time in seconds
|
|
|
LITELLM_CHAT_PROVIDERS = [
    "openai",
    "openai_like",
    "xai",
    "custom_openai",
    "text-completion-openai",
    "cohere",
    "cohere_chat",
    "clarifai",
    "anthropic",
    "anthropic_text",
    "replicate",
    "huggingface",
    "together_ai",
    "openrouter",
    "vertex_ai",
    "vertex_ai_beta",
    "gemini",
    "ai21",
    "baseten",
    "azure",
    "azure_text",
    "azure_ai",
    "sagemaker",
    "sagemaker_chat",
    "bedrock",
    "vllm",
    "nlp_cloud",
    "petals",
    "oobabooga",
    "ollama",
    "ollama_chat",
    "deepinfra",
    "perplexity",
    "mistral",
    "groq",
    "nvidia_nim",
    "cerebras",
    "ai21_chat",
    "volcengine",
    "codestral",
    "text-completion-codestral",
    "deepseek",
    "sambanova",
    "maritalk",
    "cloudflare",
    "fireworks_ai",
    "friendliai",
    "watsonx",
    "watsonx_text",
    "triton",
    "predibase",
    "databricks",
    "empower",
    "github",
    "custom",
    "litellm_proxy",
    "hosted_vllm",
    "lm_studio",
    "galadriel",
]
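

# Illustrative sketch (hypothetical helper, not a LiteLLM API): callers can validate a
# custom_llm_provider string against LITELLM_CHAT_PROVIDERS before routing a request.
def _is_supported_chat_provider(provider: str) -> bool:
    return provider in LITELLM_CHAT_PROVIDERS


# _is_supported_chat_provider("anthropic") -> True
# _is_supported_chat_provider("not-a-provider") -> False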
|
|
|
|
|
OPENAI_CHAT_COMPLETION_PARAMS = [
    "functions",
    "function_call",
    "temperature",
    "top_p",
    "n",
    "stream",
    "stream_options",
    "stop",
    "max_completion_tokens",
    "modalities",
    "prediction",
    "audio",
    "max_tokens",
    "presence_penalty",
    "frequency_penalty",
    "logit_bias",
    "user",
    "request_timeout",
    "api_base",
    "api_version",
    "api_key",
    "deployment_id",
    "organization",
    "base_url",
    "default_headers",
    "timeout",
    "response_format",
    "seed",
    "tools",
    "tool_choice",
    "max_retries",
    "parallel_tool_calls",
    "logprobs",
    "top_logprobs",
    "reasoning_effort",
    "extra_headers",
]
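

# Illustrative sketch, assuming the list above is used to separate OpenAI-recognized
# arguments from provider-specific extras. The helper below is hypothetical, not the
# library's actual param-mapping code.
def _split_openai_params(kwargs: dict) -> tuple:
    """Return (openai_params, extra_params) based on OPENAI_CHAT_COMPLETION_PARAMS."""
    openai_params = {k: v for k, v in kwargs.items() if k in OPENAI_CHAT_COMPLETION_PARAMS}
    extra_params = {k: v for k, v in kwargs.items() if k not in OPENAI_CHAT_COMPLETION_PARAMS}
    return openai_params, extra_params


# _split_openai_params({"temperature": 0.2, "top_k": 40})
# -> ({"temperature": 0.2}, {"top_k": 40})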
|
|
|
openai_compatible_endpoints: List = [
    "api.perplexity.ai",
    "api.endpoints.anyscale.com/v1",
    "api.deepinfra.com/v1/openai",
    "api.mistral.ai/v1",
    "codestral.mistral.ai/v1/chat/completions",
    "codestral.mistral.ai/v1/fim/completions",
    "api.groq.com/openai/v1",
    "https://integrate.api.nvidia.com/v1",
    "api.deepseek.com/v1",
    "api.together.xyz/v1",
    "app.empower.dev/api/v1",
    "https://api.friendli.ai/serverless/v1",
    "api.sambanova.ai/v1",
    "api.x.ai/v1",
    "api.galadriel.ai/v1",
]
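

# Illustrative sketch (hypothetical helper): detecting whether an api_base points at a
# known OpenAI-compatible endpoint. Entries above are stored with and without a scheme,
# so the comparison strips "http(s)://" and trailing slashes before matching.
def _is_openai_compatible_endpoint(api_base: str) -> bool:
    def _normalize(url: str) -> str:
        for scheme in ("https://", "http://"):
            if url.startswith(scheme):
                url = url[len(scheme):]
        return url.rstrip("/")

    normalized = _normalize(api_base)
    return any(normalized == _normalize(endpoint) for endpoint in openai_compatible_endpoints)


# _is_openai_compatible_endpoint("https://api.groq.com/openai/v1") -> True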
|
|
|
|
|
openai_compatible_providers: List = [
    "anyscale",
    "mistral",
    "groq",
    "nvidia_nim",
    "cerebras",
    "sambanova",
    "ai21_chat",
    "ai21",
    "volcengine",
    "codestral",
    "deepseek",
    "deepinfra",
    "perplexity",
    "xinference",
    "xai",
    "together_ai",
    "fireworks_ai",
    "empower",
    "friendliai",
    "azure_ai",
    "github",
    "litellm_proxy",
    "hosted_vllm",
    "lm_studio",
    "galadriel",
]
|
openai_text_completion_compatible_providers: List = [
    "together_ai",
    "fireworks_ai",
    "hosted_vllm",
]

_openai_like_providers: List = [
    "predibase",
    "databricks",
    "watsonx",
]
|
|
|
replicate_models: List = [ |
|
|
|
"replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf", |
|
"a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52", |
|
"meta/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db", |
|
|
|
"replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b", |
|
"joehoover/instructblip-vicuna13b:c4c54e3c8c97cd50c2d2fec9be3b6065563ccf7d43787fb99f84151b867178fe", |
|
|
|
"daanelson/flan-t5-large:ce962b3f6792a57074a601d3979db5839697add2e4e02696b3ced4c022d4767f", |
|
|
|
"replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5", |
|
"replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad", |
|
] |
|
|
|
clarifai_models: List = [ |
|
"clarifai/meta.Llama-3.Llama-3-8B-Instruct", |
|
"clarifai/gcp.generate.gemma-1_1-7b-it", |
|
"clarifai/mistralai.completion.mixtral-8x22B", |
|
"clarifai/cohere.generate.command-r-plus", |
|
"clarifai/databricks.drbx.dbrx-instruct", |
|
"clarifai/mistralai.completion.mistral-large", |
|
"clarifai/mistralai.completion.mistral-medium", |
|
"clarifai/mistralai.completion.mistral-small", |
|
"clarifai/mistralai.completion.mixtral-8x7B-Instruct-v0_1", |
|
"clarifai/gcp.generate.gemma-2b-it", |
|
"clarifai/gcp.generate.gemma-7b-it", |
|
"clarifai/deci.decilm.deciLM-7B-instruct", |
|
"clarifai/mistralai.completion.mistral-7B-Instruct", |
|
"clarifai/gcp.generate.gemini-pro", |
|
"clarifai/anthropic.completion.claude-v1", |
|
"clarifai/anthropic.completion.claude-instant-1_2", |
|
"clarifai/anthropic.completion.claude-instant", |
|
"clarifai/anthropic.completion.claude-v2", |
|
"clarifai/anthropic.completion.claude-2_1", |
|
"clarifai/meta.Llama-2.codeLlama-70b-Python", |
|
"clarifai/meta.Llama-2.codeLlama-70b-Instruct", |
|
"clarifai/openai.completion.gpt-3_5-turbo-instruct", |
|
"clarifai/meta.Llama-2.llama2-7b-chat", |
|
"clarifai/meta.Llama-2.llama2-13b-chat", |
|
"clarifai/meta.Llama-2.llama2-70b-chat", |
|
"clarifai/openai.chat-completion.gpt-4-turbo", |
|
"clarifai/microsoft.text-generation.phi-2", |
|
"clarifai/meta.Llama-2.llama2-7b-chat-vllm", |
|
"clarifai/upstage.solar.solar-10_7b-instruct", |
|
"clarifai/openchat.openchat.openchat-3_5-1210", |
|
"clarifai/togethercomputer.stripedHyena.stripedHyena-Nous-7B", |
|
"clarifai/gcp.generate.text-bison", |
|
"clarifai/meta.Llama-2.llamaGuard-7b", |
|
"clarifai/fblgit.una-cybertron.una-cybertron-7b-v2", |
|
"clarifai/openai.chat-completion.GPT-4", |
|
"clarifai/openai.chat-completion.GPT-3_5-turbo", |
|
"clarifai/ai21.complete.Jurassic2-Grande", |
|
"clarifai/ai21.complete.Jurassic2-Grande-Instruct", |
|
"clarifai/ai21.complete.Jurassic2-Jumbo-Instruct", |
|
"clarifai/ai21.complete.Jurassic2-Jumbo", |
|
"clarifai/ai21.complete.Jurassic2-Large", |
|
"clarifai/cohere.generate.cohere-generate-command", |
|
"clarifai/wizardlm.generate.wizardCoder-Python-34B", |
|
"clarifai/wizardlm.generate.wizardLM-70B", |
|
"clarifai/tiiuae.falcon.falcon-40b-instruct", |
|
"clarifai/togethercomputer.RedPajama.RedPajama-INCITE-7B-Chat", |
|
"clarifai/gcp.generate.code-gecko", |
|
"clarifai/gcp.generate.code-bison", |
|
"clarifai/mistralai.completion.mistral-7B-OpenOrca", |
|
"clarifai/mistralai.completion.openHermes-2-mistral-7B", |
|
"clarifai/wizardlm.generate.wizardLM-13B", |
|
"clarifai/huggingface-research.zephyr.zephyr-7B-alpha", |
|
"clarifai/wizardlm.generate.wizardCoder-15B", |
|
"clarifai/microsoft.text-generation.phi-1_5", |
|
"clarifai/databricks.Dolly-v2.dolly-v2-12b", |
|
"clarifai/bigcode.code.StarCoder", |
|
"clarifai/salesforce.xgen.xgen-7b-8k-instruct", |
|
"clarifai/mosaicml.mpt.mpt-7b-instruct", |
|
"clarifai/anthropic.completion.claude-3-opus", |
|
"clarifai/anthropic.completion.claude-3-sonnet", |
|
"clarifai/gcp.generate.gemini-1_5-pro", |
|
"clarifai/gcp.generate.imagen-2", |
|
"clarifai/salesforce.blip.general-english-image-caption-blip-2", |
|
] |
|
|
|
|
|
huggingface_models: List = [
    "meta-llama/Llama-2-7b-hf",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-2-13b-hf",
    "meta-llama/Llama-2-13b-chat-hf",
    "meta-llama/Llama-2-70b-hf",
    "meta-llama/Llama-2-70b-chat-hf",
    "meta-llama/Llama-2-7b",
    "meta-llama/Llama-2-7b-chat",
    "meta-llama/Llama-2-13b",
    "meta-llama/Llama-2-13b-chat",
    "meta-llama/Llama-2-70b",
    "meta-llama/Llama-2-70b-chat",
]

empower_models = [
    "empower/empower-functions",
    "empower/empower-functions-small",
]
|
|
|
together_ai_models: List = [
    "togethercomputer/llama-2-70b-chat",

    "togethercomputer/llama-2-70b",
    "togethercomputer/LLaMA-2-7B-32K",
    "togethercomputer/Llama-2-7B-32K-Instruct",
    "togethercomputer/llama-2-7b",

    "togethercomputer/falcon-40b-instruct",
    "togethercomputer/falcon-7b-instruct",

    "togethercomputer/alpaca-7b",

    "HuggingFaceH4/starchat-alpha",

    "togethercomputer/CodeLlama-34b",
    "togethercomputer/CodeLlama-34b-Instruct",
    "togethercomputer/CodeLlama-34b-Python",
    "defog/sqlcoder",
    "NumbersStation/nsql-llama-2-7B",
    "WizardLM/WizardCoder-15B-V1.0",
    "WizardLM/WizardCoder-Python-34B-V1.0",

    "NousResearch/Nous-Hermes-Llama2-13b",
    "Austism/chronos-hermes-13b",
    "upstage/SOLAR-0-70b-16bit",
    "WizardLM/WizardLM-70B-V1.0",
]
|
|
|
|
|
baseten_models: List = [
    "qvv0xeq",  # FALCON 7B
    "q841o8w",  # WizardLM
    "31dxrj3",  # Mosaic ML
]
|
|
|
|
|
open_ai_embedding_models: List = ["text-embedding-ada-002"] |
|
cohere_embedding_models: List = [
    "embed-english-v3.0",
    "embed-english-light-v3.0",
    "embed-multilingual-v3.0",
    "embed-english-v2.0",
    "embed-english-light-v2.0",
    "embed-multilingual-v2.0",
]

bedrock_embedding_models: List = [
    "amazon.titan-embed-text-v1",
    "cohere.embed-english-v3",
    "cohere.embed-multilingual-v3",
]
|
|
|
|
|
OPENAI_FINISH_REASONS = ["stop", "length", "function_call", "content_filter", "null"] |
|
HUMANLOOP_PROMPT_CACHE_TTL_SECONDS = 60 |
|
RESPONSE_FORMAT_TOOL_NAME = "json_tool_call" |
|
|
|
|
|
AZURE_STORAGE_MSFT_VERSION = "2019-07-07" |
|
|
|
|
|
|
|
MAX_SPENDLOG_ROWS_TO_QUERY = 1_000_000
|
|
|
RATE_LIMIT_ERROR_MESSAGE_FOR_VIRTUAL_KEY = "LiteLLM Virtual Key user_api_key_hash" |
|
|
|
|
|
BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES = [
    "agents/",
    "knowledgebases/",
    "flows/",
    "retrieveAndGenerate/",
    "rerank/",
    "generateQuery/",
    "optimize-prompt/",
]
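

# Illustrative sketch (hypothetical helper): matching an incoming proxy path against the
# Bedrock Agent Runtime pass-through route prefixes listed above.
def _is_bedrock_agent_runtime_route(path: str) -> bool:
    normalized = path.lstrip("/")
    return any(normalized.startswith(route) for route in BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES)


# _is_bedrock_agent_runtime_route("/agents/invoke") -> True
# _is_bedrock_agent_runtime_route("/chat/completions") -> False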
|
|
|
BATCH_STATUS_POLL_INTERVAL_SECONDS = 3600 |
|
BATCH_STATUS_POLL_MAX_ATTEMPTS = 24 |
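

# Illustrative sketch, assuming the two constants above bound a batch-status polling loop
# (poll at most once an hour, for roughly 24 hours). The function, the `get_status`
# callable, and the terminal status strings are hypothetical examples.
def _poll_batch_until_done(get_status) -> str:
    """Poll `get_status()` until it returns a terminal state or attempts run out."""
    import time  # local import to keep this illustrative helper self-contained

    for _ in range(BATCH_STATUS_POLL_MAX_ATTEMPTS):
        status = get_status()
        if status in ("completed", "failed", "cancelled", "expired"):
            return status
        time.sleep(BATCH_STATUS_POLL_INTERVAL_SECONDS)
    return "timed_out"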
|
|
|
HEALTH_CHECK_TIMEOUT_SECONDS = 60 |
|
|
|
UI_SESSION_TOKEN_TEAM_ID = "litellm-dashboard" |
|
|