Spaces:

Raju2024
/

TestLLM

Running

App Files Files Community

TestLLM / litellm /constants.py

Raju2024

Upload 1072 files

e3278e4 verified 12 days ago

raw

history blame contribute delete

11.7 kB

	from typing import List

	# ---- Router / batching / retry defaults ----
	ROUTER_MAX_FALLBACKS = 5
	DEFAULT_BATCH_SIZE = 512
	DEFAULT_FLUSH_INTERVAL_SECONDS = 5
	DEFAULT_MAX_RETRIES = 2
	# Expressed as a fraction (0.5 == 50%): by default a deployment is cooled
	# down if 50% of requests fail in a given minute.
	DEFAULT_FAILURE_THRESHOLD_PERCENT = 0.5
	DEFAULT_COOLDOWN_TIME_SECONDS = 5
	DEFAULT_REPLICATE_POLLING_RETRIES = 5
	DEFAULT_REPLICATE_POLLING_DELAY_SECONDS = 1

	# ---- Image defaults ----
	DEFAULT_IMAGE_TOKEN_COUNT = 250
	DEFAULT_IMAGE_WIDTH = 300
	DEFAULT_IMAGE_HEIGHT = 300

	# Minimum number of requests to consider "reasonable traffic".
	# Used for single-deployment cooldown logic.
	SINGLE_DEPLOYMENT_TRAFFIC_FAILURE_THRESHOLD = 1000

	#### RELIABILITY ####
	# Catches a model that starts looping the same chunk while streaming.
	# The default is deliberately high to prevent false positives.
	REPEATED_STREAMING_CHUNK_LIMIT = 100

	#### Networking settings ####
	# Time in seconds.
	request_timeout: float = 6000

	# Provider identifier strings listed as chat providers.
	# Order is preserved as-is; entries are unique.
	LITELLM_CHAT_PROVIDERS = [
	    "openai",
	    "openai_like",
	    "xai",
	    "custom_openai",
	    "text-completion-openai",
	    "cohere",
	    "cohere_chat",
	    "clarifai",
	    "anthropic",
	    "anthropic_text",
	    "replicate",
	    "huggingface",
	    "together_ai",
	    "openrouter",
	    "vertex_ai",
	    "vertex_ai_beta",
	    "gemini",
	    "ai21",
	    "baseten",
	    "azure",
	    "azure_text",
	    "azure_ai",
	    "sagemaker",
	    "sagemaker_chat",
	    "bedrock",
	    "vllm",
	    "nlp_cloud",
	    "petals",
	    "oobabooga",
	    "ollama",
	    "ollama_chat",
	    "deepinfra",
	    "perplexity",
	    "mistral",
	    "groq",
	    "nvidia_nim",
	    "cerebras",
	    "ai21_chat",
	    "volcengine",
	    "codestral",
	    "text-completion-codestral",
	    "deepseek",
	    "sambanova",
	    "maritalk",
	    "cloudflare",
	    "fireworks_ai",
	    "friendliai",
	    "watsonx",
	    "watsonx_text",
	    "triton",
	    "predibase",
	    "databricks",
	    "empower",
	    "github",
	    "custom",
	    "litellm_proxy",
	    "hosted_vllm",
	    "lm_studio",
	    "galadriel",
	]


	# Parameter names listed as OpenAI chat-completion params.
	# Each name appears exactly once (no duplicate "temperature"), so the list
	# can be safely de-duplicated, counted, or converted to a set without
	# changing its size. Relative order of the entries is preserved.
	OPENAI_CHAT_COMPLETION_PARAMS = [
	    "functions",
	    "function_call",
	    "temperature",
	    "top_p",
	    "n",
	    "stream",
	    "stream_options",
	    "stop",
	    "max_completion_tokens",
	    "modalities",
	    "prediction",
	    "audio",
	    "max_tokens",
	    "presence_penalty",
	    "frequency_penalty",
	    "logit_bias",
	    "user",
	    "request_timeout",
	    "api_base",
	    "api_version",
	    "api_key",
	    "deployment_id",
	    "organization",
	    "base_url",
	    "default_headers",
	    "timeout",
	    "response_format",
	    "seed",
	    "tools",
	    "tool_choice",
	    "max_retries",
	    "parallel_tool_calls",
	    "logprobs",
	    "top_logprobs",
	    "reasoning_effort",
	    "extra_headers",
	]

	# Endpoint strings listed as OpenAI-compatible.
	# NOTE(review): two entries carry an explicit "https://" scheme while the
	# others do not; values are kept verbatim — confirm whether the matching
	# code relies on this difference before normalizing.
	openai_compatible_endpoints: List[str] = [
	    "api.perplexity.ai",
	    "api.endpoints.anyscale.com/v1",
	    "api.deepinfra.com/v1/openai",
	    "api.mistral.ai/v1",
	    "codestral.mistral.ai/v1/chat/completions",
	    "codestral.mistral.ai/v1/fim/completions",
	    "api.groq.com/openai/v1",
	    "https://integrate.api.nvidia.com/v1",
	    "api.deepseek.com/v1",
	    "api.together.xyz/v1",
	    "app.empower.dev/api/v1",
	    "https://api.friendli.ai/serverless/v1",
	    "api.sambanova.ai/v1",
	    "api.x.ai/v1",
	    "api.galadriel.ai/v1",
	]


	# Provider identifier strings listed as OpenAI-compatible.
	openai_compatible_providers: List[str] = [
	    "anyscale",
	    "mistral",
	    "groq",
	    "nvidia_nim",
	    "cerebras",
	    "sambanova",
	    "ai21_chat",
	    "ai21",
	    "volcengine",
	    "codestral",
	    "deepseek",
	    "deepinfra",
	    "perplexity",
	    "xinference",
	    "xai",
	    "together_ai",
	    "fireworks_ai",
	    "empower",
	    "friendliai",
	    "azure_ai",
	    "github",
	    "litellm_proxy",
	    "hosted_vllm",
	    "lm_studio",
	    "galadriel",
	]
	# Providers that support `/v1/completions`.
	openai_text_completion_compatible_providers: List[str] = [
	    "together_ai",
	    "fireworks_ai",
	    "hosted_vllm",
	]
	# Private helper: similar to openai but require some custom auth / endpoint
	# handling, so can't use the openai sdk.
	_openai_like_providers: List[str] = [
	    "predibase",
	    "databricks",
	    "watsonx",
	]
	# Well-supported Replicate LLMs; each entry is written as
	# "<owner>/<model>:<version hash>".
	replicate_models: List[str] = [
	    # --- Llama family ---
	    "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
	    "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52",
	    "meta/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db",
	    # --- Vicuna ---
	    "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b",
	    "joehoover/instructblip-vicuna13b:c4c54e3c8c97cd50c2d2fec9be3b6065563ccf7d43787fb99f84151b867178fe",
	    # --- Flan T-5 ---
	    "daanelson/flan-t5-large:ce962b3f6792a57074a601d3979db5839697add2e4e02696b3ced4c022d4767f",
	    # --- Others ---
	    "replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5",
	    "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
	]

	# Clarifai model identifiers; every entry carries the "clarifai/" prefix.
	clarifai_models: List[str] = [
	    "clarifai/meta.Llama-3.Llama-3-8B-Instruct",
	    "clarifai/gcp.generate.gemma-1_1-7b-it",
	    "clarifai/mistralai.completion.mixtral-8x22B",
	    "clarifai/cohere.generate.command-r-plus",
	    "clarifai/databricks.drbx.dbrx-instruct",
	    "clarifai/mistralai.completion.mistral-large",
	    "clarifai/mistralai.completion.mistral-medium",
	    "clarifai/mistralai.completion.mistral-small",
	    "clarifai/mistralai.completion.mixtral-8x7B-Instruct-v0_1",
	    "clarifai/gcp.generate.gemma-2b-it",
	    "clarifai/gcp.generate.gemma-7b-it",
	    "clarifai/deci.decilm.deciLM-7B-instruct",
	    "clarifai/mistralai.completion.mistral-7B-Instruct",
	    "clarifai/gcp.generate.gemini-pro",
	    "clarifai/anthropic.completion.claude-v1",
	    "clarifai/anthropic.completion.claude-instant-1_2",
	    "clarifai/anthropic.completion.claude-instant",
	    "clarifai/anthropic.completion.claude-v2",
	    "clarifai/anthropic.completion.claude-2_1",
	    "clarifai/meta.Llama-2.codeLlama-70b-Python",
	    "clarifai/meta.Llama-2.codeLlama-70b-Instruct",
	    "clarifai/openai.completion.gpt-3_5-turbo-instruct",
	    "clarifai/meta.Llama-2.llama2-7b-chat",
	    "clarifai/meta.Llama-2.llama2-13b-chat",
	    "clarifai/meta.Llama-2.llama2-70b-chat",
	    "clarifai/openai.chat-completion.gpt-4-turbo",
	    "clarifai/microsoft.text-generation.phi-2",
	    "clarifai/meta.Llama-2.llama2-7b-chat-vllm",
	    "clarifai/upstage.solar.solar-10_7b-instruct",
	    "clarifai/openchat.openchat.openchat-3_5-1210",
	    "clarifai/togethercomputer.stripedHyena.stripedHyena-Nous-7B",
	    "clarifai/gcp.generate.text-bison",
	    "clarifai/meta.Llama-2.llamaGuard-7b",
	    "clarifai/fblgit.una-cybertron.una-cybertron-7b-v2",
	    "clarifai/openai.chat-completion.GPT-4",
	    "clarifai/openai.chat-completion.GPT-3_5-turbo",
	    "clarifai/ai21.complete.Jurassic2-Grande",
	    "clarifai/ai21.complete.Jurassic2-Grande-Instruct",
	    "clarifai/ai21.complete.Jurassic2-Jumbo-Instruct",
	    "clarifai/ai21.complete.Jurassic2-Jumbo",
	    "clarifai/ai21.complete.Jurassic2-Large",
	    "clarifai/cohere.generate.cohere-generate-command",
	    "clarifai/wizardlm.generate.wizardCoder-Python-34B",
	    "clarifai/wizardlm.generate.wizardLM-70B",
	    "clarifai/tiiuae.falcon.falcon-40b-instruct",
	    "clarifai/togethercomputer.RedPajama.RedPajama-INCITE-7B-Chat",
	    "clarifai/gcp.generate.code-gecko",
	    "clarifai/gcp.generate.code-bison",
	    "clarifai/mistralai.completion.mistral-7B-OpenOrca",
	    "clarifai/mistralai.completion.openHermes-2-mistral-7B",
	    "clarifai/wizardlm.generate.wizardLM-13B",
	    "clarifai/huggingface-research.zephyr.zephyr-7B-alpha",
	    "clarifai/wizardlm.generate.wizardCoder-15B",
	    "clarifai/microsoft.text-generation.phi-1_5",
	    "clarifai/databricks.Dolly-v2.dolly-v2-12b",
	    "clarifai/bigcode.code.StarCoder",
	    "clarifai/salesforce.xgen.xgen-7b-8k-instruct",
	    "clarifai/mosaicml.mpt.mpt-7b-instruct",
	    "clarifai/anthropic.completion.claude-3-opus",
	    "clarifai/anthropic.completion.claude-3-sonnet",
	    "clarifai/gcp.generate.gemini-1_5-pro",
	    "clarifai/gcp.generate.imagen-2",
	    "clarifai/salesforce.blip.general-english-image-caption-blip-2",
	]


	# These models have been tested extensively. By default, all
	# text2text-generation and text-generation models are supported by liteLLM.
	# See https://docs.litellm.ai/docs/providers
	huggingface_models: List[str] = [
	    "meta-llama/Llama-2-7b-hf",
	    "meta-llama/Llama-2-7b-chat-hf",
	    "meta-llama/Llama-2-13b-hf",
	    "meta-llama/Llama-2-13b-chat-hf",
	    "meta-llama/Llama-2-70b-hf",
	    "meta-llama/Llama-2-70b-chat-hf",
	    "meta-llama/Llama-2-7b",
	    "meta-llama/Llama-2-7b-chat",
	    "meta-llama/Llama-2-13b",
	    "meta-llama/Llama-2-13b-chat",
	    "meta-llama/Llama-2-70b",
	    "meta-llama/Llama-2-70b-chat",
	]
	# Empower model identifiers.
	empower_models = [
	    "empower/empower-functions",
	    "empower/empower-functions-small",
	]

	# Supports all together ai models; just pass in the model id, e.g.
	# completion(model="together_computer/replit_code_3b",...)
	together_ai_models: List[str] = [
	    # --- llama llms: chat ---
	    "togethercomputer/llama-2-70b-chat",
	    # --- llama llms: language / instruct ---
	    "togethercomputer/llama-2-70b",
	    "togethercomputer/LLaMA-2-7B-32K",
	    "togethercomputer/Llama-2-7B-32K-Instruct",
	    "togethercomputer/llama-2-7b",
	    # --- falcon llms ---
	    "togethercomputer/falcon-40b-instruct",
	    "togethercomputer/falcon-7b-instruct",
	    # --- alpaca ---
	    "togethercomputer/alpaca-7b",
	    # --- chat llms ---
	    "HuggingFaceH4/starchat-alpha",
	    # --- code llms ---
	    "togethercomputer/CodeLlama-34b",
	    "togethercomputer/CodeLlama-34b-Instruct",
	    "togethercomputer/CodeLlama-34b-Python",
	    "defog/sqlcoder",
	    "NumbersStation/nsql-llama-2-7B",
	    "WizardLM/WizardCoder-15B-V1.0",
	    "WizardLM/WizardCoder-Python-34B-V1.0",
	    # --- language llms ---
	    "NousResearch/Nous-Hermes-Llama2-13b",
	    "Austism/chronos-hermes-13b",
	    "upstage/SOLAR-0-70b-16bit",
	    "WizardLM/WizardLM-70B-V1.0",
	]


	# Baseten model ids.
	# NOTE(review): the accompanying labels are "FALCON 7B", "WizardLM" and
	# "Mosaic ML" — presumably one per entry, in that order; confirm.
	baseten_models: List[str] = [
	    "qvv0xeq",
	    "q841o8w",
	    "31dxrj3",
	]


	# Embedding model identifiers, one list per provider.
	open_ai_embedding_models: List[str] = [
	    "text-embedding-ada-002",
	]
	cohere_embedding_models: List[str] = [
	    # v3.0
	    "embed-english-v3.0",
	    "embed-english-light-v3.0",
	    "embed-multilingual-v3.0",
	    # v2.0
	    "embed-english-v2.0",
	    "embed-english-light-v2.0",
	    "embed-multilingual-v2.0",
	]
	bedrock_embedding_models: List[str] = [
	    "amazon.titan-embed-text-v1",
	    "cohere.embed-english-v3",
	    "cohere.embed-multilingual-v3",
	]


	# Finish-reason strings; note that "null" here is the literal string.
	OPENAI_FINISH_REASONS = [
	    "stop",
	    "length",
	    "function_call",
	    "content_filter",
	    "null",
	]
	# 1 minute.
	HUMANLOOP_PROMPT_CACHE_TTL_SECONDS = 60
	# Default tool name used when converting response format to tool call.
	RESPONSE_FORMAT_TOOL_NAME = "json_tool_call"

	########################### Logging Callback Constants ###########################
	AZURE_STORAGE_MSFT_VERSION = "2019-07-07"

	########################### LiteLLM Proxy Specific Constants ###########################
	########################################################################################
	# If spendLogs has more than 1M rows, do not query the DB.
	MAX_SPENDLOG_ROWS_TO_QUERY = 1_000_000

	# Makes it clear this is a rate limit error for a litellm virtual key.
	RATE_LIMIT_ERROR_MESSAGE_FOR_VIRTUAL_KEY = "LiteLLM Virtual Key user_api_key_hash"

	# Pass-through route constants.
	BEDROCK_AGENT_RUNTIME_PASS_THROUGH_ROUTES = [
	    "agents/",
	    "knowledgebases/",
	    "flows/",
	    "retrieveAndGenerate/",
	    "rerank/",
	    "generateQuery/",
	    "optimize-prompt/",
	]

	# Poll every hour, at most 24 times (i.e. for 24 hours).
	BATCH_STATUS_POLL_INTERVAL_SECONDS = 3600
	BATCH_STATUS_POLL_MAX_ATTEMPTS = 24

	# 60 seconds.
	HEALTH_CHECK_TIMEOUT_SECONDS = 60

	UI_SESSION_TOKEN_TEAM_ID = "litellm-dashboard"