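# Tests for Vertex AI / Gemini completions through litellm: sync, async, and
# streaming calls, gemini-pro-vision, and function calling. Credentials are
# assembled from vertex_key.json plus VERTEX_AI_* environment variables by
# load_vertex_ai_credentials() below.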
import sys, os
import traceback
from dotenv import load_dotenv

load_dotenv()
import io

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest, asyncio
import litellm
from litellm import embedding, completion, completion_cost, Timeout, acompletion
from litellm import RateLimitError
import json
import tempfile

litellm.num_retries = 3
litellm.cache = None

user_message = "Write a short poem about the sky"
messages = [{"content": user_message, "role": "user"}]


def load_vertex_ai_credentials():
    # Define the path to the vertex_key.json file
    print("loading vertex ai credentials")
    filepath = os.path.dirname(os.path.abspath(__file__))
    vertex_key_path = filepath + "/vertex_key.json"

    # Read the existing content of the file or create an empty dictionary
    try:
        with open(vertex_key_path, "r") as file:
            # Read the file content
            print("Read vertexai file path")
            content = file.read()

            # If the file is empty or not valid JSON, create an empty dictionary
            if not content or not content.strip():
                service_account_key_data = {}
            else:
                # Attempt to load the existing JSON content
                file.seek(0)
                service_account_key_data = json.load(file)
    except FileNotFoundError:
        # If the file doesn't exist, create an empty dictionary
        service_account_key_data = {}

    # Update the service_account_key_data with environment variables
    private_key_id = os.environ.get("VERTEX_AI_PRIVATE_KEY_ID", "")
    private_key = os.environ.get("VERTEX_AI_PRIVATE_KEY", "")
    private_key = private_key.replace("\\n", "\n")
    service_account_key_data["private_key_id"] = private_key_id
    service_account_key_data["private_key"] = private_key

    # Create a temporary file
    with tempfile.NamedTemporaryFile(mode="w+", delete=False) as temp_file:
        # Write the updated content to the temporary file
        json.dump(service_account_key_data, temp_file, indent=2)

    # Export the temporary file as GOOGLE_APPLICATION_CREDENTIALS
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = os.path.abspath(temp_file.name)


async def get_response():
    load_vertex_ai_credentials()
    prompt = '\ndef count_nums(arr):\n """\n Write a function count_nums which takes an array of integers and returns\n the number of elements which has a sum of digits > 0.\n If a number is negative, then its first signed digit will be negative:\n e.g. -123 has signed digits -1, 2, and 3.\n >>> count_nums([]) == 0\n >>> count_nums([-1, 11, -11]) == 1\n >>> count_nums([1, 1, 2]) == 3\n """\n'
    try:
        response = await acompletion(
            model="gemini-pro",
            messages=[
                {
                    "role": "system",
                    "content": "Complete the given code with no more explanation. Remember that there is a 4-space indent before the first line of your generated code.",
                },
                {"role": "user", "content": prompt},
            ],
        )
        return response
    except litellm.UnprocessableEntityError as e:
        pass
    except Exception as e:
        pytest.fail(f"An error occurred - {str(e)}")


def test_vertex_ai():
    import random

    load_vertex_ai_credentials()
    test_models = (
        litellm.vertex_chat_models
        + litellm.vertex_code_chat_models
        + litellm.vertex_text_models
        + litellm.vertex_code_text_models
    )
    litellm.set_verbose = False
    litellm.vertex_project = "reliablekeys"

    test_models = random.sample(test_models, 1)
    # test_models += litellm.vertex_language_models  # always test gemini-pro
    # NOTE: the random sample above is currently overridden, so only the gemini
    # (language) models are exercised by this test.
    test_models = litellm.vertex_language_models  # always test gemini-pro
    for model in test_models:
        try:
            if model in [
                "code-gecko",
                "code-gecko@001",
                "code-gecko@002",
                "code-gecko@latest",
                "code-bison@001",
                "text-bison@001",
            ]:
                # our account does not have access to this model
                continue
            print("making request", model)
            response = completion(
                model=model,
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.7,
            )
            print("\nModel Response", response)
            print(response)
            assert type(response.choices[0].message.content) == str
            assert len(response.choices[0].message.content) > 1
        except Exception as e:
            pytest.fail(f"Error occurred: {e}")


# test_vertex_ai()


def test_vertex_ai_stream():
    load_vertex_ai_credentials()
    litellm.set_verbose = False
    litellm.vertex_project = "reliablekeys"
    import random

    test_models = (
        litellm.vertex_chat_models
        + litellm.vertex_code_chat_models
        + litellm.vertex_text_models
        + litellm.vertex_code_text_models
    )
    test_models = random.sample(test_models, 1)
    test_models += litellm.vertex_language_models  # always test gemini-pro
    for model in test_models:
        try:
            if model in [
                "code-gecko",
                "code-gecko@001",
                "code-gecko@002",
                "code-gecko@latest",
                "code-bison@001",
                "text-bison@001",
            ]:
                # our account does not have access to this model
                continue
            print("making request", model)
            response = completion(
                model=model,
                messages=[
                    {"role": "user", "content": "write 10 line code code for saying hi"}
                ],
                stream=True,
            )
            completed_str = ""
            for chunk in response:
                print(chunk)
                content = chunk.choices[0].delta.content or ""
                print("\n content", content)
                completed_str += content
                assert type(content) == str
                # pass
            assert len(completed_str) > 4
        except Exception as e:
            pytest.fail(f"Error occurred: {e}")


# test_vertex_ai_stream()


@pytest.mark.asyncio
async def test_async_vertexai_response():
    import random

    load_vertex_ai_credentials()
    test_models = (
        litellm.vertex_chat_models
        + litellm.vertex_code_chat_models
        + litellm.vertex_text_models
        + litellm.vertex_code_text_models
    )
    test_models = random.sample(test_models, 1)
    test_models += litellm.vertex_language_models  # always test gemini-pro
    for model in test_models:
        print(f"model being tested in async call: {model}")
        if model in [
            "code-gecko",
            "code-gecko@001",
            "code-gecko@002",
            "code-gecko@latest",
            "code-bison@001",
            "text-bison@001",
        ]:
            # our account does not have access to this model
            continue
        try:
            user_message = "Hello, how are you?"
            messages = [{"content": user_message, "role": "user"}]
            response = await acompletion(
                model=model, messages=messages, temperature=0.7, timeout=5
            )
            print(f"response: {response}")
        except litellm.Timeout as e:
            pass
        except Exception as e:
            pytest.fail(f"An exception occurred: {e}")


# asyncio.run(test_async_vertexai_response())


@pytest.mark.asyncio
async def test_async_vertexai_streaming_response():
    import random

    load_vertex_ai_credentials()
    test_models = (
        litellm.vertex_chat_models
        + litellm.vertex_code_chat_models
        + litellm.vertex_text_models
        + litellm.vertex_code_text_models
    )
    test_models = random.sample(test_models, 1)
    test_models += litellm.vertex_language_models  # always test gemini-pro
    for model in test_models:
        if model in [
            "code-gecko",
            "code-gecko@001",
            "code-gecko@002",
            "code-gecko@latest",
            "code-bison@001",
            "text-bison@001",
        ]:
            # our account does not have access to this model
            continue
        try:
            user_message = "Hello, how are you?"
            messages = [{"content": user_message, "role": "user"}]
            response = await acompletion(
                model="gemini-pro",
                messages=messages,
                temperature=0.7,
                timeout=5,
                stream=True,
            )
            print(f"response: {response}")
            complete_response = ""
            async for chunk in response:
                print(f"chunk: {chunk}")
                # guard against None content in the final streamed chunk
                complete_response += chunk.choices[0].delta.content or ""
            print(f"complete_response: {complete_response}")
            assert len(complete_response) > 0
        except litellm.Timeout as e:
            pass
        except Exception as e:
            print(e)
            pytest.fail(f"An exception occurred: {e}")


# asyncio.run(test_async_vertexai_streaming_response())


def test_gemini_pro_vision():
    try:
        load_vertex_ai_credentials()
        litellm.set_verbose = True
        litellm.num_retries = 0
        resp = litellm.completion(
            model="vertex_ai/gemini-pro-vision",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Whats in this image?"},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": "gs://cloud-samples-data/generative-ai/image/boats.jpeg"
                            },
                        },
                    ],
                }
            ],
        )
        print(resp)

        prompt_tokens = resp.usage.prompt_tokens

        # DO NOT DELETE this ASSERT
        # Google counts the prompt tokens for us; we should ensure we use the token count from the original response
        assert prompt_tokens == 263  # the gemini api returns 263 to us
    except Exception as e:
        import traceback

        traceback.print_exc()
        raise e


# test_gemini_pro_vision()
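# In the vision test above the image is passed as a gs:// URI, so Vertex AI
# reads it straight from Cloud Storage rather than the test downloading it.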


def gemini_pro_function_calling():
    load_vertex_ai_credentials()
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
    completion = litellm.completion(
        model="gemini-pro", messages=messages, tools=tools, tool_choice="auto"
    )
    print(f"completion: {completion}")


# gemini_pro_function_calling()
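# A minimal sketch (not invoked in CI) of pulling the tool call back out of a
# completion like the one above. Field names follow the OpenAI-compatible
# response object litellm returns, and it assumes the model actually chose to
# call the function.
def _print_tool_call(response):
    # first tool call requested by the model
    tool_call = response.choices[0].message.tool_calls[0]
    print(tool_call.function.name, tool_call.function.arguments)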


async def gemini_pro_async_function_calling():
    load_vertex_ai_credentials()
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_current_weather",
                "description": "Get the current weather in a given location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state, e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }
    ]
    messages = [{"role": "user", "content": "What's the weather like in Boston today?"}]
    completion = await litellm.acompletion(
        model="gemini-pro", messages=messages, tools=tools, tool_choice="auto"
    )
    print(f"completion: {completion}")


asyncio.run(gemini_pro_async_function_calling())


# Extra gemini Vision tests for completion + stream, async, async + stream
# if we run into issues with gemini, we will also add these to our ci/cd pipeline

# def test_gemini_pro_vision_stream():
#     try:
#         litellm.set_verbose = False
#         litellm.num_retries=0
#         print("streaming response from gemini-pro-vision")
#         resp = litellm.completion(
#             model = "vertex_ai/gemini-pro-vision",
#             messages=[
#                 {
#                     "role": "user",
#                     "content": [
#                         {
#                             "type": "text",
#                             "text": "Whats in this image?"
#                         },
#                         {
#                             "type": "image_url",
#                             "image_url": {
#                                 "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
#                             }
#                         }
#                     ]
#                 }
#             ],
#             stream=True
#         )
#         print(resp)
#         for chunk in resp:
#             print(chunk)
#     except Exception as e:
#         import traceback
#         traceback.print_exc()
#         raise e
# test_gemini_pro_vision_stream()


# def test_gemini_pro_vision_async():
#     try:
#         litellm.set_verbose = True
#         litellm.num_retries=0
#         async def test():
#             resp = await litellm.acompletion(
#                 model = "vertex_ai/gemini-pro-vision",
#                 messages=[
#                     {
#                         "role": "user",
#                         "content": [
#                             {
#                                 "type": "text",
#                                 "text": "Whats in this image?"
#                             },
#                             {
#                                 "type": "image_url",
#                                 "image_url": {
#                                     "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
#                                 }
#                             }
#                         ]
#                     }
#                 ],
#             )
#             print("async response gemini pro vision")
#             print(resp)
#         asyncio.run(test())
#     except Exception as e:
#         import traceback
#         traceback.print_exc()
#         raise e
# test_gemini_pro_vision_async()


# def test_gemini_pro_vision_async_stream():
#     try:
#         litellm.set_verbose = True
#         litellm.num_retries=0
#         async def test():
#             resp = await litellm.acompletion(
#                 model = "vertex_ai/gemini-pro-vision",
#                 messages=[
#                     {
#                         "role": "user",
#                         "content": [
#                             {
#                                 "type": "text",
#                                 "text": "Whats in this image?"
#                             },
#                             {
#                                 "type": "image_url",
#                                 "image_url": {
#                                     "url": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
#                                 }
#                             }
#                         ]
#                     }
#                 ],
#                 stream=True
#             )
#             print("async response gemini pro vision")
#             print(resp)
#             for chunk in resp:
#                 print(chunk)
#         asyncio.run(test())
#     except Exception as e:
#         import traceback
#         traceback.print_exc()
#         raise e
# test_gemini_pro_vision_async_stream()