"""Manual smoke-test script for the HTTP API served at ``BASE_URL``.

Each ``test_*`` function sends one request to an endpoint and prints the
response to stdout. Nothing is asserted; the output is meant to be read by a
person. Run the file directly to exercise every endpoint in sequence.
"""

import json

import requests

BASE_URL = "http://localhost:8000"
# NOTE(review): this is a credential committed in source. Replace it with a
# valid API key, and prefer loading it from outside the repository.
API_KEY = "C0TNRcI8EuhmXpRTNiNsGbUHWBG6KFQt"  # Replace with a valid API key

# (connect, read) timeouts in seconds. Without a timeout, requests waits
# forever on a server that accepts the connection but never answers. The read
# timeout applies between received bytes, so it is kept generous for slow
# model responses.
REQUEST_TIMEOUT = (10, 300)

headers = {
    "Content-Type": "application/json",
    "Authorization": f"Bearer {API_KEY}",  # Use Bearer token authentication
}


def _print_json_response(response: requests.Response) -> None:
    """Pretty-print the JSON body of *response*, followed by a blank gap.

    If the body is not valid JSON, print the status code and the raw text
    instead of raising, so the remaining checks still run.
    """
    try:
        body = response.json()
    except ValueError:
        # requests' JSON decode errors derive from ValueError.
        print(f"[HTTP {response.status_code}] non-JSON response body:")
        print(response.text)
    else:
        print(json.dumps(body, indent=2))
    print("\n")


def _strip_sse_prefix(line: str) -> str:
    """Return *line* without a leading ``data:`` field name and whitespace.

    Only the prefix is removed; any later ``data: `` inside the payload is
    left untouched.
    """
    line = line.strip()
    if line.startswith("data:"):
        line = line[len("data:"):].strip()
    return line


def test_chat_completions_non_stream() -> None:
    """POST a single-message chat completion and print the JSON reply."""
    url = f"{BASE_URL}/v1/chat/completions"
    payload = {
        "model": "meta-llama-405b-turbo",
        "messages": [
            {"role": "user", "content": "Hello, how are you?"},
        ],
    }
    print("Chat Completions Non Streaming Response:")
    response = requests.post(
        url, headers=headers, json=payload, timeout=REQUEST_TIMEOUT
    )
    _print_json_response(response)


def test_chat_completions_stream() -> None:
    """POST a streaming chat completion and print each received line."""
    url = f"{BASE_URL}/v1/chat/completions"
    payload = {
        "model": "meta-llama-405b-turbo",
        "messages": [
            {"role": "user", "content": "Hello, how are you?"},
        ],
        "stream": True,
    }
    print("Chat Completions Streaming Response:")
    # The context manager releases the connection even if iteration fails.
    with requests.post(
        url,
        headers=headers,
        json=payload,
        stream=True,
        timeout=REQUEST_TIMEOUT,
    ) as response:
        for chunk in response.iter_lines():
            if not chunk:
                # Skip empty lines in the stream.
                continue
            print(_strip_sse_prefix(chunk.decode("utf-8")), flush=True)
    print("\n")


def test_rate_limit_status() -> None:
    """GET the rate-limit status endpoint and print the JSON reply."""
    url = f"{BASE_URL}/rate_limit/status"  # Updated endpoint
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    print("Rate Limit Status Response:")
    _print_json_response(response)


def test_subscription_status() -> None:
    """GET the subscription status endpoint and print the JSON reply."""
    url = f"{BASE_URL}/subscription/status"  # Updated endpoint
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    print("Subscription Status Response:")
    _print_json_response(response)


def test_available_models() -> None:
    """GET the models endpoint and print the JSON reply."""
    url = f"{BASE_URL}/models"  # Updated endpoint
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    print("Available Models Response:")
    _print_json_response(response)


def main() -> None:
    """Run every check in the same order as the original script."""
    test_chat_completions_non_stream()
    test_chat_completions_stream()
    test_rate_limit_status()
    test_subscription_status()
    test_available_models()


if __name__ == "__main__":
    main()