# API_provider.py
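"""Minimal client for an OpenAI-style chat-completions endpoint.

The base URL is read from the AMIGO_BASE_URL environment variable (loaded
from a local .env file via python-dotenv). API_Inference() supports both
streaming (server-sent events) and non-streaming responses.
"""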
import json
import os
import uuid
from typing import Dict, Iterator, List, Union

import requests
from dotenv import load_dotenv

load_dotenv()
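# A .env file is expected alongside this script; the URL below is a
# placeholder, not the real endpoint:
#   AMIGO_BASE_URL=https://example.com/v1/chat/completions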
AVAILABLE_MODELS = [
"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
"o1-mini",
"claude-3-sonnet-20240229",
"gemini-1.5-pro",
"gemini-1.5-flash",
"o1-preview",
"gpt-4o"
]
def API_Inference(
messages: List[Dict[str, str]],
model: str = "meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",
stream: bool = False,
max_tokens: int = 4000,
temperature: float = 0.7,
top_p: float = 0.95,
) -> Union[str, Iterator[str], None]:
if model not in AVAILABLE_MODELS:
raise ValueError(
f"Model {model} not available. Available models: {', '.join(AVAILABLE_MODELS)}"
)
    if model == "claude-3-sonnet-20240229":
        # Workaround: the Claude route apparently requires a system entry but
        # not the caller's own, so replace any system message with a stub.
        messages = [{"role": "system", "content": "."}] + [
            msg for msg in messages if msg["role"] != "system"
        ]
    api_endpoint = os.environ.get("AMIGO_BASE_URL")
    if not api_endpoint:
        raise RuntimeError(
            "AMIGO_BASE_URL is not set; add it to the environment or a .env file."
        )
headers = {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate, br, zstd",
"Authorization": "Bearer ",
"Content-Type": "application/json",
"User-Agent": (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0"
),
"X-Device-UUID": str(uuid.uuid4()),
}
payload = {
"messages": messages,
"model": model,
"max_tokens": max_tokens,
"stream": stream,
"presence_penalty": 0,
"temperature": temperature,
"top_p": top_p,
}
try:
response = requests.post(api_endpoint, headers=headers, json=payload, stream=stream)
response.raise_for_status()
except requests.exceptions.RequestException as e:
print("An error occurred while making the request:", e)
return None
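    # Streamed responses arrive as OpenAI-style server-sent events, one JSON
    # payload per "data:" line, terminated by "data: [DONE]", e.g.:
    #   data: {"choices": [{"delta": {"content": "Hel"}}]}
    #   data: [DONE]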
def process_response() -> Iterator[str]:
for line in response.iter_lines():
if line:
# Decode the line from bytes to string
decoded_line = line.decode('utf-8').strip()
if decoded_line.startswith("data: "):
data_str = decoded_line[6:]
if data_str == "[DONE]":
break
try:
# Load the JSON data
data_json = json.loads(data_str)
# Extract the content from the response
choices = data_json.get("choices", [])
if choices:
delta = choices[0].get("delta", {})
content = delta.get("content", "")
if content:
yield content
except json.JSONDecodeError:
print(f"Received non-JSON data: {data_str}")
    if stream:
        return process_response()
    # Non-streaming: the endpoint may return one JSON document (honouring
    # "stream": false) or SSE lines anyway; handle both.
    try:
        return response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError):
        return "".join(process_response())
if __name__ == "__main__":
    # Example usage with a chat-style message list
conversation = [
{"role": "system", "content": "You are a helpful and friendly AI assistant."},
{"role": "user", "content": "What is the capital of France?"},
{"role": "assistant", "content": "Paris"},
{"role": "user", "content": "Who are you. Are you GPT-4o or gpt-3.5?"}
]
# For non-streaming response
response = API_Inference(conversation, stream=False, model="claude-3-sonnet-20240229")
print(response)
print("--" * 50)
    # For streaming response (API_Inference returns None if the request failed)
    stream_reply = API_Inference(conversation, stream=True, model="gpt-4o")
    if stream_reply is not None:
        for chunk in stream_reply:
            print(chunk, end="", flush=True)
        print()  # end the streamed output with a newline