tianlong12 committed
Commit 97ce33c · verified · 1 Parent(s): 3a20738

Create app.py

Files changed (1)
  1. app.py +101 -0
app.py ADDED
@@ -0,0 +1,101 @@
import random
import json
import aiohttp
from aiohttp import web
from datetime import datetime

# Debug mode switch
DEBUG_MODE = False

# Define fixed model information
DEFAULT_MODEL = "llama3.1-8b"
ALTERNATE_MODEL = "llama3.1-70b"
FIXED_URL = "https://api.cerebras.ai/v1/chat/completions"
FIXED_TEMPERATURE = 0.2
FIXED_TOP_P = 1
FIXED_MAX_TOKENS = 4096

# Log function for basic information
def log_basic_info(message):
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f"[{timestamp}] {message}")

# Asynchronous function to forward the request upstream and print debug information
async def send_request(auth_token, data):
    try:
        headers = {
            "accept": "application/json",
            "authorization": f"Bearer {auth_token}",
            "content-type": "application/json"
        }

        requested_model = data.get("model", DEFAULT_MODEL)
        model_to_use = ALTERNATE_MODEL if requested_model == ALTERNATE_MODEL else DEFAULT_MODEL

        log_basic_info(f"Requested model: {requested_model}, Using model: {model_to_use}")

        payload = {
            "messages": data.get("messages", []),
            "model": model_to_use,
            "temperature": FIXED_TEMPERATURE,
            "top_p": FIXED_TOP_P,
            "max_tokens": FIXED_MAX_TOKENS
        }

        if DEBUG_MODE:
            print("Request Payload:", json.dumps(payload, indent=4))
            print("Request Headers:", headers)

        async with aiohttp.ClientSession() as session:
            async with session.post(FIXED_URL, headers=headers, json=payload) as resp:
                response_text = await resp.text()
                response_json = json.loads(response_text)

                total_tokens = response_json.get('usage', {}).get('total_tokens', 'N/A')
                total_time = response_json.get('time_info', {}).get('total_time', 'N/A')
                # total_time may be missing; only apply float formatting when it is numeric
                if isinstance(total_time, (int, float)):
                    total_time = f"{total_time:.3f}"

                log_basic_info(f"Path: {FIXED_URL}, Status Code: {resp.status}, Total Tokens Used: {total_tokens}, Total Time: {total_time} seconds")

                return response_text

    except Exception as e:
        log_basic_info(f"Exception occurred: {str(e)}")
        raise

# Main handler function
async def handle_request(request):
    try:
        request_data = await request.json()
        headers = dict(request.headers)

        authorization_header = headers.get('Authorization', '')
        auth_tokens = [auth.strip() for auth in authorization_header.replace('Bearer ', '').split(',') if auth.strip()]

        if not auth_tokens:
            return web.json_response({"error": "Missing Authorization token"}, status=400)

        # Pick one token at random from the comma-separated list and use it upstream
        auth_token = random.choice(auth_tokens)
        headers['Authorization'] = f"Bearer {auth_token}"

        log_basic_info(f"Received request for path: {request.path}")

        if DEBUG_MODE:
            print("Received Request Data:", json.dumps(request_data, indent=4))
            print("Received Headers:", headers)

        response_text = await send_request(auth_token, request_data)

        return web.json_response(json.loads(response_text))

    except Exception as e:
        log_basic_info(f"Exception occurred in handling request: {str(e)}")
        return web.json_response({"error": str(e)}, status=500)

# Set up routes
app = web.Application()
app.router.add_post('/hf/v1/chat/completions', handle_request)

# Run the server
if __name__ == '__main__':
    web.run_app(app, host='0.0.0.0', port=7860)
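
For reference, a minimal client sketch showing how this proxy could be called once it is running. The localhost URL, the two placeholder API keys, and the use of the requests library are assumptions for illustration, not part of the commit; the proxy itself only requires that one or more keys be supplied comma-separated in the Authorization header, and it picks one at random per request.

# Hypothetical client call against the proxy above (assumes it runs locally on port 7860).
import requests

resp = requests.post(
    "http://localhost:7860/hf/v1/chat/completions",
    # Two placeholder Cerebras keys; the proxy selects one at random per request.
    headers={"Authorization": "Bearer csk-key-one,csk-key-two"},
    json={
        "model": "llama3.1-70b",  # anything other than llama3.1-70b falls back to llama3.1-8b
        "messages": [{"role": "user", "content": "Hello!"}],
    },
)
# Assuming the upstream API returns an OpenAI-style completion object:
print(resp.json()["choices"][0]["message"]["content"])

Random selection over the supplied keys spreads traffic across them, while temperature, top_p, and max_tokens are pinned server-side regardless of what the client sends.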