aithink committed on
Commit b6c2bbd · verified · 1 Parent(s): 4d7d027

Upload 32 files
.gitignore ADDED
@@ -0,0 +1,2 @@
+ secrets.json
+ __pycache__
Dockerfile ADDED
@@ -0,0 +1,8 @@
+ FROM python:3.11-slim
+ WORKDIR $HOME/app
+ COPY requirements.txt $HOME/app
+ RUN mkdir /.cache && chmod 777 /.cache
+ RUN pip install -r requirements.txt
+ COPY . $HOME/app
+ EXPOSE 23333
+ CMD ["python", "-m", "apis.chat_api"]
README.md CHANGED
@@ -1,10 +1,167 @@
- ---
- title: HFLLMAPI
- emoji: 🐨
- colorFrom: gray
- colorTo: green
- sdk: docker
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
1
+ ---
2
+ title: HF LLM API
3
+ emoji: ☪️
4
+ colorFrom: gray
5
+ colorTo: gray
6
+ sdk: docker
7
+ app_port: 23333
8
+ ---
9
+
10
+ ## HF-LLM-API
11
+
12
+ ![](https://img.shields.io/github/v/release/Niansuh/HF-LLM-API?label=HF-LLM-API&color=blue&cacheSeconds=60)
13
+
14
+ Hugging Face LLM Inference API in the OpenAI message format.
15
+
16
+ **Original project:** https://github.com/Hansimov/HF-LLM-API
17
+
18
+ ## Features
+
+ - Available Models (2024/04/20), also listable at runtime via the `/models` endpoint (see the sketch after this list):
+   - `mistral-7b`, `mixtral-8x7b`, `nous-mixtral-8x7b`, `gemma-7b`, `command-r-plus`, `llama3-70b`, `zephyr-141b`, `gpt-3.5-turbo`
+ - Adaptive prompt templates for different models
+ - Supports the OpenAI API format
+   - Works as an API endpoint for the official `openai-python` package
+ - Supports both streaming and non-streaming responses
+ - Supports API keys via the HTTP auth header or an environment variable
+ - Docker deployment
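+
+ The available models can also be listed at runtime from the `/models` endpoint. A minimal sketch, assuming the service is already running locally on the default port 23333:
+
+ ```py
+ # List the models served by a running instance (endpoint defined in apis/chat_api.py).
+ import httpx
+
+ res = httpx.get("http://127.0.0.1:23333/v1/models")
+ for model in res.json()["data"]:
+     print(model["id"], "-", model["owned_by"])
+ ```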
28
+
29
+ ## Run API service
30
+
31
+ ### Run in Command Line
32
+
33
+ **Install dependencies:**
34
+
35
+ ```bash
36
+ # pipreqs . --force --mode no-pin
37
+ pip install -r requirements.txt
38
+ ```
39
+
40
+ **Run API:**
41
+
42
+ ```bash
43
+ python -m apis.chat_api
44
+ ```
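+
+ The entrypoint also accepts a few flags (see `ArgParser` in `apis/chat_api.py`): `-s/--host` and `-p/--port` override the defaults from `configs/config.json`, and `-d/--dev` runs uvicorn with auto-reload.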
45
+
46
+ ### Run via Docker
47
+
48
+ **Docker build:**
49
+
50
+ ```bash
51
+ sudo docker build -t hf-llm-api:1.1.3 . --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy
52
+ ```
53
+
54
+ **Docker run:**
55
+
56
+ ```bash
57
+ # no proxy
58
+ sudo docker run -p 23333:23333 hf-llm-api:1.1.3
59
+
60
+ # with proxy
61
+ sudo docker run -p 23333:23333 --env http_proxy="http://<server>:<port>" hf-llm-api:1.1.3
62
+ ```
63
+
64
+ ## API Usage
65
+
66
+ ### Using `openai-python`
67
+
68
+ See: [`examples/chat_with_openai.py`](https://github.com/Niansuh/HF-LLM-API/blob/main/examples/chat_with_openai.py)
69
+
70
+ ```py
71
+ from openai import OpenAI
72
+
73
+ # If running this service with a proxy, you might need to unset `http(s)_proxy`.
74
+ base_url = "http://127.0.0.1:23333"
75
+ # Your own HF_TOKEN
76
+ api_key = "hf_xxxxxxxxxxxxxxxx"
77
+ # use below as non-auth user
78
+ # api_key = "sk-xxx"
79
+
80
+ client = OpenAI(base_url=base_url, api_key=api_key)
81
+ response = client.chat.completions.create(
82
+ model="nous-mixtral-8x7b",
83
+ messages=[
84
+ {
85
+ "role": "user",
86
+ "content": "what is your model",
87
+ }
88
+ ],
89
+ stream=True,
90
+ )
91
+
92
+ for chunk in response:
93
+ if chunk.choices[0].delta.content is not None:
94
+ print(chunk.choices[0].delta.content, end="", flush=True)
95
+ elif chunk.choices[0].finish_reason == "stop":
96
+ print()
97
+ else:
98
+ pass
99
+ ```
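+
+ The endpoint also works without streaming; a minimal sketch, reusing the `client` from above and only switching `stream` off:
+
+ ```py
+ # Non-streaming variant: the full reply comes back in a single response object.
+ response = client.chat.completions.create(
+     model="nous-mixtral-8x7b",
+     messages=[{"role": "user", "content": "what is your model"}],
+     stream=False,
+ )
+ print(response.choices[0].message.content)
+ ```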
100
+
101
+ ### Using post requests
102
+
103
+ See: [`examples/chat_with_post.py`](https://github.com/Niansuh/HF-LLM-API/blob/main/examples/chat_with_post.py)
104
+
105
+
106
+ ```py
107
+ import ast
108
+ import httpx
109
+ import json
110
+ import re
111
+
112
+ # If running this service with a proxy, you might need to unset `http(s)_proxy`.
113
+ chat_api = "http://127.0.0.1:23333"
114
+ # Your own HF_TOKEN
115
+ api_key = "hf_xxxxxxxxxxxxxxxx"
116
+ # use below as non-auth user
117
+ # api_key = "sk-xxx"
118
+
119
+ requests_headers = {}
120
+ requests_payload = {
121
+ "model": "nous-mixtral-8x7b",
122
+ "messages": [
123
+ {
124
+ "role": "user",
125
+ "content": "what is your model",
126
+ }
127
+ ],
128
+ "stream": True,
129
+ }
130
+
131
+ with httpx.stream(
132
+ "POST",
133
+ chat_api + "/chat/completions",
134
+ headers=requests_headers,
135
+ json=requests_payload,
136
+ timeout=httpx.Timeout(connect=20, read=60, write=20, pool=None),
137
+ ) as response:
138
+ # https://docs.aiohttp.org/en/stable/streams.html
139
+ # https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb
140
+ response_content = ""
141
+ for line in response.iter_lines():
142
+ remove_patterns = [r"^\s*data:\s*", r"^\s*\[DONE\]\s*"]
143
+ for pattern in remove_patterns:
144
+ line = re.sub(pattern, "", line).strip()
145
+
146
+ if line:
147
+ try:
148
+ line_data = json.loads(line)
149
+ except Exception as e:
150
+ try:
151
+ line_data = ast.literal_eval(line)
152
+ except:
153
+ print(f"Error: {line}")
154
+ raise e
155
+ # print(f"line: {line_data}")
156
+ delta_data = line_data["choices"][0]["delta"]
157
+ finish_reason = line_data["choices"][0]["finish_reason"]
158
+ if "role" in delta_data:
159
+ role = delta_data["role"]
160
+ if "content" in delta_data:
161
+ delta_content = delta_data["content"]
162
+ response_content += delta_content
163
+ print(delta_content, end="", flush=True)
164
+ if finish_reason == "stop":
165
+ print()
166
+
167
+ ```
__init__.py ADDED
File without changes
apis/__init__.py ADDED
File without changes
apis/chat_api.py ADDED
@@ -0,0 +1,214 @@
1
+ import argparse
2
+ import markdown2
3
+ import os
4
+ import sys
5
+ import uvicorn
6
+
7
+ from pathlib import Path
8
+ from typing import Union
9
+
10
+ from fastapi import FastAPI, Depends, HTTPException
11
+ from fastapi.responses import HTMLResponse
12
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
13
+ from pydantic import BaseModel, Field
14
+ from sse_starlette.sse import EventSourceResponse, ServerSentEvent
15
+ from tclogger import logger
16
+
17
+ from constants.models import AVAILABLE_MODELS_DICTS, PRO_MODELS
18
+ from constants.envs import CONFIG, SECRETS
19
+ from networks.exceptions import HfApiException, INVALID_API_KEY_ERROR
20
+
21
+ from messagers.message_composer import MessageComposer
22
+ from mocks.stream_chat_mocker import stream_chat_mock
23
+
24
+ from networks.huggingface_streamer import HuggingfaceStreamer
25
+ from networks.huggingchat_streamer import HuggingchatStreamer
26
+ from networks.openai_streamer import OpenaiStreamer
27
+
28
+
29
+ class ChatAPIApp:
30
+ def __init__(self):
31
+ self.app = FastAPI(
32
+ docs_url="/",
33
+ title=CONFIG["app_name"],
34
+ swagger_ui_parameters={"defaultModelsExpandDepth": -1},
35
+ version=CONFIG["version"],
36
+ )
37
+ self.setup_routes()
38
+
39
+ def get_available_models(self):
40
+ return {"object": "list", "data": AVAILABLE_MODELS_DICTS}
41
+
42
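+ # Note: plain function (no `self`); used below as a FastAPI dependency via Depends(extract_api_key).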
+ def extract_api_key(
43
+ credentials: HTTPAuthorizationCredentials = Depends(HTTPBearer()),
44
+ ):
45
+ api_key = None
46
+ if credentials:
47
+ api_key = credentials.credentials
48
+ env_api_key = SECRETS["HF_LLM_API_KEY"]
49
+ return api_key
50
+
51
+ def auth_api_key(self, api_key: str):
52
+ env_api_key = SECRETS["HF_LLM_API_KEY"]
53
+
54
+ # require no api_key
55
+ if not env_api_key:
56
+ return None
57
+ # user provides HF_TOKEN
58
+ if api_key and api_key.startswith("hf_"):
59
+ return api_key
60
+ # user provides correct API_KEY
61
+ if str(api_key) == str(env_api_key):
62
+ return None
63
+
64
+ raise INVALID_API_KEY_ERROR
65
+
66
+ class ChatCompletionsPostItem(BaseModel):
67
+ model: str = Field(
68
+ default="nous-mixtral-8x7b",
69
+ description="(str) `nous-mixtral-8x7b`",
70
+ )
71
+ messages: list = Field(
72
+ default=[{"role": "user", "content": "Hello, who are you?"}],
73
+ description="(list) Messages",
74
+ )
75
+ temperature: Union[float, None] = Field(
76
+ default=0.5,
77
+ description="(float) Temperature",
78
+ )
79
+ top_p: Union[float, None] = Field(
80
+ default=0.95,
81
+ description="(float) top p",
82
+ )
83
+ max_tokens: Union[int, None] = Field(
84
+ default=-1,
85
+ description="(int) Max tokens",
86
+ )
87
+ use_cache: bool = Field(
88
+ default=False,
89
+ description="(bool) Use cache",
90
+ )
91
+ stream: bool = Field(
92
+ default=True,
93
+ description="(bool) Stream",
94
+ )
95
+
96
+ def chat_completions(
97
+ self, item: ChatCompletionsPostItem, api_key: str = Depends(extract_api_key)
98
+ ):
99
+ try:
100
+ api_key = self.auth_api_key(api_key)
101
+
102
+ if item.model == "gpt-3.5-turbo":
103
+ streamer = OpenaiStreamer()
104
+ stream_response = streamer.chat_response(messages=item.messages)
105
+ elif item.model in PRO_MODELS:
106
+ streamer = HuggingchatStreamer(model=item.model)
107
+ stream_response = streamer.chat_response(
108
+ messages=item.messages,
109
+ )
110
+ else:
111
+ streamer = HuggingfaceStreamer(model=item.model)
112
+ composer = MessageComposer(model=item.model)
113
+ composer.merge(messages=item.messages)
114
+ stream_response = streamer.chat_response(
115
+ prompt=composer.merged_str,
116
+ temperature=item.temperature,
117
+ top_p=item.top_p,
118
+ max_new_tokens=item.max_tokens,
119
+ api_key=api_key,
120
+ use_cache=item.use_cache,
121
+ )
122
+
123
+ if item.stream:
124
+ event_source_response = EventSourceResponse(
125
+ streamer.chat_return_generator(stream_response),
126
+ media_type="text/event-stream",
127
+ ping=2000,
128
+ ping_message_factory=lambda: ServerSentEvent(**{"comment": ""}),
129
+ )
130
+ return event_source_response
131
+ else:
132
+ data_response = streamer.chat_return_dict(stream_response)
133
+ return data_response
134
+ except HfApiException as e:
135
+ raise HTTPException(status_code=e.status_code, detail=e.detail)
136
+ except Exception as e:
137
+ raise HTTPException(status_code=500, detail=str(e))
138
+
139
+ def get_readme(self):
140
+ readme_path = Path(__file__).parents[1] / "README.md"
141
+ with open(readme_path, "r", encoding="utf-8") as rf:
142
+ readme_str = rf.read()
143
+ readme_html = markdown2.markdown(
144
+ readme_str, extras=["table", "fenced-code-blocks", "highlightjs-lang"]
145
+ )
146
+ return readme_html
147
+
148
+ def setup_routes(self):
149
+ for prefix in ["", "/v1", "/api", "/api/v1"]:
150
+ if prefix in ["/api/v1"]:
151
+ include_in_schema = True
152
+ else:
153
+ include_in_schema = False
154
+
155
+ self.app.get(
156
+ prefix + "/models",
157
+ summary="Get available models",
158
+ include_in_schema=include_in_schema,
159
+ )(self.get_available_models)
160
+
161
+ self.app.post(
162
+ prefix + "/chat/completions",
163
+ summary="Chat completions in conversation session",
164
+ include_in_schema=include_in_schema,
165
+ )(self.chat_completions)
166
+ self.app.get(
167
+ "/readme",
168
+ summary="README of HF LLM API",
169
+ response_class=HTMLResponse,
170
+ include_in_schema=False,
171
+ )(self.get_readme)
172
+
173
+
174
+ class ArgParser(argparse.ArgumentParser):
175
+ def __init__(self, *args, **kwargs):
176
+ super(ArgParser, self).__init__(*args, **kwargs)
177
+
178
+ self.add_argument(
179
+ "-s",
180
+ "--host",
181
+ type=str,
182
+ default=CONFIG["host"],
183
+ help=f"Host for {CONFIG['app_name']}",
184
+ )
185
+ self.add_argument(
186
+ "-p",
187
+ "--port",
188
+ type=int,
189
+ default=CONFIG["port"],
190
+ help=f"Port for {CONFIG['app_name']}",
191
+ )
192
+
193
+ self.add_argument(
194
+ "-d",
195
+ "--dev",
196
+ default=False,
197
+ action="store_true",
198
+ help="Run in dev mode",
199
+ )
200
+
201
+ self.args = self.parse_args(sys.argv[1:])
202
+
203
+
204
+ app = ChatAPIApp().app
205
+
206
+ if __name__ == "__main__":
207
+ args = ArgParser().args
208
+ if args.dev:
209
+ uvicorn.run("__main__:app", host=args.host, port=args.port, reload=True)
210
+ else:
211
+ uvicorn.run("__main__:app", host=args.host, port=args.port, reload=False)
212
+
213
+ # python -m apis.chat_api  # [Docker] production mode
+ # python -m apis.chat_api -d  # [Dev] development mode (auto-reload)
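
A note on the routes above: `setup_routes` registers the same handlers under several prefixes, so the chat endpoint is reachable at any of the URLs below (a sketch, assuming the default host/port from `configs/config.json`; only the `/api/v1` variant appears in the OpenAPI schema):

```py
# Equivalent URLs registered by ChatAPIApp.setup_routes for chat completions.
endpoints = [
    "http://127.0.0.1:23333/chat/completions",
    "http://127.0.0.1:23333/v1/chat/completions",
    "http://127.0.0.1:23333/api/chat/completions",
    "http://127.0.0.1:23333/api/v1/chat/completions",
]
```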
configs/__init__.py ADDED
@@ -0,0 +1 @@
+
configs/config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "app_name": "HuggingFace LLM API",
+   "version": "1.4.1a",
+   "host": "0.0.0.0",
+   "port": 23333
+ }
configs/secrets_template.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "http_proxy": "http://127.0.0.1:11111",
+   "HF_LLM_API_KEY": "********"
+ }
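
To supply real values, copy this template to `configs/secrets.json` (already listed in `.gitignore`) and edit it. A minimal sketch, assuming it is run from the repository root:

```py
# Create configs/secrets.json from the template and set your own values.
# An empty HF_LLM_API_KEY disables the API-key check in apis/chat_api.py;
# an empty http_proxy disables the proxy in constants/envs.py.
import json
from pathlib import Path

secrets = json.loads(Path("configs/secrets_template.json").read_text())
secrets["HF_LLM_API_KEY"] = "your-own-key"  # hypothetical placeholder
secrets["http_proxy"] = ""
Path("configs/secrets.json").write_text(json.dumps(secrets, indent=2))
```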
constants/__init__.py ADDED
@@ -0,0 +1 @@
+
constants/envs.py ADDED
@@ -0,0 +1,21 @@
+ from pathlib import Path
+ from tclogger import logger, OSEnver
+
+
+ config_root = Path(__file__).parents[1] / "configs"
+
+ secrets_path = config_root / "secrets.json"
+ SECRETS = OSEnver(secrets_path)
+
+ http_proxy = SECRETS["http_proxy"]
+ if http_proxy:
+     logger.note(f"> Using proxy: {http_proxy}")
+     PROXIES = {
+         "http": http_proxy,
+         "https": http_proxy,
+     }
+ else:
+     PROXIES = None
+
+ config_path = config_root / "config.json"
+ CONFIG = OSEnver(config_path)
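
Both objects are indexed like plain dicts elsewhere in the codebase; a minimal usage sketch:

```py
# CONFIG and PROXIES as consumed by apis/chat_api.py and the networks/* modules.
from constants.envs import CONFIG, PROXIES

print(CONFIG["app_name"], CONFIG["port"])  # HuggingFace LLM API 23333
print(PROXIES)  # None, or {"http": ..., "https": ...} when http_proxy is set in secrets.json
```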
constants/headers.py ADDED
@@ -0,0 +1,67 @@
1
+ REQUESTS_HEADERS = {
2
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
3
+ }
4
+
5
+ HUGGINGCHAT_POST_HEADERS = {
6
+ "Accept-Encoding": "gzip, deflate, br, zstd",
7
+ "Accept-Language": "en-US,en;q=0.9",
8
+ "Cache-Control": "no-cache",
9
+ "Content-Type": "application/json",
10
+ "Origin": "https://huggingface.co",
11
+ "Pragma": "no-cache",
12
+ "Referer": "https://huggingface.co/chat/",
13
+ "Sec-Ch-Ua": 'Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
14
+ "Sec-Ch-Ua-Mobile": "?0",
15
+ "Sec-Ch-Ua-Platform": '"Windows"',
16
+ "Sec-Fetch-Dest": "empty",
17
+ "Sec-Fetch-Mode": "cors",
18
+ "Sec-Fetch-Site": "same-origin",
19
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
20
+ }
21
+
22
+ HUGGINGCHAT_SETTINGS_POST_DATA = {
23
+ "assistants": [],
24
+ "customPrompts": {},
25
+ "ethicsModalAccepted": True,
26
+ "ethicsModalAcceptedAt": None,
27
+ "hideEmojiOnSidebar": False,
28
+ "recentlySaved": False,
29
+ "searchEnabled": True,
30
+ "shareConversationsWithModelAuthors": True,
31
+ }
32
+
33
+ OPENAI_GET_HEADERS = {
34
+ # "Accept": "*/*",
35
+ "Accept-Encoding": "gzip, deflate, br, zstd",
36
+ "Accept-Language": "en-US,en;q=0.9",
37
+ "Cache-Control": "no-cache",
38
+ "Content-Type": "application/json",
39
+ # "Oai-Device-Id": self.uuid,
40
+ "Oai-Language": "en-US",
41
+ "Pragma": "no-cache",
42
+ "Referer": "https://chat.openai.com/",
43
+ "Sec-Ch-Ua": 'Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
44
+ "Sec-Ch-Ua-Mobile": "?0",
45
+ "Sec-Ch-Ua-Platform": '"Windows"',
46
+ "Sec-Fetch-Dest": "empty",
47
+ "Sec-Fetch-Mode": "cors",
48
+ "Sec-Fetch-Site": "same-origin",
49
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
50
+ }
51
+
52
+
53
+ OPENAI_POST_DATA = {
54
+ "action": "next",
55
+ # "conversation_id": "...",
56
+ "conversation_mode": {"kind": "primary_assistant"},
57
+ "force_nulligen": False,
58
+ "force_paragen": False,
59
+ "force_paragen_model_slug": "",
60
+ "force_rate_limit": False,
61
+ "history_and_training_disabled": False,
62
+ # "messages": [...],
63
+ "model": "text-davinci-002-render-sha",
64
+ "parent_message_id": "",
65
+ "suggestions": [],
66
+ "timezone_offset_min": -480,
67
+ }
constants/models.py ADDED
@@ -0,0 +1,143 @@
1
+ MODEL_MAP = {
2
+ "mixtral-8x7b": "mistralai/Mixtral-8x7B-Instruct-v0.1", # [Recommended]
3
+ "nous-mixtral-8x7b": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
4
+ "Llama-3.1-70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct",
5
+ "Mistral-Nemo-Instruct-2407": "mistralai/Mistral-Nemo-Instruct-2407",
6
+ "mistral-7b": "mistralai/Mistral-7B-Instruct-v0.2",
7
+ "yi-1.5-34b": "01-ai/Yi-1.5-34B-Chat",
8
+ "gemma-7b": "google/gemma-1.1-7b-it",
9
+ # "openchat-3.5": "openchat/openchat-3.5-0106",
10
+ # "command-r-plus": "CohereForAI/c4ai-command-r-plus",
11
+ # "llama3-70b": "meta-llama/Meta-Llama-3-70B-Instruct",
12
+ # "zephyr-141b": "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
13
+ "default": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
14
+ }
15
+
16
+ AVAILABLE_MODELS = list(MODEL_MAP.keys())
17
+
18
+ PRO_MODELS = ["command-r-plus", "llama3-70b", "zephyr-141b"]
19
+
20
+ STOP_SEQUENCES_MAP = {
21
+ # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1/blob/main/tokenizer_config.json#L33
22
+ "mixtral-8x7b": "</s>",
23
+ # https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO/blob/main/tokenizer_config.json#L50
24
+ "nous-mixtral-8x7b": "<|im_end|>",
25
+ # https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct/blob/1d54af340dc8906a2d21146191a9c184c35e47bd/tokenizer_config.json#L2055
26
+ "Llama-3.1-70B-Instruct": "<|eot_id|>",
27
+ # https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407/blob/main/tokenizer_config.json
28
+ "Mistral-Nemo-Instruct-2407": "</s>",
29
+ # https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/blob/main/tokenizer_config.json#L33
30
+ "mistral-7b": "</s>",
31
+ # https://huggingface.co/01-ai/Yi-1.5-34B-Chat/blob/main/tokenizer_config.json#L42
32
+ "yi-1.5-34b": "<|im_end|>",
33
+ # https://huggingface.co/google/gemma-1.1-7b-it/blob/main/tokenizer_config.json#L1509
34
+ "gemma-7b": "<eos>",
35
+ # "openchat-3.5": "<|end_of_turn|>",
36
+ # "command-r-plus": "<|END_OF_TURN_TOKEN|>",
37
+ }
38
+
39
+ TOKEN_LIMIT_MAP = {
40
+ "mixtral-8x7b": 32768,
41
+ "nous-mixtral-8x7b": 32768,
42
+ "Llama-3.1-70B-Instruct": 32768,
43
+ "Mistral-Nemo-Instruct-2407": 1024000,
44
+ "mistral-7b": 32768,
45
+ "yi-1.5-34b": 4096,
46
+ "gemma-7b": 8192,
47
+ # "openchat-3.5": 8192,
48
+ # "command-r-plus": 32768,
49
+ # "llama3-70b": 8192,
50
+ # "zephyr-141b": 2048,
51
+ # "gpt-3.5-turbo": 8192,
52
+ }
53
+
54
+ TOKEN_RESERVED = 20
55
+
56
+
57
+ # https://platform.openai.com/docs/api-reference/models/list
58
+ AVAILABLE_MODELS_DICTS = [
59
+ {
60
+ "id": "mixtral-8x7b",
61
+ "description": "[mistralai/Mixtral-8x7B-Instruct-v0.1]: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1",
62
+ "object": "model",
63
+ "created": 1700000000,
64
+ "owned_by": "mistralai",
65
+ },
66
+ {
67
+ "id": "nous-mixtral-8x7b",
68
+ "description": "[NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO]: https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
69
+ "object": "model",
70
+ "created": 1700000000,
71
+ "owned_by": "NousResearch",
72
+ },
73
+ {
74
+ "id": "Mistral-Nemo-Instruct-2407",
75
+ "description": "[mistralai/Mistral-Nemo-Instruct-2407]: https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407",
76
+ "object": "model",
77
+ "created": 1700000000,
78
+ "owned_by": "mistralai",
79
+ },
80
+ {
81
+ "id": "Llama-3.1-70B-Instruct",
82
+ "description": "[meta-llama/Llama-3.1-70B-Instruct]: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct",
83
+ "object": "model",
84
+ "created": 1700000000,
85
+ "owned_by": "meta-llama",
86
+ },
87
+ {
88
+ "id": "mistral-7b",
89
+ "description": "[mistralai/Mistral-7B-Instruct-v0.2]: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2",
90
+ "object": "model",
91
+ "created": 1700000000,
92
+ "owned_by": "mistralai",
93
+ },
94
+ {
95
+ "id": "yi-1.5-34b",
96
+ "description": "[01-ai/Yi-1.5-34B-Chat]: https://huggingface.co/01-ai/Yi-1.5-34B-Chat",
97
+ "object": "model",
98
+ "created": 1700000000,
99
+ "owned_by": "01-ai",
100
+ },
101
+ {
102
+ "id": "gemma-7b",
103
+ "description": "[google/gemma-1.1-7b-it]: https://huggingface.co/google/gemma-1.1-7b-it",
104
+ "object": "model",
105
+ "created": 1700000000,
106
+ "owned_by": "Google",
107
+ },
108
+ # {
109
+ # "id": "openchat-3.5",
110
+ # "description": "[openchat/openchat-3.5-0106]: https://huggingface.co/openchat/openchat-3.5-0106",
111
+ # "object": "model",
112
+ # "created": 1700000000,
113
+ # "owned_by": "openchat",
114
+ # },
115
+ # {
116
+ # "id": "command-r-plus",
117
+ # "description": "[CohereForAI/c4ai-command-r-plus]: https://huggingface.co/CohereForAI/c4ai-command-r-plus",
118
+ # "object": "model",
119
+ # "created": 1700000000,
120
+ # "owned_by": "CohereForAI",
121
+ # },
122
+ # {
123
+ # "id": "llama3-70b",
124
+ # "description": "[meta-llama/Meta-Llama-3-70B]: https://huggingface.co/meta-llama/Meta-Llama-3-70B",
125
+ # "object": "model",
126
+ # "created": 1700000000,
127
+ # "owned_by": "Meta",
128
+ # },
129
+ # {
130
+ # "id": "zephyr-141b",
131
+ # "description": "[HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1]: https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
132
+ # "object": "model",
133
+ # "created": 1700000000,
134
+ # "owned_by": "Huggingface",
135
+ # },
136
+ # {
137
+ # "id": "gpt-3.5-turbo",
138
+ # "description": "[openai/gpt-3.5-turbo]: https://platform.openai.com/docs/models/gpt-3-5-turbo",
139
+ # "object": "model",
140
+ # "created": 1700000000,
141
+ # "owned_by": "OpenAI",
142
+ # },
143
+ ]
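
A quick sketch of how these tables are used together: an alias from `MODEL_MAP` resolves to a full Hub repo id, and `TOKEN_LIMIT_MAP` minus `TOKEN_RESERVED` gives the budget checked in `messagers/token_checker.py`:

```py
# Resolve a model alias and compute its usable token budget.
from constants.models import MODEL_MAP, TOKEN_LIMIT_MAP, TOKEN_RESERVED

alias = "nous-mixtral-8x7b"
print(MODEL_MAP[alias])                         # NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO
print(TOKEN_LIMIT_MAP[alias] - TOKEN_RESERVED)  # 32748 tokens for prompt + generation
```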
examples/__init__.py ADDED
File without changes
examples/chat_with_openai.py ADDED
@@ -0,0 +1,25 @@
1
+ from openai import OpenAI
2
+
3
+ # If running this service with a proxy, you might need to unset `http(s)_proxy`.
4
+ base_url = "http://127.0.0.1:23333"
5
+ api_key = "sk-xxxxx"
6
+
7
+ client = OpenAI(base_url=base_url, api_key=api_key)
8
+ response = client.chat.completions.create(
9
+ model="nous-mixtral-8x7b",
10
+ messages=[
11
+ {
12
+ "role": "user",
13
+ "content": "what is your model",
14
+ }
15
+ ],
16
+ stream=True,
17
+ )
18
+
19
+ for chunk in response:
20
+ if chunk.choices[0].delta.content is not None:
21
+ print(chunk.choices[0].delta.content, end="", flush=True)
22
+ elif chunk.choices[0].finish_reason == "stop":
23
+ print()
24
+ else:
25
+ pass
examples/chat_with_post.py ADDED
@@ -0,0 +1,55 @@
1
+ import ast
2
+ import httpx
3
+ import json
4
+ import re
5
+
6
+ # If running this service with a proxy, you might need to unset `http(s)_proxy`.
7
+ chat_api = "http://127.0.0.1:23333"
8
+ api_key = "sk-xxxxx"
9
+ requests_headers = {}
10
+ requests_payload = {
11
+ "model": "nous-mixtral-8x7b",
12
+ "messages": [
13
+ {
14
+ "role": "user",
15
+ "content": "what is your model",
16
+ }
17
+ ],
18
+ "stream": True,
19
+ }
20
+
21
+ with httpx.stream(
22
+ "POST",
23
+ chat_api + "/chat/completions",
24
+ headers=requests_headers,
25
+ json=requests_payload,
26
+ timeout=httpx.Timeout(connect=20, read=60, write=20, pool=None),
27
+ ) as response:
28
+ # https://docs.aiohttp.org/en/stable/streams.html
29
+ # https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb
30
+ response_content = ""
31
+ for line in response.iter_lines():
32
+ remove_patterns = [r"^\s*data:\s*", r"^\s*\[DONE\]\s*"]
33
+ for pattern in remove_patterns:
34
+ line = re.sub(pattern, "", line).strip()
35
+
36
+ if line:
37
+ try:
38
+ line_data = json.loads(line)
39
+ except Exception as e:
40
+ try:
41
+ line_data = ast.literal_eval(line)
42
+ except:
43
+ print(f"Error: {line}")
44
+ raise e
45
+ # print(f"line: {line_data}")
46
+ delta_data = line_data["choices"][0]["delta"]
47
+ finish_reason = line_data["choices"][0]["finish_reason"]
48
+ if "role" in delta_data:
49
+ role = delta_data["role"]
50
+ if "content" in delta_data:
51
+ delta_content = delta_data["content"]
52
+ response_content += delta_content
53
+ print(delta_content, end="", flush=True)
54
+ if finish_reason == "stop":
55
+ print()
messagers/__init__.py ADDED
File without changes
messagers/message_composer.py ADDED
@@ -0,0 +1,237 @@
1
+ import re
2
+ from pprint import pprint
3
+
4
+ from transformers import AutoTokenizer
5
+
6
+ from constants.models import AVAILABLE_MODELS, MODEL_MAP
7
+ from tclogger import logger
8
+
9
+
10
+ class MessageComposer:
11
+ def __init__(self, model: str = None):
12
+ if model in AVAILABLE_MODELS:
13
+ self.model = model
14
+ else:
15
+ self.model = "nous-mixtral-8x7b"
16
+ self.model_fullname = MODEL_MAP[self.model]
17
+ self.system_roles = ["system"]
18
+ self.inst_roles = ["user", "system", "inst"]
19
+ self.answer_roles = ["assistant", "bot", "answer", "model"]
20
+ self.default_role = "user"
21
+
22
+ def concat_messages_by_role(self, messages):
23
+ def is_same_role(role1, role2):
24
+ if (
25
+ (role1 == role2)
26
+ or (role1 in self.inst_roles and role2 in self.inst_roles)
27
+ or (role1 in self.answer_roles and role2 in self.answer_roles)
28
+ ):
29
+ return True
30
+ else:
31
+ return False
32
+
33
+ concat_messages = []
34
+ for message in messages:
35
+ role = message["role"]
36
+ content = message["content"]
37
+ if concat_messages and is_same_role(role, concat_messages[-1]["role"]):
38
+ concat_messages[-1]["content"] += "\n" + content
39
+ else:
40
+ if role in self.inst_roles:
41
+ message["role"] = "inst"
42
+ elif role in self.answer_roles:
43
+ message["role"] = "answer"
44
+ else:
45
+ message["role"] = "inst"
46
+ concat_messages.append(message)
47
+ return concat_messages
48
+
49
+ def merge(self, messages) -> str:
50
+ # Templates for Chat Models
51
+ # - https://huggingface.co/docs/transformers/main/en/chat_templating
52
+ # - https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1#instruction-format
53
+ # - https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO#prompt-format
54
+ # - https://huggingface.co/openchat/openchat-3.5-0106
55
+ # - https://huggingface.co/google/gemma-7b-it#chat-template
56
+
57
+ # Mistral and Mixtral:
58
+ # <s> [INST] Instruction [/INST] Model answer </s> [INST] Follow-up instruction [/INST]
59
+
60
+ # Nous Mixtral:
61
+ # <|im_start|>system
62
+ # You are "Hermes 2".<|im_end|>
63
+ # <|im_start|>user
64
+ # Hello, who are you?<|im_end|>
65
+ # <|im_start|>assistant
66
+
67
+ # OpenChat:
68
+ # GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi<|end_of_turn|>GPT4 Correct User: How are you today?<|end_of_turn|>GPT4 Correct Assistant:
69
+
70
+ # Google Gemma-it
71
+ # <start_of_turn>user
72
+ # How does the brain work?<end_of_turn>
73
+ # <start_of_turn>model
74
+
75
+ self.messages = messages
76
+ self.merged_str = ""
77
+
78
+ # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1#instruction-format
79
+ if self.model in ["mixtral-8x7b", "mistral-7b"]:
80
+ self.messages = self.concat_messages_by_role(messages)
81
+ self.cached_str = ""
82
+ for message in self.messages:
83
+ role = message["role"]
84
+ content = message["content"]
85
+ if role in self.inst_roles:
86
+ self.cached_str = f"[INST] {content} [/INST]"
87
+ elif role in self.answer_roles:
88
+ self.merged_str += f"<s> {self.cached_str} {content} </s>\n"
89
+ self.cached_str = ""
90
+ else:
91
+ self.cached_str = f"[INST] {content} [/INST]"
92
+ if self.cached_str:
93
+ self.merged_str += f"{self.cached_str}"
94
+ # https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO#prompt-format
95
+ elif self.model in ["nous-mixtral-8x7b"]:
96
+ self.merged_str_list = []
97
+ for message in self.messages:
98
+ role = message["role"]
99
+ content = message["content"]
100
+ if role not in ["system", "user", "assistant"]:
101
+ role = self.default_role
102
+ message_line = f"<|im_start|>{role}\n{content}<|im_end|>"
103
+ self.merged_str_list.append(message_line)
104
+ self.merged_str_list.append("<|im_start|>assistant")
105
+ self.merged_str = "\n".join(self.merged_str_list)
106
+ # https://huggingface.co/openchat/openchat-3.5-0106
107
+ elif self.model in ["openchat-3.5"]:
108
+ self.messages = self.concat_messages_by_role(messages)
109
+ self.merged_str_list = []
110
+ self.end_of_turn = "<|end_of_turn|>"
111
+ for message in self.messages:
112
+ role = message["role"]
113
+ content = message["content"]
114
+ if role in self.inst_roles:
115
+ self.merged_str_list.append(
116
+ f"GPT4 Correct User:\n{content}{self.end_of_turn}"
117
+ )
118
+ elif role in self.answer_roles:
119
+ self.merged_str_list.append(
120
+ f"GPT4 Correct Assistant:\n{content}{self.end_of_turn}"
121
+ )
122
+ else:
123
+ self.merged_str_list.append(
124
+ f"GPT4 Correct User: {content}{self.end_of_turn}"
125
+ )
126
+ self.merged_str_list.append(f"GPT4 Correct Assistant:\n")
127
+ self.merged_str = "\n".join(self.merged_str_list)
128
+ # https://huggingface.co/google/gemma-1.1-7b-it#chat-template
129
+ elif self.model in ["gemma-7b"]:
130
+ self.messages = self.concat_messages_by_role(messages)
131
+ self.merged_str_list = []
132
+ self.end_of_turn = "<end_of_turn>"
133
+ self.start_of_turn = "<start_of_turn>"
134
+ for message in self.messages:
135
+ role = message["role"]
136
+ content = message["content"]
137
+ if role in self.inst_roles:
138
+ self.merged_str_list.append(
139
+ f"{self.start_of_turn}user\n{content}{self.end_of_turn}"
140
+ )
141
+ elif role in self.answer_roles:
142
+ self.merged_str_list.append(
143
+ f"{self.start_of_turn}model\n{content}{self.end_of_turn}"
144
+ )
145
+ else:
146
+ self.merged_str_list.append(
147
+ f"{self.start_of_turn}user\n{content}{self.end_of_turn}"
148
+ )
149
+ self.merged_str_list.append(f"{self.start_of_turn}model\n")
150
+ self.merged_str = "<bos>" + "\n".join(self.merged_str_list)
151
+ # https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO#prompt-format
152
+ # https://huggingface.co/openchat/openchat-3.5-0106
153
+ # https://huggingface.co/01-ai/Yi-1.5-34B-Chat
154
+ elif self.model in ["openchat-3.5", "command-r-plus", "gemma-7b", "yi-1.5-34b"]:
155
+ # https://discuss.huggingface.co/t/error-with-new-tokenizers-urgent/2847/5
156
+ tokenizer = AutoTokenizer.from_pretrained(
157
+ self.model_fullname, use_fast=False
158
+ )
159
+ self.merged_str = tokenizer.apply_chat_template(
160
+ messages, tokenize=False, add_generation_prompt=True
161
+ )
162
+ else:
163
+ self.merged_str = "\n\n".join(
164
+ [f"{message['role']}: {message['content']}" for message in messages]
165
+ )
166
+
167
+ return self.merged_str
168
+
169
+ def decompose_to_system_and_input_prompt(
170
+ self, messages: list[dict], append_assistant=True
171
+ ):
172
+ system_prompt_list = []
173
+ user_and_assistant_messages = []
174
+ for message in messages:
175
+ role = message["role"]
176
+ content = message["content"]
177
+ if role in self.system_roles:
178
+ system_prompt_list.append(content)
179
+ else:
180
+ user_and_assistant_messages.append(message)
181
+ system_prompt = "\n".join(system_prompt_list)
182
+
183
+ input_prompt_list = []
184
+ input_messages = self.concat_messages_by_role(user_and_assistant_messages)
185
+ for message in input_messages:
186
+ role = message["role"]
187
+ content = message["content"]
188
+ if role in self.answer_roles:
189
+ role_content_str = f"`assistant`:\n{content}"
190
+ else:
191
+ role_content_str = f"`user`:\n{content}"
192
+ input_prompt_list.append(role_content_str)
193
+ input_prompt = "\n\n".join(input_prompt_list)
194
+
195
+ if append_assistant:
196
+ input_prompt += "\n\n`assistant`:"
197
+
198
+ return system_prompt, input_prompt
199
+
200
+
201
+ if __name__ == "__main__":
202
+ # model = "mixtral-8x7b"
203
+ # model = "nous-mixtral-8x7b"
204
+ model = "gemma-7b"
205
+ # model = "openchat-3.5"
206
+ # model = "command-r-plus"
207
+ composer = MessageComposer(model)
208
+ messages = [
209
+ {
210
+ "role": "system",
211
+ "content": "You are a LLM developed by OpenAI.\nYour name is GPT-4.",
212
+ },
213
+ {"role": "user", "content": "Hello, who are you?"},
214
+ {"role": "assistant", "content": "I am a bot."},
215
+ {"role": "user", "content": "What is your name?"},
216
+ # {"role": "assistant", "content": "My name is Bing."},
217
+ # {"role": "user", "content": "Tell me a joke."},
218
+ # {"role": "assistant", "content": "What is a robot's favorite type of music?"},
219
+ # {
220
+ # "role": "user",
221
+ # "content": "How many questions have I asked? Please list them.",
222
+ # },
223
+ ]
224
+ # logger.note(f"model: {composer.model}")
225
+ # merged_str = composer.merge(messages)
226
+ # logger.note("merged_str:")
227
+ # logger.mesg(merged_str)
228
+
229
+ system_prompt, input_prompt = composer.decompose_to_system_and_input_prompt(
230
+ messages
231
+ )
232
+ logger.note("system_prompt:")
233
+ logger.mesg(system_prompt)
234
+ logger.note("input_prompt:")
235
+ logger.mesg(input_prompt)
236
+
237
+ # python -m messagers.message_composer
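
For reference, a sketch of what `merge()` produces for the default `nous-mixtral-8x7b` template (ChatML-style, per the comments inside `merge()` above):

```py
# The merged prompt string for a short ChatML-style conversation.
from messagers.message_composer import MessageComposer

composer = MessageComposer(model="nous-mixtral-8x7b")
merged = composer.merge([
    {"role": "system", "content": "You are Hermes 2."},
    {"role": "user", "content": "Hello, who are you?"},
])
# merged ==
# <|im_start|>system
# You are Hermes 2.<|im_end|>
# <|im_start|>user
# Hello, who are you?<|im_end|>
# <|im_start|>assistant
```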
messagers/message_outputer.py ADDED
@@ -0,0 +1,65 @@
1
+ import json
2
+
3
+
4
+ class OpenaiStreamOutputer:
5
+ """
6
+ Create chat completion - OpenAI API Documentation
7
+ * https://platform.openai.com/docs/api-reference/chat/create
8
+ """
9
+
10
+ def __init__(self, owned_by="huggingface", model="nous-mixtral-8x7b"):
11
+ self.default_data = {
12
+ "created": 1700000000,
13
+ "id": f"chatcmpl-{owned_by}",
14
+ "object": "chat.completion.chunk",
15
+ # "content_type": "Completions",
16
+ "model": model,
17
+ "choices": [],
18
+ }
19
+
20
+ def data_to_string(self, data={}, content_type=""):
21
+ data_str = f"{json.dumps(data)}"
22
+ return data_str
23
+
24
+ def output(self, content=None, content_type="Completions") -> str:
25
+ data = self.default_data.copy()
26
+ if content_type == "Role":
27
+ data["choices"] = [
28
+ {
29
+ "index": 0,
30
+ "delta": {"role": "assistant"},
31
+ "finish_reason": None,
32
+ }
33
+ ]
34
+ elif content_type in [
35
+ "Completions",
36
+ "InternalSearchQuery",
37
+ "InternalSearchResult",
38
+ "SuggestedResponses",
39
+ ]:
40
+ if content_type in ["InternalSearchQuery", "InternalSearchResult"]:
41
+ content += "\n"
42
+ data["choices"] = [
43
+ {
44
+ "index": 0,
45
+ "delta": {"content": content},
46
+ "finish_reason": None,
47
+ }
48
+ ]
49
+ elif content_type == "Finished":
50
+ data["choices"] = [
51
+ {
52
+ "index": 0,
53
+ "delta": {},
54
+ "finish_reason": "stop",
55
+ }
56
+ ]
57
+ else:
58
+ data["choices"] = [
59
+ {
60
+ "index": 0,
61
+ "delta": {},
62
+ "finish_reason": None,
63
+ }
64
+ ]
65
+ return self.data_to_string(data, content_type)
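
A sketch of the string a single call to `output()` yields (this is what gets sent as one SSE chunk):

```py
# One OpenAI-style streaming chunk, serialized by OpenaiStreamOutputer.output().
from messagers.message_outputer import OpenaiStreamOutputer

outputer = OpenaiStreamOutputer(model="nous-mixtral-8x7b")
print(outputer.output(content="Hello", content_type="Completions"))
# {"created": 1700000000, "id": "chatcmpl-huggingface", "object": "chat.completion.chunk",
#  "model": "nous-mixtral-8x7b", "choices": [{"index": 0, "delta": {"content": "Hello"}, "finish_reason": null}]}
```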
messagers/token_checker.py ADDED
@@ -0,0 +1,46 @@
1
+ from tclogger import logger
2
+ from transformers import AutoTokenizer
3
+
4
+ from constants.models import MODEL_MAP, TOKEN_LIMIT_MAP, TOKEN_RESERVED
5
+
6
+
7
+ class TokenChecker:
8
+ def __init__(self, input_str: str, model: str):
9
+ self.input_str = input_str
10
+
11
+ if model in MODEL_MAP.keys():
12
+ self.model = model
13
+ else:
14
+ self.model = "nous-mixtral-8x7b"
15
+
16
+ self.model_fullname = MODEL_MAP[self.model]
17
+
18
+ # As some models are gated, we need to fetch tokenizers from alternatives
19
+ GATED_MODEL_MAP = {
20
+ "llama3-70b": "NousResearch/Meta-Llama-3-70B",
21
+ "gemma-7b": "unsloth/gemma-7b",
22
+ "mistral-7b": "dfurman/Mistral-7B-Instruct-v0.2",
23
+ "mixtral-8x7b": "dfurman/Mixtral-8x7B-Instruct-v0.1",
24
+ }
25
+ if self.model in GATED_MODEL_MAP.keys():
26
+ self.tokenizer = AutoTokenizer.from_pretrained(GATED_MODEL_MAP[self.model])
27
+ else:
28
+ self.tokenizer = AutoTokenizer.from_pretrained(self.model_fullname)
29
+
30
+ def count_tokens(self):
31
+ token_count = len(self.tokenizer.encode(self.input_str))
32
+ logger.note(f"Prompt Token Count: {token_count}")
33
+ return token_count
34
+
35
+ def get_token_limit(self):
36
+ return TOKEN_LIMIT_MAP[self.model]
37
+
38
+ def get_token_redundancy(self):
39
+ return int(self.get_token_limit() - TOKEN_RESERVED - self.count_tokens())
40
+
41
+ def check_token_limit(self):
42
+ if self.get_token_redundancy() <= 0:
43
+ raise ValueError(
44
+ f"Prompt exceeded token limit: {self.count_tokens()} > {self.get_token_limit()}"
45
+ )
46
+ return True
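
A minimal usage sketch (assumes the tokenizer for the chosen model can be downloaded from the Hub):

```py
# Check that a prompt fits the model's context window before sending it.
from messagers.token_checker import TokenChecker

checker = TokenChecker(input_str="Hello, who are you?", model="nous-mixtral-8x7b")
checker.check_token_limit()               # raises ValueError if the prompt is too long
max_new = checker.get_token_redundancy()  # tokens left over for generation
```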
mocks/__init__.py ADDED
File without changes
mocks/stream_chat_mocker.py ADDED
@@ -0,0 +1,13 @@
+ import time
+ from tclogger import logger
+
+
+ def stream_chat_mock(*args, **kwargs):
+     logger.note(msg=str(args) + str(kwargs))
+     for i in range(10):
+         content = f"W{i+1} "
+         time.sleep(0.1)
+         logger.mesg(content, end="")
+         yield content
+     logger.mesg("")
+     yield ""
networks/__init__.py ADDED
File without changes
networks/exceptions.py ADDED
@@ -0,0 +1,31 @@
1
+ import http
2
+
3
+ from typing import Optional
4
+
5
+ from fastapi import HTTPException, status
6
+
7
+
8
+ class HfApiException(Exception):
9
+ def __init__(
10
+ self,
11
+ status_code: int,
12
+ detail: Optional[str] = None,
13
+ ) -> None:
14
+ if detail is None:
15
+ self.detail = http.HTTPStatus(status_code).phrase
16
+ else:
17
+ self.detail = detail
18
+ self.status_code = status_code
19
+
20
+ def __repr__(self) -> str:
21
+ class_name = self.__class__.__name__
22
+ return f"{class_name}(status_code={self.status_code!r}, detail={self.detail!r})"
23
+
24
+ def __str__(self) -> str:
25
+ return self.__repr__()
26
+
27
+
28
+ INVALID_API_KEY_ERROR = HfApiException(
29
+ status_code=status.HTTP_403_FORBIDDEN,
30
+ detail="Invalid API Key",
31
+ )
networks/huggingchat_streamer.py ADDED
@@ -0,0 +1,303 @@
1
+ import copy
2
+ import json
3
+ import re
4
+
5
+ import requests
6
+ from curl_cffi import requests as cffi_requests
7
+
8
+ from tclogger import logger
9
+
10
+ from constants.models import MODEL_MAP
11
+ from constants.envs import PROXIES
12
+ from constants.headers import HUGGINGCHAT_POST_HEADERS, HUGGINGCHAT_SETTINGS_POST_DATA
13
+ from messagers.message_outputer import OpenaiStreamOutputer
14
+ from messagers.message_composer import MessageComposer
15
+ from messagers.token_checker import TokenChecker
16
+
17
+
18
+ class HuggingchatRequester:
19
+ def __init__(self, model: str):
20
+ if model in MODEL_MAP.keys():
21
+ self.model = model
22
+ else:
23
+ self.model = "nous-mixtral-8x7b"
24
+ self.model_fullname = MODEL_MAP[self.model]
25
+
26
+ def get_hf_chat_id(self):
27
+ request_url = "https://huggingface.co/chat/settings"
28
+ request_body = copy.deepcopy(HUGGINGCHAT_SETTINGS_POST_DATA)
29
+ extra_body = {
30
+ "activeModel": self.model_fullname,
31
+ }
32
+ request_body.update(extra_body)
33
+ logger.note(f"> hf-chat ID:", end=" ")
34
+
35
+ res = cffi_requests.post(
36
+ request_url,
37
+ headers=HUGGINGCHAT_POST_HEADERS,
38
+ json=request_body,
39
+ proxies=PROXIES,
40
+ timeout=10,
41
+ impersonate="chrome",
42
+ )
43
+ self.hf_chat_id = res.cookies.get("hf-chat")
44
+ if self.hf_chat_id:
45
+ logger.success(f"[{self.hf_chat_id}]")
46
+ else:
47
+ logger.warn(f"[{res.status_code}]")
48
+ logger.warn(res.text)
49
+ raise ValueError(f"Failed to get hf-chat ID: {res.text}")
50
+
51
+ def get_conversation_id(self, system_prompt: str = ""):
52
+ request_url = "https://huggingface.co/chat/conversation"
53
+ request_headers = HUGGINGCHAT_POST_HEADERS
54
+ extra_headers = {
55
+ "Cookie": f"hf-chat={self.hf_chat_id}",
56
+ }
57
+ request_headers.update(extra_headers)
58
+ request_body = {
59
+ "model": self.model_fullname,
60
+ "preprompt": system_prompt,
61
+ }
62
+ logger.note(f"> Conversation ID:", end=" ")
63
+
64
+ res = requests.post(
65
+ request_url,
66
+ headers=request_headers,
67
+ json=request_body,
68
+ proxies=PROXIES,
69
+ timeout=10,
70
+ )
71
+ if res.status_code == 200:
72
+ conversation_id = res.json()["conversationId"]
73
+ logger.success(f"[{conversation_id}]")
74
+ else:
75
+ logger.warn(f"[{res.status_code}]")
76
+ raise ValueError("Failed to get conversation ID!")
77
+ self.conversation_id = conversation_id
78
+ return conversation_id
79
+
80
+ def get_last_message_id(self):
81
+ request_url = f"https://huggingface.co/chat/conversation/{self.conversation_id}/__data.json?x-sveltekit-invalidated=11"
82
+ request_headers = HUGGINGCHAT_POST_HEADERS
83
+ extra_headers = {
84
+ "Cookie": f"hf-chat={self.hf_chat_id}",
85
+ }
86
+ request_headers.update(extra_headers)
87
+ logger.note(f"> Message ID:", end=" ")
88
+
89
+ message_id = None
90
+ res = requests.post(
91
+ request_url,
92
+ headers=request_headers,
93
+ proxies=PROXIES,
94
+ timeout=10,
95
+ )
96
+ if res.status_code == 200:
97
+ data = res.json()["nodes"][1]["data"]
98
+ # find the last element which matches the format of uuid4
99
+ uuid_pattern = re.compile(
100
+ r"^[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}$"
101
+ )
102
+ for item in data:
103
+ if type(item) == str and uuid_pattern.match(item):
104
+ message_id = item
105
+ logger.success(f"[{message_id}]")
106
+ else:
107
+ logger.warn(f"[{res.status_code}]")
108
+ raise ValueError("Failed to get message ID!")
109
+
110
+ return message_id
111
+
112
+ def log_request(self, url, method="GET"):
113
+ logger.note(f"> {method}:", end=" ")
114
+ logger.mesg(f"{url}", end=" ")
115
+
116
+ def log_response(
117
+ self, res: requests.Response, stream=False, iter_lines=False, verbose=False
118
+ ):
119
+ status_code = res.status_code
120
+ status_code_str = f"[{status_code}]"
121
+
122
+ if status_code == 200:
123
+ logger_func = logger.success
124
+ else:
125
+ logger_func = logger.warn
126
+
127
+ logger.enter_quiet(not verbose)
128
+ logger_func(status_code_str)
129
+
130
+ if status_code != 200:
131
+ logger_func(res.text)
132
+
133
+ if stream:
134
+ if not iter_lines:
135
+ return
136
+
137
+ for line in res.iter_lines():
138
+ line = line.decode("utf-8")
139
+ line = re.sub(r"^data:\s*", "", line)
140
+ line = line.strip()
141
+ if line:
142
+ try:
143
+ data = json.loads(line, strict=False)
144
+ msg_type = data.get("type")
145
+ if msg_type == "status":
146
+ msg_status = data.get("status")
147
+ elif msg_type == "stream":
148
+ content = data.get("token", "")
149
+ logger_func(content, end="")
150
+ elif msg_type == "finalAnswer":
151
+ full_content = data.get("text")
152
+ logger.success("\n[Finished]")
153
+ break
154
+ else:
155
+ pass
156
+ except Exception as e:
157
+ logger.warn(e)
158
+ else:
159
+ logger_func(res.json())
160
+
161
+ logger.exit_quiet(not verbose)
162
+
163
+ def chat_completions(self, messages: list[dict], iter_lines=False, verbose=False):
164
+ composer = MessageComposer(model=self.model)
165
+ system_prompt, input_prompt = composer.decompose_to_system_and_input_prompt(
166
+ messages
167
+ )
168
+
169
+ checker = TokenChecker(input_str=system_prompt + input_prompt, model=self.model)
170
+ checker.check_token_limit()
171
+
172
+ self.get_hf_chat_id()
173
+ self.get_conversation_id(system_prompt=system_prompt)
174
+ message_id = self.get_last_message_id()
175
+
176
+ request_url = f"https://huggingface.co/chat/conversation/{self.conversation_id}"
177
+ request_headers = copy.deepcopy(HUGGINGCHAT_POST_HEADERS)
178
+ extra_headers = {
179
+ "Content-Type": "text/event-stream",
180
+ "Referer": request_url,
181
+ "Cookie": f"hf-chat={self.hf_chat_id}",
182
+ }
183
+ request_headers.update(extra_headers)
184
+ request_body = {
185
+ "files": [],
186
+ "id": message_id,
187
+ "inputs": input_prompt,
188
+ "is_continue": False,
189
+ "is_retry": False,
190
+ "web_search": False,
191
+ }
192
+ self.log_request(request_url, method="POST")
193
+
194
+ res = requests.post(
195
+ request_url,
196
+ headers=request_headers,
197
+ json=request_body,
198
+ proxies=PROXIES,
199
+ stream=True,
200
+ )
201
+ self.log_response(res, stream=True, iter_lines=iter_lines, verbose=verbose)
202
+ return res
203
+
204
+
205
+ class HuggingchatStreamer:
206
+ def __init__(self, model: str):
207
+ if model in MODEL_MAP.keys():
208
+ self.model = model
209
+ else:
210
+ self.model = "nous-mixtral-8x7b"
211
+ self.model_fullname = MODEL_MAP[self.model]
212
+ self.message_outputer = OpenaiStreamOutputer(model=self.model)
213
+
214
+ def chat_response(self, messages: list[dict], verbose=False):
215
+ requester = HuggingchatRequester(model=self.model)
216
+ return requester.chat_completions(
217
+ messages=messages, iter_lines=False, verbose=verbose
218
+ )
219
+
220
+ def chat_return_generator(self, stream_response: requests.Response, verbose=False):
221
+ is_finished = False
222
+ for line in stream_response.iter_lines():
223
+ line = line.decode("utf-8")
224
+ line = re.sub(r"^data:\s*", "", line)
225
+ line = line.strip()
226
+ if not line:
227
+ continue
228
+
229
+ content = ""
230
+ content_type = "Completions"
231
+ try:
232
+ data = json.loads(line, strict=False)
233
+ msg_type = data.get("type")
234
+ if msg_type == "status":
235
+ msg_status = data.get("status")
236
+ continue
237
+ elif msg_type == "stream":
238
+ content_type = "Completions"
239
+ content = data.get("token", "")
240
+ if verbose:
241
+ logger.success(content, end="")
242
+ elif msg_type == "finalAnswer":
243
+ content_type = "Finished"
244
+ content = ""
245
+ full_content = data.get("text")
246
+ if verbose:
247
+ logger.success("\n[Finished]")
248
+ is_finished = True
249
+ break
250
+ else:
251
+ continue
252
+ except Exception as e:
253
+ logger.warn(e)
254
+
255
+ output = self.message_outputer.output(
256
+ content=content, content_type=content_type
257
+ )
258
+ yield output
259
+
260
+ if not is_finished:
261
+ yield self.message_outputer.output(content="", content_type="Finished")
262
+
263
+ def chat_return_dict(self, stream_response: requests.Response):
264
+ final_output = self.message_outputer.default_data.copy()
265
+ final_output["choices"] = [
266
+ {
267
+ "index": 0,
268
+ "finish_reason": "stop",
269
+ "message": {"role": "assistant", "content": ""},
270
+ }
271
+ ]
272
+ final_content = ""
273
+ for item in self.chat_return_generator(stream_response):
274
+ try:
275
+ data = json.loads(item)
276
+ delta = data["choices"][0]["delta"]
277
+ delta_content = delta.get("content", "")
278
+ if delta_content:
279
+ final_content += delta_content
280
+ except Exception as e:
281
+ logger.warn(e)
282
+ final_output["choices"][0]["message"]["content"] = final_content.strip()
283
+ return final_output
284
+
285
+
286
+ if __name__ == "__main__":
287
+ # model = "command-r-plus"
288
+ model = "llama3-70b"
289
+ # model = "zephyr-141b"
290
+
291
+ streamer = HuggingchatStreamer(model=model)
292
+ messages = [
293
+ {
294
+ "role": "system",
295
+ "content": "You are an LLM developed by CloseAI.\nYour name is Niansuh-Copilot.",
296
+ },
297
+ {"role": "user", "content": "Hello, what is your role?"},
298
+ {"role": "assistant", "content": "I am an LLM."},
299
+ {"role": "user", "content": "What is your name?"},
300
+ ]
301
+
302
+ streamer.chat_response(messages=messages)
303
+ # HF_ENDPOINT=https://hf-mirror.com python -m networks.huggingchat_streamer
networks/huggingface_streamer.py ADDED
@@ -0,0 +1,176 @@
1
+ import json
2
+ import re
3
+ import requests
4
+
5
+ from tclogger import logger
6
+ from constants.models import MODEL_MAP, STOP_SEQUENCES_MAP
7
+ from constants.envs import PROXIES
8
+ from messagers.message_outputer import OpenaiStreamOutputer
9
+ from messagers.token_checker import TokenChecker
10
+
11
+
12
+ class HuggingfaceStreamer:
13
+ def __init__(self, model: str):
14
+ if model in MODEL_MAP.keys():
15
+ self.model = model
16
+ else:
17
+ self.model = "nous-mixtral-8x7b"
18
+ self.model_fullname = MODEL_MAP[self.model]
19
+ self.message_outputer = OpenaiStreamOutputer(model=self.model)
20
+
21
+ def parse_line(self, line):
22
+ line = line.decode("utf-8")
23
+ line = re.sub(r"data:\s*", "", line)
24
+ data = json.loads(line)
25
+ content = ""
26
+ try:
27
+ content = data["token"]["text"]
28
+ except:
29
+ logger.err(data)
30
+ return content
31
+
32
+ def chat_response(
33
+ self,
34
+ prompt: str = None,
35
+ temperature: float = 0.5,
36
+ top_p: float = 0.95,
37
+ max_new_tokens: int = None,
38
+ api_key: str = None,
39
+ use_cache: bool = False,
40
+ ):
41
+ # https://huggingface.co/docs/api-inference/detailed_parameters?code=curl
42
+ # curl --proxy http://<server>:<port> https://api-inference.huggingface.co/models/<org>/<model_name> -X POST -d '{"inputs":"who are you?","parameters":{"max_new_token":64}}' -H 'Content-Type: application/json' -H 'Authorization: Bearer <HF_TOKEN>'
43
+ self.request_url = (
44
+ f"https://api-inference.huggingface.co/models/{self.model_fullname}"
45
+ )
46
+ self.request_headers = {
47
+ "Content-Type": "application/json",
48
+ }
49
+
50
+ if api_key:
51
+ logger.note(
52
+ f"Using API Key: {api_key[:3]}{(len(api_key)-7)*'*'}{api_key[-4:]}"
53
+ )
54
+ self.request_headers["Authorization"] = f"Bearer {api_key}"
55
+
56
+ if temperature is None or temperature < 0:
57
+ temperature = 0.0
58
+ # temperature must be strictly between 0 and 1 for HF LLM models
59
+ temperature = max(temperature, 0.01)
60
+ temperature = min(temperature, 0.99)
61
+ top_p = max(top_p, 0.01)
62
+ top_p = min(top_p, 0.99)
63
+
64
+ checker = TokenChecker(input_str=prompt, model=self.model)
65
+
66
+ if max_new_tokens is None or max_new_tokens <= 0:
67
+ max_new_tokens = checker.get_token_redundancy()
68
+ else:
69
+ max_new_tokens = min(max_new_tokens, checker.get_token_redundancy())
70
+
71
+ # References:
72
+ # huggingface_hub/inference/_client.py:
73
+ # class InferenceClient > def text_generation()
74
+ # huggingface_hub/inference/_text_generation.py:
75
+ # class TextGenerationRequest > param `stream`
76
+ # https://huggingface.co/docs/text-generation-inference/conceptual/streaming#streaming-with-curl
77
+ # https://huggingface.co/docs/api-inference/detailed_parameters#text-generation-task
78
+ self.request_body = {
79
+ "inputs": prompt,
80
+ "parameters": {
81
+ "temperature": temperature,
82
+ "top_p": top_p,
83
+ "max_new_tokens": max_new_tokens,
84
+ "return_full_text": False,
85
+ },
86
+ "options": {
87
+ "use_cache": use_cache,
88
+ },
89
+ "stream": True,
90
+ }
91
+
92
+ if self.model in STOP_SEQUENCES_MAP.keys():
93
+ self.stop_sequences = STOP_SEQUENCES_MAP[self.model]
94
+ # self.request_body["parameters"]["stop_sequences"] = [
95
+ # self.STOP_SEQUENCES[self.model]
96
+ # ]
97
+
98
+ logger.back(self.request_url)
99
+ stream_response = requests.post(
100
+ self.request_url,
101
+ headers=self.request_headers,
102
+ json=self.request_body,
103
+ proxies=PROXIES,
104
+ stream=True,
105
+ )
106
+ status_code = stream_response.status_code
107
+ if status_code == 200:
108
+ logger.success(status_code)
109
+ else:
110
+ logger.err(status_code)
111
+
112
+ return stream_response
113
+
114
+ def chat_return_dict(self, stream_response):
115
+ # https://platform.openai.com/docs/guides/text-generation/chat-completions-response-format
116
+ final_output = self.message_outputer.default_data.copy()
117
+ final_output["choices"] = [
118
+ {
119
+ "index": 0,
120
+ "finish_reason": "stop",
121
+ "message": {
122
+ "role": "assistant",
123
+ "content": "",
124
+ },
125
+ }
126
+ ]
127
+ logger.back(final_output)
128
+
129
+ final_content = ""
130
+ for line in stream_response.iter_lines():
131
+ if not line:
132
+ continue
133
+ content = self.parse_line(line)
134
+
135
+ if content.strip() == self.stop_sequences:
136
+ logger.success("\n[Finished]")
137
+ break
138
+ else:
139
+ logger.back(content, end="")
140
+ final_content += content
141
+
142
+ if self.model in STOP_SEQUENCES_MAP.keys():
143
+ final_content = final_content.replace(self.stop_sequences, "")
144
+
145
+ final_content = final_content.strip()
146
+ final_output["choices"][0]["message"]["content"] = final_content
147
+ return final_output
148
+
149
+ def chat_return_generator(self, stream_response):
150
+ is_finished = False
151
+ line_count = 0
152
+ for line in stream_response.iter_lines():
153
+ if line:
154
+ line_count += 1
155
+ else:
156
+ continue
157
+
158
+ content = self.parse_line(line)
159
+
160
+ if content.strip() == self.stop_sequences:
161
+ content_type = "Finished"
162
+ logger.success("\n[Finished]")
163
+ is_finished = True
164
+ else:
165
+ content_type = "Completions"
166
+ if line_count == 1:
167
+ content = content.lstrip()
168
+ logger.back(content, end="")
169
+
170
+ output = self.message_outputer.output(
171
+ content=content, content_type=content_type
172
+ )
173
+ yield output
174
+
175
+ if not is_finished:
176
+ yield self.message_outputer.output(content="", content_type="Finished")
networks/openai_streamer.py ADDED
@@ -0,0 +1,281 @@
+ import copy
+ import json
+ import re
+ import tiktoken
+ import uuid
+
+ from curl_cffi import requests
+ from tclogger import logger
+
+ from constants.envs import PROXIES
+ from constants.headers import OPENAI_GET_HEADERS, OPENAI_POST_DATA
+ from constants.models import TOKEN_LIMIT_MAP, TOKEN_RESERVED
+
+ from messagers.message_outputer import OpenaiStreamOutputer
+ from networks.proof_worker import ProofWorker
+
+
+ class OpenaiRequester:
+     def __init__(self):
+         self.init_requests_params()
+
+     def init_requests_params(self):
+         self.api_base = "https://chat.openai.com/backend-anon"
+         self.api_me = f"{self.api_base}/me"
+         self.api_models = f"{self.api_base}/models"
+         self.api_chat_requirements = f"{self.api_base}/sentinel/chat-requirements"
+         self.api_conversation = f"{self.api_base}/conversation"
+         self.uuid = str(uuid.uuid4())
+         self.requests_headers = copy.deepcopy(OPENAI_GET_HEADERS)
+         extra_headers = {
+             "Oai-Device-Id": self.uuid,
+         }
+         self.requests_headers.update(extra_headers)
+
+     def log_request(self, url, method="GET"):
+         logger.note(f"> {method}:", end=" ")
+         logger.mesg(f"{url}", end=" ")
+
+     def log_response(
+         self, res: requests.Response, stream=False, iter_lines=False, verbose=False
+     ):
+         status_code = res.status_code
+         status_code_str = f"[{status_code}]"
+
+         if status_code == 200:
+             logger_func = logger.success
+         else:
+             logger_func = logger.warn
+
+         logger_func(status_code_str)
+
+         logger.enter_quiet(not verbose)
+
+         if stream:
+             if not iter_lines:
+                 return
+
+             if not hasattr(self, "content_offset"):
+                 self.content_offset = 0
+
+             for line in res.iter_lines():
+                 line = line.decode("utf-8")
+                 line = re.sub(r"^data:\s*", "", line)
+                 if re.match(r"^\[DONE\]", line):
+                     logger.success("\n[Finished]")
+                     break
+                 line = line.strip()
+                 if line:
+                     try:
+                         data = json.loads(line, strict=False)
+                         message_role = data["message"]["author"]["role"]
+                         message_status = data["message"]["status"]
+                         if (
+                             message_role == "assistant"
+                             and message_status == "in_progress"
+                         ):
+                             content = data["message"]["content"]["parts"][0]
+                             delta_content = content[self.content_offset :]
+                             self.content_offset = len(content)
+                             logger_func(delta_content, end="")
+                     except Exception as e:
+                         logger.warn(e)
+         else:
+             logger_func(res.json())
+
+         logger.exit_quiet(not verbose)
+
+     def get_models(self):
+         self.log_request(self.api_models)
+         res = requests.get(
+             self.api_models,
+             headers=self.requests_headers,
+             proxies=PROXIES,
+             timeout=10,
+             impersonate="chrome120",
+         )
+         self.log_response(res)
+
+     def auth(self):
+         self.log_request(self.api_chat_requirements, method="POST")
+         res = requests.post(
+             self.api_chat_requirements,
+             headers=self.requests_headers,
+             proxies=PROXIES,
+             timeout=10,
+             impersonate="chrome120",
+         )
+         data = res.json()
+         self.chat_requirements_token = data["token"]
+         self.chat_requirements_seed = data["proofofwork"]["seed"]
+         self.chat_requirements_difficulty = data["proofofwork"]["difficulty"]
+         self.log_response(res)
+
+     def transform_messages(self, messages: list[dict]):
+         def get_role(role):
+             if role in ["system", "user", "assistant"]:
+                 return role
+             else:
+                 return "system"
+
+         new_messages = [
+             {
+                 "author": {"role": get_role(message["role"])},
+                 "content": {"content_type": "text", "parts": [message["content"]]},
+                 "metadata": {},
+             }
+             for message in messages
+         ]
+         return new_messages
+
+     def chat_completions(self, messages: list[dict], iter_lines=False, verbose=False):
+         proof_token = ProofWorker().calc_proof_token(
+             self.chat_requirements_seed, self.chat_requirements_difficulty
+         )
+         extra_headers = {
+             "Accept": "text/event-stream",
+             "Openai-Sentinel-Chat-Requirements-Token": self.chat_requirements_token,
+             "Openai-Sentinel-Proof-Token": proof_token,
+         }
+         requests_headers = copy.deepcopy(self.requests_headers)
+         requests_headers.update(extra_headers)
+
+         post_data = copy.deepcopy(OPENAI_POST_DATA)
+         extra_data = {
+             "messages": self.transform_messages(messages),
+             "websocket_request_id": str(uuid.uuid4()),
+         }
+         post_data.update(extra_data)
+
+         self.log_request(self.api_conversation, method="POST")
+         s = requests.Session()
+         res = s.post(
+             self.api_conversation,
+             headers=requests_headers,
+             json=post_data,
+             proxies=PROXIES,
+             timeout=10,
+             impersonate="chrome120",
+             stream=True,
+         )
+         self.log_response(res, stream=True, iter_lines=iter_lines, verbose=verbose)
+         return res
+
+
+ class OpenaiStreamer:
+     def __init__(self):
+         self.model = "gpt-3.5-turbo"
+         self.message_outputer = OpenaiStreamOutputer(
+             owned_by="openai", model="gpt-3.5-turbo"
+         )
+         self.tokenizer = tiktoken.get_encoding("cl100k_base")
+
+     def count_tokens(self, messages: list[dict]):
+         token_count = sum(
+             len(self.tokenizer.encode(message["content"])) for message in messages
+         )
+         logger.note(f"Prompt Token Count: {token_count}")
+         return token_count
+
+     def check_token_limit(self, messages: list[dict]):
+         token_limit = TOKEN_LIMIT_MAP[self.model]
+         token_count = self.count_tokens(messages)
+         token_redundancy = int(token_limit - TOKEN_RESERVED - token_count)
+         if token_redundancy <= 0:
+             raise ValueError(
+                 f"Prompt exceeded token limit: {token_count} > {token_limit}"
+             )
+         return True
+
+     def chat_response(self, messages: list[dict], iter_lines=False, verbose=False):
+         self.check_token_limit(messages)
+         logger.enter_quiet(not verbose)
+         requester = OpenaiRequester()
+         requester.auth()
+         logger.exit_quiet(not verbose)
+         return requester.chat_completions(
+             messages=messages, iter_lines=iter_lines, verbose=verbose
+         )
+
+     def chat_return_generator(self, stream_response: requests.Response, verbose=False):
+         content_offset = 0
+         is_finished = False
+
+         for line in stream_response.iter_lines():
+             line = line.decode("utf-8")
+             line = re.sub(r"^data:\s*", "", line)
+             line = line.strip()
+
+             if not line:
+                 continue
+
+             if re.match(r"^\[DONE\]", line):
+                 content_type = "Finished"
+                 delta_content = ""
+                 logger.success("\n[Finished]")
+                 is_finished = True
+             else:
+                 content_type = "Completions"
+                 delta_content = ""
+                 try:
+                     data = json.loads(line, strict=False)
+                     message_role = data["message"]["author"]["role"]
+                     message_status = data["message"]["status"]
+                     if message_role == "assistant" and message_status == "in_progress":
+                         content = data["message"]["content"]["parts"][0]
+                         if not len(content):
+                             continue
+                         delta_content = content[content_offset:]
+                         content_offset = len(content)
+                         if verbose:
+                             logger.success(delta_content, end="")
+                     else:
+                         continue
+                 except Exception as e:
+                     logger.warn(e)
+
+             output = self.message_outputer.output(
+                 content=delta_content, content_type=content_type
+             )
+             yield output
+
+         if not is_finished:
+             yield self.message_outputer.output(content="", content_type="Finished")
+
+     def chat_return_dict(self, stream_response: requests.Response):
+         final_output = self.message_outputer.default_data.copy()
+         final_output["choices"] = [
+             {
+                 "index": 0,
+                 "finish_reason": "stop",
+                 "message": {"role": "assistant", "content": ""},
+             }
+         ]
+         final_content = ""
+         for item in self.chat_return_generator(stream_response):
+             try:
+                 data = json.loads(item)
+                 delta = data["choices"][0]["delta"]
+                 delta_content = delta.get("content", "")
+                 if delta_content:
+                     final_content += delta_content
+             except Exception as e:
+                 logger.warn(e)
+         final_output["choices"][0]["message"]["content"] = final_content.strip()
+         return final_output
+
+
+ if __name__ == "__main__":
+     streamer = OpenaiStreamer()
+     messages = [
+         {
+             "role": "system",
+             "content": "You are an LLM developed by NiansuhAI.\nYour name is Niansuh-Copilot.",
+         },
+         {"role": "user", "content": "Hello, what is your role?"},
+         {"role": "assistant", "content": "I am an LLM."},
+         {"role": "user", "content": "What is your name?"},
+     ]
+
+     streamer.chat_response(messages=messages, iter_lines=True, verbose=True)
+     # python -m networks.openai_streamer
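
A rough usage sketch for `OpenaiStreamer` (not part of the commit; it assumes the project modules above are importable and the upstream backend is reachable). It mirrors the `__main__` block above but shows both return paths; note that a streaming response can only be iterated once, so each path requests its own.

```py
# Illustrative only: drives OpenaiStreamer the same way the chat API would.
from networks.openai_streamer import OpenaiStreamer

streamer = OpenaiStreamer()
messages = [{"role": "user", "content": "Say hello in one sentence."}]

# Option 1: stream chunks (what an SSE endpoint would forward)
stream_response = streamer.chat_response(messages=messages)
for chunk in streamer.chat_return_generator(stream_response):
    print(chunk)

# Option 2: collect a single OpenAI-style completion dict
stream_response = streamer.chat_response(messages=messages)
result = streamer.chat_return_dict(stream_response)
print(result["choices"][0]["message"]["content"])
```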
networks/proof_worker.py ADDED
@@ -0,0 +1,61 @@
+ import base64
+ from hashlib import sha3_512
+ import json
+ import random
+
+ from datetime import datetime, timedelta, timezone
+
+ from constants.headers import OPENAI_GET_HEADERS
+
+
+ class ProofWorker:
+     def __init__(self, difficulty=None, required=False, seed=None):
+         self.difficulty = difficulty
+         self.required = required
+         self.seed = seed
+         self.proof_token_prefix = "gAAAAABwQ8Lk5FbGpA2NcR9dShT6gYjU7VxZ4D"
+
+     def get_parse_time(self):
+         now = datetime.now()
+         tz = timezone(timedelta(hours=8))
+         now = now.astimezone(tz)
+         time_format = "%a %b %d %Y %H:%M:%S"
+         return now.strftime(time_format) + " GMT+0800 (中国标准时间)"
+
+     def get_config(self):
+         cores = [8, 12, 16, 24]
+         core = random.choice(cores)
+         screens = [3000, 4000, 6000]
+         screen = random.choice(screens)
+         return [
+             str(core) + str(screen),
+             self.get_parse_time(),
+             4294705152,
+             0,
+             OPENAI_GET_HEADERS["User-Agent"],
+         ]
+
+     def calc_proof_token(self, seed: str, difficulty: str):
+         config = self.get_config()
+         diff_len = len(difficulty) // 2
+         for i in range(100000):
+             config[3] = i
+             json_str = json.dumps(config)
+             base = base64.b64encode(json_str.encode()).decode()
+             hasher = sha3_512()
+             hasher.update((seed + base).encode())
+             hash = hasher.digest().hex()
+             if hash[:diff_len] <= difficulty:
+                 return "gAAAAAB" + base
+         self.proof_token = (
+             self.proof_token_prefix + base64.b64encode(seed.encode()).decode()
+         )
+         return self.proof_token
+
+
+ if __name__ == "__main__":
+     seed, difficulty = "0.42665582693491433", "05cdf2"
+     worker = ProofWorker()
+     proof_token = worker.calc_proof_token(seed, difficulty)
+     print(f"proof_token: {proof_token}")
+     # python -m networks.proof_worker
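
To make the proof-of-work scheme above easier to follow (this note is not part of the commit): `calc_proof_token` brute-forces the counter at `config[3]` until the SHA3-512 hex digest of `seed + base64(config)` has a prefix that compares less than or equal to `difficulty`, and the returned token is just `"gAAAAAB"` plus that base64 payload, so a successful token can be re-checked independently. A hedged verification sketch, assuming `networks.proof_worker` is importable:

```py
from hashlib import sha3_512

from networks.proof_worker import ProofWorker

seed, difficulty = "0.42665582693491433", "05cdf2"
worker = ProofWorker()
token = worker.calc_proof_token(seed, difficulty)

if not token.startswith(worker.proof_token_prefix):  # skip the fallback token
    base = token[len("gAAAAAB"):]
    digest = sha3_512((seed + base).encode()).hexdigest()
    # same acceptance rule as calc_proof_token
    assert digest[: len(difficulty) // 2] <= difficulty
```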
requirements.txt ADDED
@@ -0,0 +1,18 @@
+ aiohttp
+ curl_cffi
+ fastapi
+ httpx
+ jinja2
+ markdown2[all]
+ openai
+ protobuf
+ pydantic
+ requests
+ sentencepiece
+ sse_starlette
+ termcolor
+ tclogger
+ tiktoken
+ transformers
+ uvicorn
+ websockets
tests/__init__.py ADDED
@@ -0,0 +1 @@
+
tests/openai.py ADDED
@@ -0,0 +1,180 @@
+ import copy
+ import json
+ import re
+ import uuid
+
+ from pathlib import Path
+
+ from curl_cffi import requests
+ from tclogger import logger, OSEnver
+ from constants.envs import PROXIES
+
+
+ class OpenaiAPI:
+     def __init__(self):
+         self.init_requests_params()
+
+     def init_requests_params(self):
+         self.api_base = "https://chat.openai.com/backend-anon"
+         self.api_me = f"{self.api_base}/me"
+         self.api_models = f"{self.api_base}/models"
+         self.api_chat_requirements = f"{self.api_base}/sentinel/chat-requirements"
+         self.api_conversation = f"{self.api_base}/conversation"
+         self.uuid = str(uuid.uuid4())
+         self.requests_headers = {
+             # "Accept": "*/*",
+             "Accept-Encoding": "gzip, deflate, br, zstd",
+             "Accept-Language": "en-US,en;q=0.9",
+             "Cache-Control": "no-cache",
+             "Content-Type": "application/json",
+             "Oai-Device-Id": self.uuid,
+             "Oai-Language": "en-US",
+             "Pragma": "no-cache",
+             "Referer": "https://chat.openai.com/",
+             "Sec-Ch-Ua": '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
+             "Sec-Ch-Ua-Mobile": "?0",
+             "Sec-Ch-Ua-Platform": '"Windows"',
+             "Sec-Fetch-Dest": "empty",
+             "Sec-Fetch-Mode": "cors",
+             "Sec-Fetch-Site": "same-origin",
+             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
+         }
+
+     def log_request(self, url, method="GET"):
+         logger.note(f"> {method}:", end=" ")
+         logger.mesg(f"{url}", end=" ")
+
+     def log_response(self, res: requests.Response, stream=False, verbose=False):
+         status_code = res.status_code
+         status_code_str = f"[{status_code}]"
+
+         if status_code == 200:
+             logger_func = logger.success
+         else:
+             logger_func = logger.warn
+
+         logger_func(status_code_str)
+
+         if verbose:
+             if stream:
+                 if not hasattr(self, "content_offset"):
+                     self.content_offset = 0
+
+                 for line in res.iter_lines():
+                     line = line.decode("utf-8")
+                     line = re.sub(r"^data:\s*", "", line)
+                     if re.match(r"^\[DONE\]", line):
+                         logger.success("\n[Finished]")
+                         break
+                     line = line.strip()
+                     if line:
+                         try:
+                             data = json.loads(line, strict=False)
+                             message_role = data["message"]["author"]["role"]
+                             message_status = data["message"]["status"]
+                             if (
+                                 message_role == "assistant"
+                                 and message_status == "in_progress"
+                             ):
+                                 content = data["message"]["content"]["parts"][0]
+                                 delta_content = content[self.content_offset :]
+                                 self.content_offset = len(content)
+                                 logger_func(delta_content, end="")
+                         except Exception as e:
+                             logger.warn(e)
+             else:
+                 logger_func(res.json())
+
+     def get_models(self):
+         self.log_request(self.api_models)
+         res = requests.get(
+             self.api_models,
+             headers=self.requests_headers,
+             proxies=PROXIES,
+             timeout=10,
+             impersonate="chrome120",
+         )
+         self.log_response(res)
+
+     def auth(self):
+         self.log_request(self.api_chat_requirements, method="POST")
+         res = requests.post(
+             self.api_chat_requirements,
+             headers=self.requests_headers,
+             proxies=PROXIES,
+             timeout=10,
+             impersonate="chrome120",
+         )
+         self.chat_requirements_token = res.json()["token"]
+         self.log_response(res)
+
+     def transform_messages(self, messages: list[dict]):
+         def get_role(role):
+             if role in ["system", "user", "assistant"]:
+                 return role
+             else:
+                 return "system"
+
+         new_messages = [
+             {
+                 "author": {"role": get_role(message["role"])},
+                 "content": {"content_type": "text", "parts": [message["content"]]},
+                 "metadata": {},
+             }
+             for message in messages
+         ]
+         return new_messages
+
+     def chat_completions(self, messages: list[dict]):
+         new_headers = {
+             "Accept": "text/event-stream",
+             "Openai-Sentinel-Chat-Requirements-Token": self.chat_requirements_token,
+         }
+         requests_headers = copy.deepcopy(self.requests_headers)
+         requests_headers.update(new_headers)
+         post_data = {
+             "action": "next",
+             "messages": self.transform_messages(messages),
+             "parent_message_id": "",
+             "model": "text-davinci-002-render-sha",
+             "timezone_offset_min": -480,
+             "suggestions": [],
+             "history_and_training_disabled": False,
+             "conversation_mode": {"kind": "primary_assistant"},
+             "force_paragen": False,
+             "force_paragen_model_slug": "",
+             "force_nulligen": False,
+             "force_rate_limit": False,
+             "websocket_request_id": str(uuid.uuid4()),
+         }
+         self.log_request(self.api_conversation, method="POST")
+         s = requests.Session()
+         res = s.post(
+             self.api_conversation,
+             headers=requests_headers,
+             json=post_data,
+             proxies=PROXIES,
+             timeout=10,
+             impersonate="chrome120",
+             stream=True,
+         )
+         self.log_response(res, stream=True, verbose=True)
+
+
+ if __name__ == "__main__":
+     api = OpenaiAPI()
+     # api.get_models()
+     api.auth()
+     messages = [
+         {"role": "system", "content": "I am Niansuh"},
+         {"role": "system", "content": "I have a cat named Lucky"},
+         {"role": "user", "content": "Repeat my name and my cat's name"},
+         {
+             "role": "assistant",
+             "content": "Your name is Niansuh and your cat's name is Lucky.",
+         },
+         {"role": "user", "content": "summarize our conversation"},
+     ]
+     api.chat_completions(messages)
+
+     # python -m tests.openai
vercel.json ADDED
@@ -0,0 +1,17 @@
+ {
+   "builds": [
+     {
+       "src": "apis/chat_api.py",
+       "use": "@vercel/python"
+     }
+   ],
+   "routes": [
+     {
+       "src": "/(.*)",
+       "dest": "/apis/chat_api.py"
+     }
+   ],
+   "env": {
+     "APP_MODULE": "apis.chat_api:app"
+   }
+ }