aithink committed on
Commit b6c2bbd · verified · 1 Parent(s): 4d7d027

Upload 32 files
.gitignore ADDED
@@ -0,0 +1,2 @@
+ secrets.json
+ __pycache__
Dockerfile ADDED
@@ -0,0 +1,8 @@
+ FROM python:3.11-slim
+ WORKDIR $HOME/app
+ COPY requirements.txt $HOME/app
+ RUN mkdir /.cache && chmod 777 /.cache
+ RUN pip install -r requirements.txt
+ COPY . $HOME/app
+ EXPOSE 23333
+ CMD ["python", "-m", "apis.chat_api"]
README.md CHANGED
@@ -1,10 +1,167 @@
- ---
- title: HFLLMAPI
- emoji: 🐨
- colorFrom: gray
- colorTo: green
- sdk: docker
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
1
+ ---
2
+ title: HF LLM API
3
+ emoji: ☪️
4
+ colorFrom: gray
5
+ colorTo: gray
6
+ sdk: docker
7
+ app_port: 23333
8
+ ---
9
+
10
+ ## HF-LLM-API
11
+
12
+ ![](https://img.shields.io/github/v/release/Niansuh/HF-LLM-API?label=HF-LLM-API&color=blue&cacheSeconds=60)
13
+
14
+ Hugging Face LLM Inference API in the OpenAI message format.
15
+
16
+ **Original project:** https://github.com/Hansimov/HF-LLM-API
17
+
18
+ ## Features
+
+ - Available Models (2024/04/20), also listable at runtime via the `/models` endpoint (see the sketch after this list):
+   - `mistral-7b`, `mixtral-8x7b`, `nous-mixtral-8x7b`, `gemma-7b`, `command-r-plus`, `llama3-70b`, `zephyr-141b`, `gpt-3.5-turbo`
+ - Adaptive prompt templates for different models
+ - Supports the OpenAI API format
+   - Works as an API endpoint for the official `openai-python` package
+ - Supports both streaming and non-streaming responses
+ - Supports API keys via the HTTP auth header or an environment variable
+ - Docker deployment
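+
+ The available models can also be listed at runtime from the `/models` endpoint. A minimal sketch, assuming the service is already running locally on the default port 23333:
+
+ ```py
+ # List the models served by a running instance (endpoint defined in apis/chat_api.py).
+ import httpx
+
+ res = httpx.get("http://127.0.0.1:23333/v1/models")
+ for model in res.json()["data"]:
+     print(model["id"], "-", model["owned_by"])
+ ```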
28
+
29
+ ## Run API service
30
+
31
+ ### Run in Command Line
32
+
33
+ **Install dependencies:**
34
+
35
+ ```bash
36
+ # pipreqs . --force --mode no-pin
37
+ pip install -r requirements.txt
38
+ ```
39
+
40
+ **Run API:**
41
+
42
+ ```bash
43
+ python -m apis.chat_api
44
+ ```
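+
+ The entrypoint also accepts a few flags (see `ArgParser` in `apis/chat_api.py`): `-s/--host` and `-p/--port` override the defaults from `configs/config.json`, and `-d/--dev` runs uvicorn with auto-reload.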
45
+
46
+ ### Run via Docker
47
+
48
+ **Docker build:**
49
+
50
+ ```bash
51
+ sudo docker build -t hf-llm-api:1.1.3 . --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy
52
+ ```
53
+
54
+ **Docker run:**
55
+
56
+ ```bash
57
+ # no proxy
58
+ sudo docker run -p 23333:23333 hf-llm-api:1.1.3
59
+
60
+ # with proxy
61
+ sudo docker run -p 23333:23333 --env http_proxy="http://<server>:<port>" hf-llm-api:1.1.3
62
+ ```
63
+
64
+ ## API Usage
65
+
66
+ ### Using `openai-python`
67
+
68
+ See: [`examples/chat_with_openai.py`](https://github.com/Niansuh/HF-LLM-API/blob/main/examples/chat_with_openai.py)
69
+
70
+ ```py
71
+ from openai import OpenAI
72
+
73
+ # If running this service with a proxy, you might need to unset `http(s)_proxy`.
74
+ base_url = "http://127.0.0.1:23333"
75
+ # Your own HF_TOKEN
76
+ api_key = "hf_xxxxxxxxxxxxxxxx"
77
+ # use below as non-auth user
78
+ # api_key = "sk-xxx"
79
+
80
+ client = OpenAI(base_url=base_url, api_key=api_key)
81
+ response = client.chat.completions.create(
82
+ model="nous-mixtral-8x7b",
83
+ messages=[
84
+ {
85
+ "role": "user",
86
+ "content": "what is your model",
87
+ }
88
+ ],
89
+ stream=True,
90
+ )
91
+
92
+ for chunk in response:
93
+ if chunk.choices[0].delta.content is not None:
94
+ print(chunk.choices[0].delta.content, end="", flush=True)
95
+ elif chunk.choices[0].finish_reason == "stop":
96
+ print()
97
+ else:
98
+ pass
99
+ ```
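+
+ The endpoint also works without streaming; a minimal sketch, reusing the `client` from above and only switching `stream` off:
+
+ ```py
+ # Non-streaming variant: the full reply comes back in a single response object.
+ response = client.chat.completions.create(
+     model="nous-mixtral-8x7b",
+     messages=[{"role": "user", "content": "what is your model"}],
+     stream=False,
+ )
+ print(response.choices[0].message.content)
+ ```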
100
+
101
+ ### Using post requests
102
+
103
+ See: [`examples/chat_with_post.py`](https://github.com/Niansuh/HF-LLM-API/blob/main/examples/chat_with_post.py)
104
+
105
+
106
+ ```py
107
+ import ast
108
+ import httpx
109
+ import json
110
+ import re
111
+
112
+ # If running this service with a proxy, you might need to unset `http(s)_proxy`.
113
+ chat_api = "http://127.0.0.1:23333"
114
+ # Your own HF_TOKEN
115
+ api_key = "hf_xxxxxxxxxxxxxxxx"
116
+ # use below as non-auth user
117
+ # api_key = "sk-xxx"
118
+
119
+ requests_headers = {}
120
+ requests_payload = {
121
+ "model": "nous-mixtral-8x7b",
122
+ "messages": [
123
+ {
124
+ "role": "user",
125
+ "content": "what is your model",
126
+ }
127
+ ],
128
+ "stream": True,
129
+ }
130
+
131
+ with httpx.stream(
132
+ "POST",
133
+ chat_api + "/chat/completions",
134
+ headers=requests_headers,
135
+ json=requests_payload,
136
+ timeout=httpx.Timeout(connect=20, read=60, write=20, pool=None),
137
+ ) as response:
138
+ # https://docs.aiohttp.org/en/stable/streams.html
139
+ # https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb
140
+ response_content = ""
141
+ for line in response.iter_lines():
142
+ remove_patterns = [r"^\s*data:\s*", r"^\s*\[DONE\]\s*"]
143
+ for pattern in remove_patterns:
144
+ line = re.sub(pattern, "", line).strip()
145
+
146
+ if line:
147
+ try:
148
+ line_data = json.loads(line)
149
+ except Exception as e:
150
+ try:
151
+ line_data = ast.literal_eval(line)
152
+ except:
153
+ print(f"Error: {line}")
154
+ raise e
155
+ # print(f"line: {line_data}")
156
+ delta_data = line_data["choices"][0]["delta"]
157
+ finish_reason = line_data["choices"][0]["finish_reason"]
158
+ if "role" in delta_data:
159
+ role = delta_data["role"]
160
+ if "content" in delta_data:
161
+ delta_content = delta_data["content"]
162
+ response_content += delta_content
163
+ print(delta_content, end="", flush=True)
164
+ if finish_reason == "stop":
165
+ print()
166
+
167
+ ```
__init__.py ADDED
File without changes
apis/__init__.py ADDED
File without changes
apis/chat_api.py ADDED
@@ -0,0 +1,214 @@
1
+ import argparse
2
+ import markdown2
3
+ import os
4
+ import sys
5
+ import uvicorn
6
+
7
+ from pathlib import Path
8
+ from typing import Union
9
+
10
+ from fastapi import FastAPI, Depends, HTTPException
11
+ from fastapi.responses import HTMLResponse
12
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
13
+ from pydantic import BaseModel, Field
14
+ from sse_starlette.sse import EventSourceResponse, ServerSentEvent
15
+ from tclogger import logger
16
+
17
+ from constants.models import AVAILABLE_MODELS_DICTS, PRO_MODELS
18
+ from constants.envs import CONFIG, SECRETS
19
+ from networks.exceptions import HfApiException, INVALID_API_KEY_ERROR
20
+
21
+ from messagers.message_composer import MessageComposer
22
+ from mocks.stream_chat_mocker import stream_chat_mock
23
+
24
+ from networks.huggingface_streamer import HuggingfaceStreamer
25
+ from networks.huggingchat_streamer import HuggingchatStreamer
26
+ from networks.openai_streamer import OpenaiStreamer
27
+
28
+
29
+ class ChatAPIApp:
30
+ def __init__(self):
31
+ self.app = FastAPI(
32
+ docs_url="/",
33
+ title=CONFIG["app_name"],
34
+ swagger_ui_parameters={"defaultModelsExpandDepth": -1},
35
+ version=CONFIG["version"],
36
+ )
37
+ self.setup_routes()
38
+
39
+ def get_available_models(self):
40
+ return {"object": "list", "data": AVAILABLE_MODELS_DICTS}
41
+
42
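+ # Note: plain function (no `self`); used below as a FastAPI dependency via Depends(extract_api_key).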
+ def extract_api_key(
43
+ credentials: HTTPAuthorizationCredentials = Depends(HTTPBearer()),
44
+ ):
45
+ api_key = None
46
+ if credentials:
47
+ api_key = credentials.credentials
48
+ env_api_key = SECRETS["HF_LLM_API_KEY"]
49
+ return api_key
50
+
51
+ def auth_api_key(self, api_key: str):
52
+ env_api_key = SECRETS["HF_LLM_API_KEY"]
53
+
54
+ # require no api_key
55
+ if not env_api_key:
56
+ return None
57
+ # user provides HF_TOKEN
58
+ if api_key and api_key.startswith("hf_"):
59
+ return api_key
60
+ # user provides correct API_KEY
61
+ if str(api_key) == str(env_api_key):
62
+ return None
63
+
64
+ raise INVALID_API_KEY_ERROR
65
+
66
+ class ChatCompletionsPostItem(BaseModel):
67
+ model: str = Field(
68
+ default="nous-mixtral-8x7b",
69
+ description="(str) `nous-mixtral-8x7b`",
70
+ )
71
+ messages: list = Field(
72
+ default=[{"role": "user", "content": "Hello, who are you?"}],
73
+ description="(list) Messages",
74
+ )
75
+ temperature: Union[float, None] = Field(
76
+ default=0.5,
77
+ description="(float) Temperature",
78
+ )
79
+ top_p: Union[float, None] = Field(
80
+ default=0.95,
81
+ description="(float) top p",
82
+ )
83
+ max_tokens: Union[int, None] = Field(
84
+ default=-1,
85
+ description="(int) Max tokens",
86
+ )
87
+ use_cache: bool = Field(
88
+ default=False,
89
+ description="(bool) Use cache",
90
+ )
91
+ stream: bool = Field(
92
+ default=True,
93
+ description="(bool) Stream",
94
+ )
95
+
96
+ def chat_completions(
97
+ self, item: ChatCompletionsPostItem, api_key: str = Depends(extract_api_key)
98
+ ):
99
+ try:
100
+ api_key = self.auth_api_key(api_key)
101
+
102
+ if item.model == "gpt-3.5-turbo":
103
+ streamer = OpenaiStreamer()
104
+ stream_response = streamer.chat_response(messages=item.messages)
105
+ elif item.model in PRO_MODELS:
106
+ streamer = HuggingchatStreamer(model=item.model)
107
+ stream_response = streamer.chat_response(
108
+ messages=item.messages,
109
+ )
110
+ else:
111
+ streamer = HuggingfaceStreamer(model=item.model)
112
+ composer = MessageComposer(model=item.model)
113
+ composer.merge(messages=item.messages)
114
+ stream_response = streamer.chat_response(
115
+ prompt=composer.merged_str,
116
+ temperature=item.temperature,
117
+ top_p=item.top_p,
118
+ max_new_tokens=item.max_tokens,
119
+ api_key=api_key,
120
+ use_cache=item.use_cache,
121
+ )
122
+
123
+ if item.stream:
124
+ event_source_response = EventSourceResponse(
125
+ streamer.chat_return_generator(stream_response),
126
+ media_type="text/event-stream",
127
+ ping=2000,
128
+ ping_message_factory=lambda: ServerSentEvent(**{"comment": ""}),
129
+ )
130
+ return event_source_response
131
+ else:
132
+ data_response = streamer.chat_return_dict(stream_response)
133
+ return data_response
134
+ except HfApiException as e:
135
+ raise HTTPException(status_code=e.status_code, detail=e.detail)
136
+ except Exception as e:
137
+ raise HTTPException(status_code=500, detail=str(e))
138
+
139
+ def get_readme(self):
140
+ readme_path = Path(__file__).parents[1] / "README.md"
141
+ with open(readme_path, "r", encoding="utf-8") as rf:
142
+ readme_str = rf.read()
143
+ readme_html = markdown2.markdown(
144
+ readme_str, extras=["table", "fenced-code-blocks", "highlightjs-lang"]
145
+ )
146
+ return readme_html
147
+
148
+ def setup_routes(self):
149
+ for prefix in ["", "/v1", "/api", "/api/v1"]:
150
+ if prefix in ["/api/v1"]:
151
+ include_in_schema = True
152
+ else:
153
+ include_in_schema = False
154
+
155
+ self.app.get(
156
+ prefix + "/models",
157
+ summary="Get available models",
158
+ include_in_schema=include_in_schema,
159
+ )(self.get_available_models)
160
+
161
+ self.app.post(
162
+ prefix + "/chat/completions",
163
+ summary="Chat completions in conversation session",
164
+ include_in_schema=include_in_schema,
165
+ )(self.chat_completions)
166
+ self.app.get(
167
+ "/readme",
168
+ summary="README of HF LLM API",
169
+ response_class=HTMLResponse,
170
+ include_in_schema=False,
171
+ )(self.get_readme)
172
+
173
+
174
+ class ArgParser(argparse.ArgumentParser):
175
+ def __init__(self, *args, **kwargs):
176
+ super(ArgParser, self).__init__(*args, **kwargs)
177
+
178
+ self.add_argument(
179
+ "-s",
180
+ "--host",
181
+ type=str,
182
+ default=CONFIG["host"],
183
+ help=f"Host for {CONFIG['app_name']}",
184
+ )
185
+ self.add_argument(
186
+ "-p",
187
+ "--port",
188
+ type=int,
189
+ default=CONFIG["port"],
190
+ help=f"Port for {CONFIG['app_name']}",
191
+ )
192
+
193
+ self.add_argument(
194
+ "-d",
195
+ "--dev",
196
+ default=False,
197
+ action="store_true",
198
+ help="Run in dev mode",
199
+ )
200
+
201
+ self.args = self.parse_args(sys.argv[1:])
202
+
203
+
204
+ app = ChatAPIApp().app
205
+
206
+ if __name__ == "__main__":
207
+ args = ArgParser().args
208
+ if args.dev:
209
+ uvicorn.run("__main__:app", host=args.host, port=args.port, reload=True)
210
+ else:
211
+ uvicorn.run("__main__:app", host=args.host, port=args.port, reload=False)
212
+
213
+ # python -m apis.chat_api  # [Docker] production mode
+ # python -m apis.chat_api -d  # [Dev] development mode (auto-reload)
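
A note on the routes above: `setup_routes` registers the same handlers under several prefixes, so the chat endpoint is reachable at any of the URLs below (a sketch, assuming the default host/port from `configs/config.json`; only the `/api/v1` variant appears in the OpenAPI schema):

```py
# Equivalent URLs registered by ChatAPIApp.setup_routes for chat completions.
endpoints = [
    "http://127.0.0.1:23333/chat/completions",
    "http://127.0.0.1:23333/v1/chat/completions",
    "http://127.0.0.1:23333/api/chat/completions",
    "http://127.0.0.1:23333/api/v1/chat/completions",
]
```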
configs/__init__.py ADDED
@@ -0,0 +1 @@
+
configs/config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "app_name": "HuggingFace LLM API",
+   "version": "1.4.1a",
+   "host": "0.0.0.0",
+   "port": 23333
+ }
configs/secrets_template.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "http_proxy": "http://127.0.0.1:11111",
+   "HF_LLM_API_KEY": "********"
+ }
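
To supply real values, copy this template to `configs/secrets.json` (already listed in `.gitignore`) and edit it. A minimal sketch, assuming it is run from the repository root:

```py
# Create configs/secrets.json from the template and set your own values.
# An empty HF_LLM_API_KEY disables the API-key check in apis/chat_api.py;
# an empty http_proxy disables the proxy in constants/envs.py.
import json
from pathlib import Path

secrets = json.loads(Path("configs/secrets_template.json").read_text())
secrets["HF_LLM_API_KEY"] = "your-own-key"  # hypothetical placeholder
secrets["http_proxy"] = ""
Path("configs/secrets.json").write_text(json.dumps(secrets, indent=2))
```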
constants/__init__.py ADDED
@@ -0,0 +1 @@
+
constants/envs.py ADDED
@@ -0,0 +1,21 @@
+ from pathlib import Path
+ from tclogger import logger, OSEnver
+
+
+ config_root = Path(__file__).parents[1] / "configs"
+
+ secrets_path = config_root / "secrets.json"
+ SECRETS = OSEnver(secrets_path)
+
+ http_proxy = SECRETS["http_proxy"]
+ if http_proxy:
+     logger.note(f"> Using proxy: {http_proxy}")
+     PROXIES = {
+         "http": http_proxy,
+         "https": http_proxy,
+     }
+ else:
+     PROXIES = None
+
+ config_path = config_root / "config.json"
+ CONFIG = OSEnver(config_path)
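
Both objects are indexed like plain dicts elsewhere in the codebase; a minimal usage sketch:

```py
# CONFIG and PROXIES as consumed by apis/chat_api.py and the networks/* modules.
from constants.envs import CONFIG, PROXIES

print(CONFIG["app_name"], CONFIG["port"])  # HuggingFace LLM API 23333
print(PROXIES)  # None, or {"http": ..., "https": ...} when http_proxy is set in secrets.json
```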
constants/headers.py ADDED
@@ -0,0 +1,67 @@
1
+ REQUESTS_HEADERS = {
2
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
3
+ }
4
+
5
+ HUGGINGCHAT_POST_HEADERS = {
6
+ "Accept-Encoding": "gzip, deflate, br, zstd",
7
+ "Accept-Language": "en-US,en;q=0.9",
8
+ "Cache-Control": "no-cache",
9
+ "Content-Type": "application/json",
10
+ "Origin": "https://huggingface.co",
11
+ "Pragma": "no-cache",
12
+ "Referer": "https://huggingface.co/chat/",
13
+ "Sec-Ch-Ua": 'Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
14
+ "Sec-Ch-Ua-Mobile": "?0",
15
+ "Sec-Ch-Ua-Platform": '"Windows"',
16
+ "Sec-Fetch-Dest": "empty",
17
+ "Sec-Fetch-Mode": "cors",
18
+ "Sec-Fetch-Site": "same-origin",
19
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
20
+ }
21
+
22
+ HUGGINGCHAT_SETTINGS_POST_DATA = {
23
+ "assistants": [],
24
+ "customPrompts": {},
25
+ "ethicsModalAccepted": True,
26
+ "ethicsModalAcceptedAt": None,
27
+ "hideEmojiOnSidebar": False,
28
+ "recentlySaved": False,
29
+ "searchEnabled": True,
30
+ "shareConversationsWithModelAuthors": True,
31
+ }
32
+
33
+ OPENAI_GET_HEADERS = {
34
+ # "Accept": "*/*",
35
+ "Accept-Encoding": "gzip, deflate, br, zstd",
36
+ "Accept-Language": "en-US,en;q=0.9",
37
+ "Cache-Control": "no-cache",
38
+ "Content-Type": "application/json",
39
+ # "Oai-Device-Id": self.uuid,
40
+ "Oai-Language": "en-US",
41
+ "Pragma": "no-cache",
42
+ "Referer": "https://chat.openai.com/",
43
+ "Sec-Ch-Ua": 'Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
44
+ "Sec-Ch-Ua-Mobile": "?0",
45
+ "Sec-Ch-Ua-Platform": '"Windows"',
46
+ "Sec-Fetch-Dest": "empty",
47
+ "Sec-Fetch-Mode": "cors",
48
+ "Sec-Fetch-Site": "same-origin",
49
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
50
+ }
51
+
52
+
53
+ OPENAI_POST_DATA = {
54
+ "action": "next",
55
+ # "conversation_id": "...",
56
+ "conversation_mode": {"kind": "primary_assistant"},
57
+ "force_nulligen": False,
58
+ "force_paragen": False,
59
+ "force_paragen_model_slug": "",
60
+ "force_rate_limit": False,
61
+ "history_and_training_disabled": False,
62
+ # "messages": [...],
63
+ "model": "text-davinci-002-render-sha",
64
+ "parent_message_id": "",
65
+ "suggestions": [],
66
+ "timezone_offset_min": -480,
67
+ }
constants/models.py ADDED
@@ -0,0 +1,143 @@
1
+ MODEL_MAP = {
2
+ "mixtral-8x7b": "mistralai/Mixtral-8x7B-Instruct-v0.1", # [Recommended]
3
+ "nous-mixtral-8x7b": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
4
+ "Llama-3.1-70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct",
5
+ "Mistral-Nemo-Instruct-2407": "mistralai/Mistral-Nemo-Instruct-2407",
6
+ "mistral-7b": "mistralai/Mistral-7B-Instruct-v0.2",
7
+ "yi-1.5-34b": "01-ai/Yi-1.5-34B-Chat",
8
+ "gemma-7b": "google/gemma-1.1-7b-it",
9
+ # "openchat-3.5": "openchat/openchat-3.5-0106",
10
+ # "command-r-plus": "CohereForAI/c4ai-command-r-plus",
11
+ # "llama3-70b": "meta-llama/Meta-Llama-3-70B-Instruct",
12
+ # "zephyr-141b": "HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
13
+ "default": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
14
+ }
15
+
16
+ AVAILABLE_MODELS = list(MODEL_MAP.keys())
17
+
18
+ PRO_MODELS = ["command-r-plus", "llama3-70b", "zephyr-141b"]
19
+
20
+ STOP_SEQUENCES_MAP = {
21
+ # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1/blob/main/tokenizer_config.json#L33
22
+ "mixtral-8x7b": "</s>",
23
+ # https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO/blob/main/tokenizer_config.json#L50
24
+ "nous-mixtral-8x7b": "<|im_end|>",
25
+ # https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct/blob/1d54af340dc8906a2d21146191a9c184c35e47bd/tokenizer_config.json#L2055
26
+ "Llama-3.1-70B-Instruct": "<|eot_id|>",
27
+ # https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407/blob/main/tokenizer_config.json
28
+ "Mistral-Nemo-Instruct-2407": "</s>",
29
+ # https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/blob/main/tokenizer_config.json#L33
30
+ "mistral-7b": "</s>",
31
+ # https://huggingface.co/01-ai/Yi-1.5-34B-Chat/blob/main/tokenizer_config.json#L42
32
+ "yi-1.5-34b": "<|im_end|>",
33
+ # https://huggingface.co/google/gemma-1.1-7b-it/blob/main/tokenizer_config.json#L1509
34
+ "gemma-7b": "<eos>",
35
+ # "openchat-3.5": "<|end_of_turn|>",
36
+ # "command-r-plus": "<|END_OF_TURN_TOKEN|>",
37
+ }
38
+
39
+ TOKEN_LIMIT_MAP = {
40
+ "mixtral-8x7b": 32768,
41
+ "nous-mixtral-8x7b": 32768,
42
+ "Llama-3.1-70B-Instruct": 32768,
43
+ "Mistral-Nemo-Instruct-2407": 1024000,
44
+ "mistral-7b": 32768,
45
+ "yi-1.5-34b": 4096,
46
+ "gemma-7b": 8192,
47
+ # "openchat-3.5": 8192,
48
+ # "command-r-plus": 32768,
49
+ # "llama3-70b": 8192,
50
+ # "zephyr-141b": 2048,
51
+ # "gpt-3.5-turbo": 8192,
52
+ }
53
+
54
+ TOKEN_RESERVED = 20
55
+
56
+
57
+ # https://platform.openai.com/docs/api-reference/models/list
58
+ AVAILABLE_MODELS_DICTS = [
59
+ {
60
+ "id": "mixtral-8x7b",
61
+ "description": "[mistralai/Mixtral-8x7B-Instruct-v0.1]: https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1",
62
+ "object": "model",
63
+ "created": 1700000000,
64
+ "owned_by": "mistralai",
65
+ },
66
+ {
67
+ "id": "nous-mixtral-8x7b",
68
+ "description": "[NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO]: https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
69
+ "object": "model",
70
+ "created": 1700000000,
71
+ "owned_by": "NousResearch",
72
+ },
73
+ {
74
+ "id": "Mistral-Nemo-Instruct-2407",
75
+ "description": "[mistralai/Mistral-Nemo-Instruct-2407]: https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407",
76
+ "object": "model",
77
+ "created": 1700000000,
78
+ "owned_by": "mistralai",
79
+ },
80
+ {
81
+ "id": "Llama-3.1-70B-Instruct",
82
+ "description": "[meta-llama/Llama-3.1-70B-Instruct]: https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct",
83
+ "object": "model",
84
+ "created": 1700000000,
85
+ "owned_by": "meta-llama",
86
+ },
87
+ {
88
+ "id": "mistral-7b",
89
+ "description": "[mistralai/Mistral-7B-Instruct-v0.2]: https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2",
90
+ "object": "model",
91
+ "created": 1700000000,
92
+ "owned_by": "mistralai",
93
+ },
94
+ {
95
+ "id": "yi-1.5-34b",
96
+ "description": "[01-ai/Yi-1.5-34B-Chat]: https://huggingface.co/01-ai/Yi-1.5-34B-Chat",
97
+ "object": "model",
98
+ "created": 1700000000,
99
+ "owned_by": "01-ai",
100
+ },
101
+ {
102
+ "id": "gemma-7b",
103
+ "description": "[google/gemma-1.1-7b-it]: https://huggingface.co/google/gemma-1.1-7b-it",
104
+ "object": "model",
105
+ "created": 1700000000,
106
+ "owned_by": "Google",
107
+ },
108
+ # {
109
+ # "id": "openchat-3.5",
110
+ # "description": "[openchat/openchat-3.5-0106]: https://huggingface.co/openchat/openchat-3.5-0106",
111
+ # "object": "model",
112
+ # "created": 1700000000,
113
+ # "owned_by": "openchat",
114
+ # },
115
+ # {
116
+ # "id": "command-r-plus",
117
+ # "description": "[CohereForAI/c4ai-command-r-plus]: https://huggingface.co/CohereForAI/c4ai-command-r-plus",
118
+ # "object": "model",
119
+ # "created": 1700000000,
120
+ # "owned_by": "CohereForAI",
121
+ # },
122
+ # {
123
+ # "id": "llama3-70b",
124
+ # "description": "[meta-llama/Meta-Llama-3-70B]: https://huggingface.co/meta-llama/Meta-Llama-3-70B",
125
+ # "object": "model",
126
+ # "created": 1700000000,
127
+ # "owned_by": "Meta",
128
+ # },
129
+ # {
130
+ # "id": "zephyr-141b",
131
+ # "description": "[HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1]: https://huggingface.co/HuggingFaceH4/zephyr-orpo-141b-A35b-v0.1",
132
+ # "object": "model",
133
+ # "created": 1700000000,
134
+ # "owned_by": "Huggingface",
135
+ # },
136
+ # {
137
+ # "id": "gpt-3.5-turbo",
138
+ # "description": "[openai/gpt-3.5-turbo]: https://platform.openai.com/docs/models/gpt-3-5-turbo",
139
+ # "object": "model",
140
+ # "created": 1700000000,
141
+ # "owned_by": "OpenAI",
142
+ # },
143
+ ]
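
A quick sketch of how these tables are used together: an alias from `MODEL_MAP` resolves to a full Hub repo id, and `TOKEN_LIMIT_MAP` minus `TOKEN_RESERVED` gives the budget checked in `messagers/token_checker.py`:

```py
# Resolve a model alias and compute its usable token budget.
from constants.models import MODEL_MAP, TOKEN_LIMIT_MAP, TOKEN_RESERVED

alias = "nous-mixtral-8x7b"
print(MODEL_MAP[alias])                         # NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO
print(TOKEN_LIMIT_MAP[alias] - TOKEN_RESERVED)  # 32748 tokens for prompt + generation
```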
examples/__init__.py ADDED
File without changes
examples/chat_with_openai.py ADDED
@@ -0,0 +1,25 @@
1
+ from openai import OpenAI
2
+
3
+ # If running this service with a proxy, you might need to unset `http(s)_proxy`.
4
+ base_url = "http://127.0.0.1:23333"
5
+ api_key = "sk-xxxxx"
6
+
7
+ client = OpenAI(base_url=base_url, api_key=api_key)
8
+ response = client.chat.completions.create(
9
+ model="nous-mixtral-8x7b",
10
+ messages=[
11
+ {
12
+ "role": "user",
13
+ "content": "what is your model",
14
+ }
15
+ ],
16
+ stream=True,
17
+ )
18
+
19
+ for chunk in response:
20
+ if chunk.choices[0].delta.content is not None:
21
+ print(chunk.choices[0].delta.content, end="", flush=True)
22
+ elif chunk.choices[0].finish_reason == "stop":
23
+ print()
24
+ else:
25
+ pass
examples/chat_with_post.py ADDED
@@ -0,0 +1,55 @@
1
+ import ast
2
+ import httpx
3
+ import json
4
+ import re
5
+
6
+ # If running this service with a proxy, you might need to unset `http(s)_proxy`.
7
+ chat_api = "http://127.0.0.1:23333"
8
+ api_key = "sk-xxxxx"
9
+ requests_headers = {}
10
+ requests_payload = {
11
+ "model": "nous-mixtral-8x7b",
12
+ "messages": [
13
+ {
14
+ "role": "user",
15
+ "content": "what is your model",
16
+ }
17
+ ],
18
+ "stream": True,
19
+ }
20
+
21
+ with httpx.stream(
22
+ "POST",
23
+ chat_api + "/chat/completions",
24
+ headers=requests_headers,
25
+ json=requests_payload,
26
+ timeout=httpx.Timeout(connect=20, read=60, write=20, pool=None),
27
+ ) as response:
28
+ # https://docs.aiohttp.org/en/stable/streams.html
29
+ # https://github.com/openai/openai-cookbook/blob/main/examples/How_to_stream_completions.ipynb
30
+ response_content = ""
31
+ for line in response.iter_lines():
32
+ remove_patterns = [r"^\s*data:\s*", r"^\s*\[DONE\]\s*"]
33
+ for pattern in remove_patterns:
34
+ line = re.sub(pattern, "", line).strip()
35
+
36
+ if line:
37
+ try:
38
+ line_data = json.loads(line)
39
+ except Exception as e:
40
+ try:
41
+ line_data = ast.literal_eval(line)
42
+ except:
43
+ print(f"Error: {line}")
44
+ raise e
45
+ # print(f"line: {line_data}")
46
+ delta_data = line_data["choices"][0]["delta"]
47
+ finish_reason = line_data["choices"][0]["finish_reason"]
48
+ if "role" in delta_data:
49
+ role = delta_data["role"]
50
+ if "content" in delta_data:
51
+ delta_content = delta_data["content"]
52
+ response_content += delta_content
53
+ print(delta_content, end="", flush=True)
54
+ if finish_reason == "stop":
55
+ print()
messagers/__init__.py ADDED
File without changes
messagers/message_composer.py ADDED
@@ -0,0 +1,237 @@
1
+ import re
2
+ from pprint import pprint
3
+
4
+ from transformers import AutoTokenizer
5
+
6
+ from constants.models import AVAILABLE_MODELS, MODEL_MAP
7
+ from tclogger import logger
8
+
9
+
10
+ class MessageComposer:
11
+ def __init__(self, model: str = None):
12
+ if model in AVAILABLE_MODELS:
13
+ self.model = model
14
+ else:
15
+ self.model = "nous-mixtral-8x7b"
16
+ self.model_fullname = MODEL_MAP[self.model]
17
+ self.system_roles = ["system"]
18
+ self.inst_roles = ["user", "system", "inst"]
19
+ self.answer_roles = ["assistant", "bot", "answer", "model"]
20
+ self.default_role = "user"
21
+
22
+ def concat_messages_by_role(self, messages):
23
+ def is_same_role(role1, role2):
24
+ if (
25
+ (role1 == role2)
26
+ or (role1 in self.inst_roles and role2 in self.inst_roles)
27
+ or (role1 in self.answer_roles and role2 in self.answer_roles)
28
+ ):
29
+ return True
30
+ else:
31
+ return False
32
+
33
+ concat_messages = []
34
+ for message in messages:
35
+ role = message["role"]
36
+ content = message["content"]
37
+ if concat_messages and is_same_role(role, concat_messages[-1]["role"]):
38
+ concat_messages[-1]["content"] += "\n" + content
39
+ else:
40
+ if role in self.inst_roles:
41
+ message["role"] = "inst"
42
+ elif role in self.answer_roles:
43
+ message["role"] = "answer"
44
+ else:
45
+ message["role"] = "inst"
46
+ concat_messages.append(message)
47
+ return concat_messages
48
+
49
+ def merge(self, messages) -> str:
50
+ # Templates for Chat Models
51
+ # - https://huggingface.co/docs/transformers/main/en/chat_templating
52
+ # - https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1#instruction-format
53
+ # - https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO#prompt-format
54
+ # - https://huggingface.co/openchat/openchat-3.5-0106
55
+ # - https://huggingface.co/google/gemma-7b-it#chat-template
56
+
57
+ # Mistral and Mixtral:
58
+ # <s> [INST] Instruction [/INST] Model answer </s> [INST] Follow-up instruction [/INST]
59
+
60
+ # Nous Mixtral:
61
+ # <|im_start|>system
62
+ # You are "Hermes 2".<|im_end|>
63
+ # <|im_start|>user
64
+ # Hello, who are you?<|im_end|>
65
+ # <|im_start|>assistant
66
+
67
+ # OpenChat:
68
+ # GPT4 Correct User: Hello<|end_of_turn|>GPT4 Correct Assistant: Hi<|end_of_turn|>GPT4 Correct User: How are you today?<|end_of_turn|>GPT4 Correct Assistant:
69
+
70
+ # Google Gemma-it
71
+ # <start_of_turn>user
72
+ # How does the brain work?<end_of_turn>
73
+ # <start_of_turn>model
74
+
75
+ self.messages = messages
76
+ self.merged_str = ""
77
+
78
+ # https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1#instruction-format
79
+ if self.model in ["mixtral-8x7b", "mistral-7b"]:
80
+ self.messages = self.concat_messages_by_role(messages)
81
+ self.cached_str = ""
82
+ for message in self.messages:
83
+ role = message["role"]
84
+ content = message["content"]
85
+ if role in self.inst_roles:
86
+ self.cached_str = f"[INST] {content} [/INST]"
87
+ elif role in self.answer_roles:
88
+ self.merged_str += f"<s> {self.cached_str} {content} </s>\n"
89
+ self.cached_str = ""
90
+ else:
91
+ self.cached_str = f"[INST] {content} [/INST]"
92
+ if self.cached_str:
93
+ self.merged_str += f"{self.cached_str}"
94
+ # https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO#prompt-format
95
+ elif self.model in ["nous-mixtral-8x7b"]:
96
+ self.merged_str_list = []
97
+ for message in self.messages:
98
+ role = message["role"]
99
+ content = message["content"]
100
+ if role not in ["system", "user", "assistant"]:
101
+ role = self.default_role
102
+ message_line = f"<|im_start|>{role}\n{content}<|im_end|>"
103
+ self.merged_str_list.append(message_line)
104
+ self.merged_str_list.append("<|im_start|>assistant")
105
+ self.merged_str = "\n".join(self.merged_str_list)
106
+ # https://huggingface.co/openchat/openchat-3.5-0106
107
+ elif self.model in ["openchat-3.5"]:
108
+ self.messages = self.concat_messages_by_role(messages)
109
+ self.merged_str_list = []
110
+ self.end_of_turn = "<|end_of_turn|>"
111
+ for message in self.messages:
112
+ role = message["role"]
113
+ content = message["content"]
114
+ if role in self.inst_roles:
115
+ self.merged_str_list.append(
116
+ f"GPT4 Correct User:\n{content}{self.end_of_turn}"
117
+ )
118
+ elif role in self.answer_roles:
119
+ self.merged_str_list.append(
120
+ f"GPT4 Correct Assistant:\n{content}{self.end_of_turn}"
121
+ )
122
+ else:
123
+ self.merged_str_list.append(
124
+ f"GPT4 Correct User: {content}{self.end_of_turn}"
125
+ )
126
+ self.merged_str_list.append(f"GPT4 Correct Assistant:\n")
127
+ self.merged_str = "\n".join(self.merged_str_list)
128
+ # https://huggingface.co/google/gemma-1.1-7b-it#chat-template
129
+ elif self.model in ["gemma-7b"]:
130
+ self.messages = self.concat_messages_by_role(messages)
131
+ self.merged_str_list = []
132
+ self.end_of_turn = "<end_of_turn>"
133
+ self.start_of_turn = "<start_of_turn>"
134
+ for message in self.messages:
135
+ role = message["role"]
136
+ content = message["content"]
137
+ if role in self.inst_roles:
138
+ self.merged_str_list.append(
139
+ f"{self.start_of_turn}user\n{content}{self.end_of_turn}"
140
+ )
141
+ elif role in self.answer_roles:
142
+ self.merged_str_list.append(
143
+ f"{self.start_of_turn}model\n{content}{self.end_of_turn}"
144
+ )
145
+ else:
146
+ self.merged_str_list.append(
147
+ f"{self.start_of_turn}user\n{content}{self.end_of_turn}"
148
+ )
149
+ self.merged_str_list.append(f"{self.start_of_turn}model\n")
150
+ self.merged_str = "<bos>" + "\n".join(self.merged_str_list)
151
+ # https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO#prompt-format
152
+ # https://huggingface.co/openchat/openchat-3.5-0106
153
+ # https://huggingface.co/01-ai/Yi-1.5-34B-Chat
154
+ elif self.model in ["openchat-3.5", "command-r-plus", "gemma-7b", "yi-1.5-34b"]:
155
+ # https://discuss.huggingface.co/t/error-with-new-tokenizers-urgent/2847/5
156
+ tokenizer = AutoTokenizer.from_pretrained(
157
+ self.model_fullname, use_fast=False
158
+ )
159
+ self.merged_str = tokenizer.apply_chat_template(
160
+ messages, tokenize=False, add_generation_prompt=True
161
+ )
162
+ else:
163
+ self.merged_str = "\n\n".join(
164
+ [f"{message['role']}: {message['content']}" for message in messages]
165
+ )
166
+
167
+ return self.merged_str
168
+
169
+ def decompose_to_system_and_input_prompt(
170
+ self, messages: list[dict], append_assistant=True
171
+ ):
172
+ system_prompt_list = []
173
+ user_and_assistant_messages = []
174
+ for message in messages:
175
+ role = message["role"]
176
+ content = message["content"]
177
+ if role in self.system_roles:
178
+ system_prompt_list.append(content)
179
+ else:
180
+ user_and_assistant_messages.append(message)
181
+ system_prompt = "\n".join(system_prompt_list)
182
+
183
+ input_prompt_list = []
184
+ input_messages = self.concat_messages_by_role(user_and_assistant_messages)
185
+ for message in input_messages:
186
+ role = message["role"]
187
+ content = message["content"]
188
+ if role in self.answer_roles:
189
+ role_content_str = f"`assistant`:\n{content}"
190
+ else:
191
+ role_content_str = f"`user`:\n{content}"
192
+ input_prompt_list.append(role_content_str)
193
+ input_prompt = "\n\n".join(input_prompt_list)
194
+
195
+ if append_assistant:
196
+ input_prompt += "\n\n`assistant`:"
197
+
198
+ return system_prompt, input_prompt
199
+
200
+
201
+ if __name__ == "__main__":
202
+ # model = "mixtral-8x7b"
203
+ # model = "nous-mixtral-8x7b"
204
+ model = "gemma-7b"
205
+ # model = "openchat-3.5"
206
+ # model = "command-r-plus"
207
+ composer = MessageComposer(model)
208
+ messages = [
209
+ {
210
+ "role": "system",
211
+ "content": "You are a LLM developed by OpenAI.\nYour name is GPT-4.",
212
+ },
213
+ {"role": "user", "content": "Hello, who are you?"},
214
+ {"role": "assistant", "content": "I am a bot."},
215
+ {"role": "user", "content": "What is your name?"},
216
+ # {"role": "assistant", "content": "My name is Bing."},
217
+ # {"role": "user", "content": "Tell me a joke."},
218
+ # {"role": "assistant", "content": "What is a robot's favorite type of music?"},
219
+ # {
220
+ # "role": "user",
221
+ # "content": "How many questions have I asked? Please list them.",
222
+ # },
223
+ ]
224
+ # logger.note(f"model: {composer.model}")
225
+ # merged_str = composer.merge(messages)
226
+ # logger.note("merged_str:")
227
+ # logger.mesg(merged_str)
228
+
229
+ system_prompt, input_prompt = composer.decompose_to_system_and_input_prompt(
230
+ messages
231
+ )
232
+ logger.note("system_prompt:")
233
+ logger.mesg(system_prompt)
234
+ logger.note("input_prompt:")
235
+ logger.mesg(input_prompt)
236
+
237
+ # python -m messagers.message_composer
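
For reference, a sketch of what `merge()` produces for the default `nous-mixtral-8x7b` template (ChatML-style, per the comments inside `merge()` above):

```py
# The merged prompt string for a short ChatML-style conversation.
from messagers.message_composer import MessageComposer

composer = MessageComposer(model="nous-mixtral-8x7b")
merged = composer.merge([
    {"role": "system", "content": "You are Hermes 2."},
    {"role": "user", "content": "Hello, who are you?"},
])
# merged ==
# <|im_start|>system
# You are Hermes 2.<|im_end|>
# <|im_start|>user
# Hello, who are you?<|im_end|>
# <|im_start|>assistant
```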
messagers/message_outputer.py ADDED
@@ -0,0 +1,65 @@
1
+ import json
2
+
3
+
4
+ class OpenaiStreamOutputer:
5
+ """
6
+ Create chat completion - OpenAI API Documentation
7
+ * https://platform.openai.com/docs/api-reference/chat/create
8
+ """
9
+
10
+ def __init__(self, owned_by="huggingface", model="nous-mixtral-8x7b"):
11
+ self.default_data = {
12
+ "created": 1700000000,
13
+ "id": f"chatcmpl-{owned_by}",
14
+ "object": "chat.completion.chunk",
15
+ # "content_type": "Completions",
16
+ "model": model,
17
+ "choices": [],
18
+ }
19
+
20
+ def data_to_string(self, data={}, content_type=""):
21
+ data_str = f"{json.dumps(data)}"
22
+ return data_str
23
+
24
+ def output(self, content=None, content_type="Completions") -> str:
25
+ data = self.default_data.copy()
26
+ if content_type == "Role":
27
+ data["choices"] = [
28
+ {
29
+ "index": 0,
30
+ "delta": {"role": "assistant"},
31
+ "finish_reason": None,
32
+ }
33
+ ]
34
+ elif content_type in [
35
+ "Completions",
36
+ "InternalSearchQuery",
37
+ "InternalSearchResult",
38
+ "SuggestedResponses",
39
+ ]:
40
+ if content_type in ["InternalSearchQuery", "InternalSearchResult"]:
41
+ content += "\n"
42
+ data["choices"] = [
43
+ {
44
+ "index": 0,
45
+ "delta": {"content": content},
46
+ "finish_reason": None,
47
+ }
48
+ ]
49
+ elif content_type == "Finished":
50
+ data["choices"] = [
51
+ {
52
+ "index": 0,
53
+ "delta": {},
54
+ "finish_reason": "stop",
55
+ }
56
+ ]
57
+ else:
58
+ data["choices"] = [
59
+ {
60
+ "index": 0,
61
+ "delta": {},
62
+ "finish_reason": None,
63
+ }
64
+ ]
65
+ return self.data_to_string(data, content_type)
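
A sketch of the string a single call to `output()` yields (this is what gets sent as one SSE chunk):

```py
# One OpenAI-style streaming chunk, serialized by OpenaiStreamOutputer.output().
from messagers.message_outputer import OpenaiStreamOutputer

outputer = OpenaiStreamOutputer(model="nous-mixtral-8x7b")
print(outputer.output(content="Hello", content_type="Completions"))
# {"created": 1700000000, "id": "chatcmpl-huggingface", "object": "chat.completion.chunk",
#  "model": "nous-mixtral-8x7b", "choices": [{"index": 0, "delta": {"content": "Hello"}, "finish_reason": null}]}
```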
messagers/token_checker.py ADDED
@@ -0,0 +1,46 @@
1
+ from tclogger import logger
2
+ from transformers import AutoTokenizer
3
+
4
+ from constants.models import MODEL_MAP, TOKEN_LIMIT_MAP, TOKEN_RESERVED
5
+
6
+
7
+ class TokenChecker:
8
+ def __init__(self, input_str: str, model: str):
9
+ self.input_str = input_str
10
+
11
+ if model in MODEL_MAP.keys():
12
+ self.model = model
13
+ else:
14
+ self.model = "nous-mixtral-8x7b"
15
+
16
+ self.model_fullname = MODEL_MAP[self.model]
17
+
18
+ # As some models are gated, we need to fetch tokenizers from alternatives
19
+ GATED_MODEL_MAP = {
20
+ "llama3-70b": "NousResearch/Meta-Llama-3-70B",
21
+ "gemma-7b": "unsloth/gemma-7b",
22
+ "mistral-7b": "dfurman/Mistral-7B-Instruct-v0.2",
23
+ "mixtral-8x7b": "dfurman/Mixtral-8x7B-Instruct-v0.1",
24
+ }
25
+ if self.model in GATED_MODEL_MAP.keys():
26
+ self.tokenizer = AutoTokenizer.from_pretrained(GATED_MODEL_MAP[self.model])
27
+ else:
28
+ self.tokenizer = AutoTokenizer.from_pretrained(self.model_fullname)
29
+
30
+ def count_tokens(self):
31
+ token_count = len(self.tokenizer.encode(self.input_str))
32
+ logger.note(f"Prompt Token Count: {token_count}")
33
+ return token_count
34
+
35
+ def get_token_limit(self):
36
+ return TOKEN_LIMIT_MAP[self.model]
37
+
38
+ def get_token_redundancy(self):
39
+ return int(self.get_token_limit() - TOKEN_RESERVED - self.count_tokens())
40
+
41
+ def check_token_limit(self):
42
+ if self.get_token_redundancy() <= 0:
43
+ raise ValueError(
44
+ f"Prompt exceeded token limit: {self.count_tokens()} > {self.get_token_limit()}"
45
+ )
46
+ return True
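
A minimal usage sketch (assumes the tokenizer for the chosen model can be downloaded from the Hub):

```py
# Check that a prompt fits the model's context window before sending it.
from messagers.token_checker import TokenChecker

checker = TokenChecker(input_str="Hello, who are you?", model="nous-mixtral-8x7b")
checker.check_token_limit()               # raises ValueError if the prompt is too long
max_new = checker.get_token_redundancy()  # tokens left over for generation
```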
mocks/__init__.py ADDED
File without changes
mocks/stream_chat_mocker.py ADDED
@@ -0,0 +1,13 @@
+ import time
+ from tclogger import logger
+
+
+ def stream_chat_mock(*args, **kwargs):
+     logger.note(msg=str(args) + str(kwargs))
+     for i in range(10):
+         content = f"W{i+1} "
+         time.sleep(0.1)
+         logger.mesg(content, end="")
+         yield content
+     logger.mesg("")
+     yield ""
networks/__init__.py ADDED
File without changes
networks/exceptions.py ADDED
@@ -0,0 +1,31 @@
1
+ import http
2
+
3
+ from typing import Optional
4
+
5
+ from fastapi import HTTPException, status
6
+
7
+
8
+ class HfApiException(Exception):
9
+ def __init__(
10
+ self,
11
+ status_code: int,
12
+ detail: Optional[str] = None,
13
+ ) -> None:
14
+ if detail is None:
15
+ self.detail = http.HTTPStatus(status_code).phrase
16
+ else:
17
+ self.detail = detail
18
+ self.status_code = status_code
19
+
20
+ def __repr__(self) -> str:
21
+ class_name = self.__class__.__name__
22
+ return f"{class_name}(status_code={self.status_code!r}, detail={self.detail!r})"
23
+
24
+ def __str__(self) -> str:
25
+ return self.__repr__()
26
+
27
+
28
+ INVALID_API_KEY_ERROR = HfApiException(
29
+ status_code=status.HTTP_403_FORBIDDEN,
30
+ detail="Invalid API Key",
31
+ )
networks/huggingchat_streamer.py ADDED
@@ -0,0 +1,303 @@
1
+ import copy
2
+ import json
3
+ import re
4
+
5
+ import requests
6
+ from curl_cffi import requests as cffi_requests
7
+
8
+ from tclogger import logger
9
+
10
+ from constants.models import MODEL_MAP
11
+ from constants.envs import PROXIES
12
+ from constants.headers import HUGGINGCHAT_POST_HEADERS, HUGGINGCHAT_SETTINGS_POST_DATA
13
+ from messagers.message_outputer import OpenaiStreamOutputer
14
+ from messagers.message_composer import MessageComposer
15
+ from messagers.token_checker import TokenChecker
16
+
17
+
18
+ class HuggingchatRequester:
19
+ def __init__(self, model: str):
20
+ if model in MODEL_MAP.keys():
21
+ self.model = model
22
+ else:
23
+ self.model = "nous-mixtral-8x7b"
24
+ self.model_fullname = MODEL_MAP[self.model]
25
+
26
+ def get_hf_chat_id(self):
27
+ request_url = "https://huggingface.co/chat/settings"
28
+ request_body = copy.deepcopy(HUGGINGCHAT_SETTINGS_POST_DATA)
29
+ extra_body = {
30
+ "activeModel": self.model_fullname,
31
+ }
32
+ request_body.update(extra_body)
33
+ logger.note(f"> hf-chat ID:", end=" ")
34
+
35
+ res = cffi_requests.post(
36
+ request_url,
37
+ headers=HUGGINGCHAT_POST_HEADERS,
38
+ json=request_body,
39
+ proxies=PROXIES,
40
+ timeout=10,
41
+ impersonate="chrome",
42
+ )
43
+ self.hf_chat_id = res.cookies.get("hf-chat")
44
+ if self.hf_chat_id:
45
+ logger.success(f"[{self.hf_chat_id}]")
46
+ else:
47
+ logger.warn(f"[{res.status_code}]")
48
+ logger.warn(res.text)
49
+ raise ValueError(f"Failed to get hf-chat ID: {res.text}")
50
+
51
+ def get_conversation_id(self, system_prompt: str = ""):
52
+ request_url = "https://huggingface.co/chat/conversation"
53
+ request_headers = HUGGINGCHAT_POST_HEADERS
54
+ extra_headers = {
55
+ "Cookie": f"hf-chat={self.hf_chat_id}",
56
+ }
57
+ request_headers.update(extra_headers)
58
+ request_body = {
59
+ "model": self.model_fullname,
60
+ "preprompt": system_prompt,
61
+ }
62
+ logger.note(f"> Conversation ID:", end=" ")
63
+
64
+ res = requests.post(
65
+ request_url,
66
+ headers=request_headers,
67
+ json=request_body,
68
+ proxies=PROXIES,
69
+ timeout=10,
70
+ )
71
+ if res.status_code == 200:
72
+ conversation_id = res.json()["conversationId"]
73
+ logger.success(f"[{conversation_id}]")
74
+ else:
75
+ logger.warn(f"[{res.status_code}]")
76
+ raise ValueError("Failed to get conversation ID!")
77
+ self.conversation_id = conversation_id
78
+ return conversation_id
79
+
80
+ def get_last_message_id(self):
81
+ request_url = f"https://huggingface.co/chat/conversation/{self.conversation_id}/__data.json?x-sveltekit-invalidated=11"
82
+ request_headers = HUGGINGCHAT_POST_HEADERS
83
+ extra_headers = {
84
+ "Cookie": f"hf-chat={self.hf_chat_id}",
85
+ }
86
+ request_headers.update(extra_headers)
87
+ logger.note(f"> Message ID:", end=" ")
88
+
89
+ message_id = None
90
+ res = requests.post(
91
+ request_url,
92
+ headers=request_headers,
93
+ proxies=PROXIES,
94
+ timeout=10,
95
+ )
96
+ if res.status_code == 200:
97
+ data = res.json()["nodes"][1]["data"]
98
+ # find the last element which matches the format of uuid4
99
+ uuid_pattern = re.compile(
100
+ r"^[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}$"
101
+ )
102
+ for item in data:
103
+ if type(item) == str and uuid_pattern.match(item):
104
+ message_id = item
105
+ logger.success(f"[{message_id}]")
106
+ else:
107
+ logger.warn(f"[{res.status_code}]")
108
+ raise ValueError("Failed to get message ID!")
109
+
110
+ return message_id
111
+
112
+ def log_request(self, url, method="GET"):
113
+ logger.note(f"> {method}:", end=" ")
114
+ logger.mesg(f"{url}", end=" ")
115
+
116
+ def log_response(
117
+ self, res: requests.Response, stream=False, iter_lines=False, verbose=False
118
+ ):
119
+ status_code = res.status_code
120
+ status_code_str = f"[{status_code}]"
121
+
122
+ if status_code == 200:
123
+ logger_func = logger.success
124
+ else:
125
+ logger_func = logger.warn
126
+
127
+ logger.enter_quiet(not verbose)
128
+ logger_func(status_code_str)
129
+
130
+ if status_code != 200:
131
+ logger_func(res.text)
132
+
133
+ if stream:
134
+ if not iter_lines:
135
+ return
136
+
137
+ for line in res.iter_lines():
138
+ line = line.decode("utf-8")
139
+ line = re.sub(r"^data:\s*", "", line)
140
+ line = line.strip()
141
+ if line:
142
+ try:
143
+ data = json.loads(line, strict=False)
144
+ msg_type = data.get("type")
145
+ if msg_type == "status":
146
+ msg_status = data.get("status")
147
+ elif msg_type == "stream":
148
+ content = data.get("token", "")
149
+ logger_func(content, end="")
150
+ elif msg_type == "finalAnswer":
151
+ full_content = data.get("text")
152
+ logger.success("\n[Finished]")
153
+ break
154
+ else:
155
+ pass
156
+ except Exception as e:
157
+ logger.warn(e)
158
+ else:
159
+ logger_func(res.json())
160
+
161
+ logger.exit_quiet(not verbose)
162
+
163
+ def chat_completions(self, messages: list[dict], iter_lines=False, verbose=False):
164
+ composer = MessageComposer(model=self.model)
165
+ system_prompt, input_prompt = composer.decompose_to_system_and_input_prompt(
166
+ messages
167
+ )
168
+
169
+ checker = TokenChecker(input_str=system_prompt + input_prompt, model=self.model)
170
+ checker.check_token_limit()
171
+
172
+ self.get_hf_chat_id()
173
+ self.get_conversation_id(system_prompt=system_prompt)
174
+ message_id = self.get_last_message_id()
175
+
176
+ request_url = f"https://huggingface.co/chat/conversation/{self.conversation_id}"
177
+ request_headers = copy.deepcopy(HUGGINGCHAT_POST_HEADERS)
178
+ extra_headers = {
179
+ "Content-Type": "text/event-stream",
180
+ "Referer": request_url,
181
+ "Cookie": f"hf-chat={self.hf_chat_id}",
182
+ }
183
+ request_headers.update(extra_headers)
184
+ request_body = {
185
+ "files": [],
186
+ "id": message_id,
187
+ "inputs": input_prompt,
188
+ "is_continue": False,
189
+ "is_retry": False,
190
+ "web_search": False,
191
+ }
192
+ self.log_request(request_url, method="POST")
193
+
194
+ res = requests.post(
195
+ request_url,
196
+ headers=request_headers,
197
+ json=request_body,
198
+ proxies=PROXIES,
199
+ stream=True,
200
+ )
201
+ self.log_response(res, stream=True, iter_lines=iter_lines, verbose=verbose)
202
+ return res
203
+
204
+
205
+ class HuggingchatStreamer:
206
+ def __init__(self, model: str):
207
+ if model in MODEL_MAP.keys():
208
+ self.model = model
209
+ else:
210
+ self.model = "nous-mixtral-8x7b"
211
+ self.model_fullname = MODEL_MAP[self.model]
212
+ self.message_outputer = OpenaiStreamOutputer(model=self.model)
213
+
214
+ def chat_response(self, messages: list[dict], verbose=False):
215
+ requester = HuggingchatRequester(model=self.model)
216
+ return requester.chat_completions(
217
+ messages=messages, iter_lines=False, verbose=verbose
218
+ )
219
+
220
+ def chat_return_generator(self, stream_response: requests.Response, verbose=False):
221
+ is_finished = False
222
+ for line in stream_response.iter_lines():
223
+ line = line.decode("utf-8")
224
+ line = re.sub(r"^data:\s*", "", line)
225
+ line = line.strip()
226
+ if not line:
227
+ continue
228
+
229
+ content = ""
230
+ content_type = "Completions"
231
+ try:
232
+ data = json.loads(line, strict=False)
233
+ msg_type = data.get("type")
234
+ if msg_type == "status":
235
+ msg_status = data.get("status")
236
+ continue
237
+ elif msg_type == "stream":
238
+ content_type = "Completions"
239
+ content = data.get("token", "")
240
+ if verbose:
241
+ logger.success(content, end="")
242
+ elif msg_type == "finalAnswer":
243
+ content_type = "Finished"
244
+ content = ""
245
+ full_content = data.get("text")
246
+ if verbose:
247
+ logger.success("\n[Finished]")
248
+ is_finished = True
249
+ break
250
+ else:
251
+ continue
252
+ except Exception as e:
253
+ logger.warn(e)
254
+
255
+ output = self.message_outputer.output(
256
+ content=content, content_type=content_type
257
+ )
258
+ yield output
259
+
260
+ if not is_finished:
261
+ yield self.message_outputer.output(content="", content_type="Finished")
262
+
263
+ def chat_return_dict(self, stream_response: requests.Response):
264
+ final_output = self.message_outputer.default_data.copy()
265
+ final_output["choices"] = [
266
+ {
267
+ "index": 0,
268
+ "finish_reason": "stop",
269
+ "message": {"role": "assistant", "content": ""},
270
+ }
271
+ ]
272
+ final_content = ""
273
+ for item in self.chat_return_generator(stream_response):
274
+ try:
275
+ data = json.loads(item)
276
+ delta = data["choices"][0]["delta"]
277
+ delta_content = delta.get("content", "")
278
+ if delta_content:
279
+ final_content += delta_content
280
+ except Exception as e:
281
+ logger.warn(e)
282
+ final_output["choices"][0]["message"]["content"] = final_content.strip()
283
+ return final_output
284
+
285
+
286
+ if __name__ == "__main__":
287
+ # model = "command-r-plus"
288
+ model = "llama3-70b"
289
+ # model = "zephyr-141b"
290
+
291
+ streamer = HuggingchatStreamer(model=model)
292
+ messages = [
293
+ {
294
+ "role": "system",
295
+ "content": "You are an LLM developed by CloseAI.\nYour name is Niansuh-Copilot.",
296
+ },
297
+ {"role": "user", "content": "Hello, what is your role?"},
298
+ {"role": "assistant", "content": "I am an LLM."},
299
+ {"role": "user", "content": "What is your name?"},
300
+ ]
301
+
302
+ streamer.chat_response(messages=messages)
303
+ # HF_ENDPOINT=https://hf-mirror.com python -m networks.huggingchat_streamer
networks/huggingface_streamer.py ADDED
@@ -0,0 +1,176 @@
1
+ import json
2
+ import re
3
+ import requests
4
+
5
+ from tclogger import logger
6
+ from constants.models import MODEL_MAP, STOP_SEQUENCES_MAP
7
+ from constants.envs import PROXIES
8
+ from messagers.message_outputer import OpenaiStreamOutputer
9
+ from messagers.token_checker import TokenChecker
10
+
11
+
12
+ class HuggingfaceStreamer:
13
+ def __init__(self, model: str):
14
+ if model in MODEL_MAP.keys():
15
+ self.model = model
16
+ else:
17
+ self.model = "nous-mixtral-8x7b"
18
+ self.model_fullname = MODEL_MAP[self.model]
19
+ self.message_outputer = OpenaiStreamOutputer(model=self.model)
20
+
21
+ def parse_line(self, line):
22
+ line = line.decode("utf-8")
23
+ line = re.sub(r"data:\s*", "", line)
24
+ data = json.loads(line)
25
+ content = ""
26
+ try:
27
+ content = data["token"]["text"]
28
+ except:
29
+ logger.err(data)
30
+ return content
31
+
32
+ def chat_response(
33
+ self,
34
+ prompt: str = None,
35
+ temperature: float = 0.5,
36
+ top_p: float = 0.95,
37
+ max_new_tokens: int = None,
38
+ api_key: str = None,
39
+ use_cache: bool = False,
40
+ ):
41
+ # https://huggingface.co/docs/api-inference/detailed_parameters?code=curl
42
+ # curl --proxy http://<server>:<port> https://api-inference.huggingface.co/models/<org>/<model_name> -X POST -d '{"inputs":"who are you?","parameters":{"max_new_token":64}}' -H 'Content-Type: application/json' -H 'Authorization: Bearer <HF_TOKEN>'
43
+ self.request_url = (
44
+ f"https://api-inference.huggingface.co/models/{self.model_fullname}"
45
+ )
46
+ self.request_headers = {
47
+ "Content-Type": "application/json",
48
+ }
49
+
50
+ if api_key:
51
+ logger.note(
52
+ f"Using API Key: {api_key[:3]}{(len(api_key)-7)*'*'}{api_key[-4:]}"
53
+ )
54
+ self.request_headers["Authorization"] = f"Bearer {api_key}"
55
+
56
+ if temperature is None or temperature < 0:
57
+ temperature = 0.0
58
+ # temperature must be strictly between 0 and 1 for HF LLM models
59
+ temperature = max(temperature, 0.01)
60
+ temperature = min(temperature, 0.99)
61
+ top_p = max(top_p, 0.01)
62
+ top_p = min(top_p, 0.99)
63
+
64
+ checker = TokenChecker(input_str=prompt, model=self.model)
65
+
66
+ if max_new_tokens is None or max_new_tokens <= 0:
67
+ max_new_tokens = checker.get_token_redundancy()
68
+ else:
69
+ max_new_tokens = min(max_new_tokens, checker.get_token_redundancy())
70
+
71
+ # References:
72
+ # huggingface_hub/inference/_client.py:
73
+ # class InferenceClient > def text_generation()
74
+ # huggingface_hub/inference/_text_generation.py:
75
+ # class TextGenerationRequest > param `stream`
76
+ # https://huggingface.co/docs/text-generation-inference/conceptual/streaming#streaming-with-curl
77
+ # https://huggingface.co/docs/api-inference/detailed_parameters#text-generation-task
78
+ self.request_body = {
79
+ "inputs": prompt,
80
+ "parameters": {
81
+ "temperature": temperature,
82
+ "top_p": top_p,
83
+ "max_new_tokens": max_new_tokens,
84
+ "return_full_text": False,
85
+ },
86
+ "options": {
87
+ "use_cache": use_cache,
88
+ },
89
+ "stream": True,
90
+ }
91
+
92
+ if self.model in STOP_SEQUENCES_MAP.keys():
93
+ self.stop_sequences = STOP_SEQUENCES_MAP[self.model]
94
+ # self.request_body["parameters"]["stop_sequences"] = [
95
+ # self.STOP_SEQUENCES[self.model]
96
+ # ]
97
+
98
+ logger.back(self.request_url)
99
+ stream_response = requests.post(
100
+ self.request_url,
101
+ headers=self.request_headers,
102
+ json=self.request_body,
103
+ proxies=PROXIES,
104
+ stream=True,
105
+ )
106
+ status_code = stream_response.status_code
107
+ if status_code == 200:
108
+ logger.success(status_code)
109
+ else:
110
+ logger.err(status_code)
111
+
112
+ return stream_response
113
+
114
+ def chat_return_dict(self, stream_response):
115
+ # https://platform.openai.com/docs/guides/text-generation/chat-completions-response-format
116
+ final_output = self.message_outputer.default_data.copy()
117
+ final_output["choices"] = [
118
+ {
119
+ "index": 0,
120
+ "finish_reason": "stop",
121
+ "message": {
122
+ "role": "assistant",
123
+ "content": "",
124
+ },
125
+ }
126
+ ]
127
+ logger.back(final_output)
128
+
129
+ final_content = ""
130
+ for line in stream_response.iter_lines():
131
+ if not line:
132
+ continue
133
+ content = self.parse_line(line)
134
+
135
+ if content.strip() == self.stop_sequences:
136
+ logger.success("\n[Finished]")
137
+ break
138
+ else:
139
+ logger.back(content, end="")
140
+ final_content += content
141
+
142
+ if self.model in STOP_SEQUENCES_MAP.keys():
143
+ final_content = final_content.replace(self.stop_sequences, "")
144
+
145
+ final_content = final_content.strip()
146
+ final_output["choices"][0]["message"]["content"] = final_content
147
+ return final_output
148
+
149
+ def chat_return_generator(self, stream_response):
150
+ is_finished = False
151
+ line_count = 0
152
+ for line in stream_response.iter_lines():
153
+ if line:
154
+ line_count += 1
155
+ else:
156
+ continue
157
+
158
+ content = self.parse_line(line)
159
+
160
+ if content.strip() == self.stop_sequences:
161
+ content_type = "Finished"
162
+ logger.success("\n[Finished]")
163
+ is_finished = True
164
+ else:
165
+ content_type = "Completions"
166
+ if line_count == 1:
167
+ content = content.lstrip()
168
+ logger.back(content, end="")
169
+
170
+ output = self.message_outputer.output(
171
+ content=content, content_type=content_type
172
+ )
173
+ yield output
174
+
175
+ if not is_finished:
176
+ yield self.message_outputer.output(content="", content_type="Finished")
networks/openai_streamer.py ADDED
@@ -0,0 +1,281 @@
+ import copy
+ import json
+ import re
+ import tiktoken
+ import uuid
+
+ from curl_cffi import requests
+ from tclogger import logger
+
+ from constants.envs import PROXIES
+ from constants.headers import OPENAI_GET_HEADERS, OPENAI_POST_DATA
+ from constants.models import TOKEN_LIMIT_MAP, TOKEN_RESERVED
+
+ from messagers.message_outputer import OpenaiStreamOutputer
+ from networks.proof_worker import ProofWorker
+
+
+ class OpenaiRequester:
+     def __init__(self):
+         self.init_requests_params()
+
+     def init_requests_params(self):
+         self.api_base = "https://chat.openai.com/backend-anon"
+         self.api_me = f"{self.api_base}/me"
+         self.api_models = f"{self.api_base}/models"
+         self.api_chat_requirements = f"{self.api_base}/sentinel/chat-requirements"
+         self.api_conversation = f"{self.api_base}/conversation"
+         self.uuid = str(uuid.uuid4())
+         self.requests_headers = copy.deepcopy(OPENAI_GET_HEADERS)
+         extra_headers = {
+             "Oai-Device-Id": self.uuid,
+         }
+         self.requests_headers.update(extra_headers)
+
+     def log_request(self, url, method="GET"):
+         logger.note(f"> {method}:", end=" ")
+         logger.mesg(f"{url}", end=" ")
+
+     def log_response(
+         self, res: requests.Response, stream=False, iter_lines=False, verbose=False
+     ):
+         status_code = res.status_code
+         status_code_str = f"[{status_code}]"
+
+         if status_code == 200:
+             logger_func = logger.success
+         else:
+             logger_func = logger.warn
+
+         logger_func(status_code_str)
+
+         logger.enter_quiet(not verbose)
+
+         if stream:
+             if not iter_lines:
+                 return
+
+             if not hasattr(self, "content_offset"):
+                 self.content_offset = 0
+
+             for line in res.iter_lines():
+                 line = line.decode("utf-8")
+                 line = re.sub(r"^data:\s*", "", line)
+                 if re.match(r"^\[DONE\]", line):
+                     logger.success("\n[Finished]")
+                     break
+                 line = line.strip()
+                 if line:
+                     try:
+                         data = json.loads(line, strict=False)
+                         message_role = data["message"]["author"]["role"]
+                         message_status = data["message"]["status"]
+                         if (
+                             message_role == "assistant"
+                             and message_status == "in_progress"
+                         ):
+                             content = data["message"]["content"]["parts"][0]
+                             delta_content = content[self.content_offset :]
+                             self.content_offset = len(content)
+                             logger_func(delta_content, end="")
+                     except Exception as e:
+                         logger.warn(e)
+         else:
+             logger_func(res.json())
+
+         logger.exit_quiet(not verbose)
+
+     def get_models(self):
+         self.log_request(self.api_models)
+         res = requests.get(
+             self.api_models,
+             headers=self.requests_headers,
+             proxies=PROXIES,
+             timeout=10,
+             impersonate="chrome120",
+         )
+         self.log_response(res)
+
+     def auth(self):
+         self.log_request(self.api_chat_requirements, method="POST")
+         res = requests.post(
+             self.api_chat_requirements,
+             headers=self.requests_headers,
+             proxies=PROXIES,
+             timeout=10,
+             impersonate="chrome120",
+         )
+         data = res.json()
+         self.chat_requirements_token = data["token"]
+         self.chat_requirements_seed = data["proofofwork"]["seed"]
+         self.chat_requirements_difficulty = data["proofofwork"]["difficulty"]
+         self.log_response(res)
+
+     def transform_messages(self, messages: list[dict]):
+         def get_role(role):
+             if role in ["system", "user", "assistant"]:
+                 return role
+             else:
+                 return "system"
+
+         new_messages = [
+             {
+                 "author": {"role": get_role(message["role"])},
+                 "content": {"content_type": "text", "parts": [message["content"]]},
+                 "metadata": {},
+             }
+             for message in messages
+         ]
+         return new_messages
+
+     def chat_completions(self, messages: list[dict], iter_lines=False, verbose=False):
+         proof_token = ProofWorker().calc_proof_token(
+             self.chat_requirements_seed, self.chat_requirements_difficulty
+         )
+         extra_headers = {
+             "Accept": "text/event-stream",
+             "Openai-Sentinel-Chat-Requirements-Token": self.chat_requirements_token,
+             "Openai-Sentinel-Proof-Token": proof_token,
+         }
+         requests_headers = copy.deepcopy(self.requests_headers)
+         requests_headers.update(extra_headers)
+
+         post_data = copy.deepcopy(OPENAI_POST_DATA)
+         extra_data = {
+             "messages": self.transform_messages(messages),
+             "websocket_request_id": str(uuid.uuid4()),
+         }
+         post_data.update(extra_data)
+
+         self.log_request(self.api_conversation, method="POST")
+         s = requests.Session()
+         res = s.post(
+             self.api_conversation,
+             headers=requests_headers,
+             json=post_data,
+             proxies=PROXIES,
+             timeout=10,
+             impersonate="chrome120",
+             stream=True,
+         )
+         self.log_response(res, stream=True, iter_lines=iter_lines, verbose=verbose)
+         return res
+
+
+ class OpenaiStreamer:
+     def __init__(self):
+         self.model = "gpt-3.5-turbo"
+         self.message_outputer = OpenaiStreamOutputer(
+             owned_by="openai", model="gpt-3.5-turbo"
+         )
+         self.tokenizer = tiktoken.get_encoding("cl100k_base")
+
+     def count_tokens(self, messages: list[dict]):
+         token_count = sum(
+             len(self.tokenizer.encode(message["content"])) for message in messages
+         )
+         logger.note(f"Prompt Token Count: {token_count}")
+         return token_count
+
+     def check_token_limit(self, messages: list[dict]):
+         token_limit = TOKEN_LIMIT_MAP[self.model]
+         token_count = self.count_tokens(messages)
+         token_redundancy = int(token_limit - TOKEN_RESERVED - token_count)
+         if token_redundancy <= 0:
+             raise ValueError(
+                 f"Prompt exceeded token limit: {token_count} > {token_limit}"
+             )
+         return True
+
+     def chat_response(self, messages: list[dict], iter_lines=False, verbose=False):
+         self.check_token_limit(messages)
+         logger.enter_quiet(not verbose)
+         requester = OpenaiRequester()
+         requester.auth()
+         logger.exit_quiet(not verbose)
+         return requester.chat_completions(
+             messages=messages, iter_lines=iter_lines, verbose=verbose
+         )
+
+     def chat_return_generator(self, stream_response: requests.Response, verbose=False):
+         content_offset = 0
+         is_finished = False
+
+         for line in stream_response.iter_lines():
+             line = line.decode("utf-8")
+             line = re.sub(r"^data:\s*", "", line)
+             line = line.strip()
+
+             if not line:
+                 continue
+
+             if re.match(r"^\[DONE\]", line):
+                 content_type = "Finished"
+                 delta_content = ""
+                 logger.success("\n[Finished]")
+                 is_finished = True
+             else:
+                 content_type = "Completions"
+                 delta_content = ""
+                 try:
+                     data = json.loads(line, strict=False)
+                     message_role = data["message"]["author"]["role"]
+                     message_status = data["message"]["status"]
+                     if message_role == "assistant" and message_status == "in_progress":
+                         content = data["message"]["content"]["parts"][0]
+                         if not len(content):
+                             continue
+                         delta_content = content[content_offset:]
+                         content_offset = len(content)
+                         if verbose:
+                             logger.success(delta_content, end="")
+                     else:
+                         continue
+                 except Exception as e:
+                     logger.warn(e)
+
+             output = self.message_outputer.output(
+                 content=delta_content, content_type=content_type
+             )
+             yield output
+
+         if not is_finished:
+             yield self.message_outputer.output(content="", content_type="Finished")
+
+     def chat_return_dict(self, stream_response: requests.Response):
+         final_output = self.message_outputer.default_data.copy()
+         final_output["choices"] = [
+             {
+                 "index": 0,
+                 "finish_reason": "stop",
+                 "message": {"role": "assistant", "content": ""},
+             }
+         ]
+         final_content = ""
+         for item in self.chat_return_generator(stream_response):
+             try:
+                 data = json.loads(item)
+                 delta = data["choices"][0]["delta"]
+                 delta_content = delta.get("content", "")
+                 if delta_content:
+                     final_content += delta_content
+             except Exception as e:
+                 logger.warn(e)
+         final_output["choices"][0]["message"]["content"] = final_content.strip()
+         return final_output
+
+
+ if __name__ == "__main__":
+     streamer = OpenaiStreamer()
+     messages = [
+         {
+             "role": "system",
+             "content": "You are an LLM developed by NiansuhAI.\nYour name is Niansuh-Copilot.",
+         },
+         {"role": "user", "content": "Hello, what is your role?"},
+         {"role": "assistant", "content": "I am an LLM."},
+         {"role": "user", "content": "What is your name?"},
+     ]
+
+     streamer.chat_response(messages=messages, iter_lines=True, verbose=True)
+     # python -m networks.openai_streamer
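
A rough usage sketch for `OpenaiStreamer` (not part of the commit; it assumes the project modules above are importable and the upstream backend is reachable). It mirrors the `__main__` block above but shows both return paths; note that a streaming response can only be iterated once, so each path requests its own.

```py
# Illustrative only: drives OpenaiStreamer the same way the chat API would.
from networks.openai_streamer import OpenaiStreamer

streamer = OpenaiStreamer()
messages = [{"role": "user", "content": "Say hello in one sentence."}]

# Option 1: stream chunks (what an SSE endpoint would forward)
stream_response = streamer.chat_response(messages=messages)
for chunk in streamer.chat_return_generator(stream_response):
    print(chunk)

# Option 2: collect a single OpenAI-style completion dict
stream_response = streamer.chat_response(messages=messages)
result = streamer.chat_return_dict(stream_response)
print(result["choices"][0]["message"]["content"])
```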
networks/proof_worker.py ADDED
@@ -0,0 +1,61 @@
+ import base64
+ from hashlib import sha3_512
+ import json
+ import random
+
+ from datetime import datetime, timedelta, timezone
+
+ from constants.headers import OPENAI_GET_HEADERS
+
+
+ class ProofWorker:
+     def __init__(self, difficulty=None, required=False, seed=None):
+         self.difficulty = difficulty
+         self.required = required
+         self.seed = seed
+         self.proof_token_prefix = "gAAAAABwQ8Lk5FbGpA2NcR9dShT6gYjU7VxZ4D"
+
+     def get_parse_time(self):
+         now = datetime.now()
+         tz = timezone(timedelta(hours=8))
+         now = now.astimezone(tz)
+         time_format = "%a %b %d %Y %H:%M:%S"
+         return now.strftime(time_format) + " GMT+0800 (中国标准时间)"
+
+     def get_config(self):
+         cores = [8, 12, 16, 24]
+         core = random.choice(cores)
+         screens = [3000, 4000, 6000]
+         screen = random.choice(screens)
+         return [
+             str(core) + str(screen),
+             self.get_parse_time(),
+             4294705152,
+             0,
+             OPENAI_GET_HEADERS["User-Agent"],
+         ]
+
+     def calc_proof_token(self, seed: str, difficulty: str):
+         config = self.get_config()
+         diff_len = len(difficulty) // 2
+         for i in range(100000):
+             config[3] = i
+             json_str = json.dumps(config)
+             base = base64.b64encode(json_str.encode()).decode()
+             hasher = sha3_512()
+             hasher.update((seed + base).encode())
+             hash = hasher.digest().hex()
+             if hash[:diff_len] <= difficulty:
+                 return "gAAAAAB" + base
+         self.proof_token = (
+             self.proof_token_prefix + base64.b64encode(seed.encode()).decode()
+         )
+         return self.proof_token
+
+
+ if __name__ == "__main__":
+     seed, difficulty = "0.42665582693491433", "05cdf2"
+     worker = ProofWorker()
+     proof_token = worker.calc_proof_token(seed, difficulty)
+     print(f"proof_token: {proof_token}")
+     # python -m networks.proof_worker
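
To make the proof-of-work scheme above easier to follow (this note is not part of the commit): `calc_proof_token` brute-forces the counter at `config[3]` until the SHA3-512 hex digest of `seed + base64(config)` has a prefix that compares less than or equal to `difficulty`, and the returned token is just `"gAAAAAB"` plus that base64 payload, so a successful token can be re-checked independently. A hedged verification sketch, assuming `networks.proof_worker` is importable:

```py
from hashlib import sha3_512

from networks.proof_worker import ProofWorker

seed, difficulty = "0.42665582693491433", "05cdf2"
worker = ProofWorker()
token = worker.calc_proof_token(seed, difficulty)

if not token.startswith(worker.proof_token_prefix):  # skip the fallback token
    base = token[len("gAAAAAB"):]
    digest = sha3_512((seed + base).encode()).hexdigest()
    # same acceptance rule as calc_proof_token
    assert digest[: len(difficulty) // 2] <= difficulty
```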
requirements.txt ADDED
@@ -0,0 +1,18 @@
+ aiohttp
+ curl_cffi
+ fastapi
+ httpx
+ jinja2
+ markdown2[all]
+ openai
+ protobuf
+ pydantic
+ requests
+ sentencepiece
+ sse_starlette
+ termcolor
+ tclogger
+ tiktoken
+ transformers
+ uvicorn
+ websockets
tests/__init__.py ADDED
@@ -0,0 +1 @@
+
tests/openai.py ADDED
@@ -0,0 +1,180 @@
+ import copy
+ import json
+ import re
+ import uuid
+
+ from pathlib import Path
+
+ from curl_cffi import requests
+ from tclogger import logger, OSEnver
+ from constants.envs import PROXIES
+
+
+ class OpenaiAPI:
+     def __init__(self):
+         self.init_requests_params()
+
+     def init_requests_params(self):
+         self.api_base = "https://chat.openai.com/backend-anon"
+         self.api_me = f"{self.api_base}/me"
+         self.api_models = f"{self.api_base}/models"
+         self.api_chat_requirements = f"{self.api_base}/sentinel/chat-requirements"
+         self.api_conversation = f"{self.api_base}/conversation"
+         self.uuid = str(uuid.uuid4())
+         self.requests_headers = {
+             # "Accept": "*/*",
+             "Accept-Encoding": "gzip, deflate, br, zstd",
+             "Accept-Language": "en-US,en;q=0.9",
+             "Cache-Control": "no-cache",
+             "Content-Type": "application/json",
+             "Oai-Device-Id": self.uuid,
+             "Oai-Language": "en-US",
+             "Pragma": "no-cache",
+             "Referer": "https://chat.openai.com/",
+             "Sec-Ch-Ua": '"Google Chrome";v="123", "Not:A-Brand";v="8", "Chromium";v="123"',
+             "Sec-Ch-Ua-Mobile": "?0",
+             "Sec-Ch-Ua-Platform": '"Windows"',
+             "Sec-Fetch-Dest": "empty",
+             "Sec-Fetch-Mode": "cors",
+             "Sec-Fetch-Site": "same-origin",
+             "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
+         }
+
+     def log_request(self, url, method="GET"):
+         logger.note(f"> {method}:", end=" ")
+         logger.mesg(f"{url}", end=" ")
+
+     def log_response(self, res: requests.Response, stream=False, verbose=False):
+         status_code = res.status_code
+         status_code_str = f"[{status_code}]"
+
+         if status_code == 200:
+             logger_func = logger.success
+         else:
+             logger_func = logger.warn
+
+         logger_func(status_code_str)
+
+         if verbose:
+             if stream:
+                 if not hasattr(self, "content_offset"):
+                     self.content_offset = 0
+
+                 for line in res.iter_lines():
+                     line = line.decode("utf-8")
+                     line = re.sub(r"^data:\s*", "", line)
+                     if re.match(r"^\[DONE\]", line):
+                         logger.success("\n[Finished]")
+                         break
+                     line = line.strip()
+                     if line:
+                         try:
+                             data = json.loads(line, strict=False)
+                             message_role = data["message"]["author"]["role"]
+                             message_status = data["message"]["status"]
+                             if (
+                                 message_role == "assistant"
+                                 and message_status == "in_progress"
+                             ):
+                                 content = data["message"]["content"]["parts"][0]
+                                 delta_content = content[self.content_offset :]
+                                 self.content_offset = len(content)
+                                 logger_func(delta_content, end="")
+                         except Exception as e:
+                             logger.warn(e)
+             else:
+                 logger_func(res.json())
+
+     def get_models(self):
+         self.log_request(self.api_models)
+         res = requests.get(
+             self.api_models,
+             headers=self.requests_headers,
+             proxies=PROXIES,
+             timeout=10,
+             impersonate="chrome120",
+         )
+         self.log_response(res)
+
+     def auth(self):
+         self.log_request(self.api_chat_requirements, method="POST")
+         res = requests.post(
+             self.api_chat_requirements,
+             headers=self.requests_headers,
+             proxies=PROXIES,
+             timeout=10,
+             impersonate="chrome120",
+         )
+         self.chat_requirements_token = res.json()["token"]
+         self.log_response(res)
+
+     def transform_messages(self, messages: list[dict]):
+         def get_role(role):
+             if role in ["system", "user", "assistant"]:
+                 return role
+             else:
+                 return "system"
+
+         new_messages = [
+             {
+                 "author": {"role": get_role(message["role"])},
+                 "content": {"content_type": "text", "parts": [message["content"]]},
+                 "metadata": {},
+             }
+             for message in messages
+         ]
+         return new_messages
+
+     def chat_completions(self, messages: list[dict]):
+         new_headers = {
+             "Accept": "text/event-stream",
+             "Openai-Sentinel-Chat-Requirements-Token": self.chat_requirements_token,
+         }
+         requests_headers = copy.deepcopy(self.requests_headers)
+         requests_headers.update(new_headers)
+         post_data = {
+             "action": "next",
+             "messages": self.transform_messages(messages),
+             "parent_message_id": "",
+             "model": "text-davinci-002-render-sha",
+             "timezone_offset_min": -480,
+             "suggestions": [],
+             "history_and_training_disabled": False,
+             "conversation_mode": {"kind": "primary_assistant"},
+             "force_paragen": False,
+             "force_paragen_model_slug": "",
+             "force_nulligen": False,
+             "force_rate_limit": False,
+             "websocket_request_id": str(uuid.uuid4()),
+         }
+         self.log_request(self.api_conversation, method="POST")
+         s = requests.Session()
+         res = s.post(
+             self.api_conversation,
+             headers=requests_headers,
+             json=post_data,
+             proxies=PROXIES,
+             timeout=10,
+             impersonate="chrome120",
+             stream=True,
+         )
+         self.log_response(res, stream=True, verbose=True)
+
+
+ if __name__ == "__main__":
+     api = OpenaiAPI()
+     # api.get_models()
+     api.auth()
+     messages = [
+         {"role": "system", "content": "I am Niansuh"},
+         {"role": "system", "content": "I have a cat named Lucky"},
+         {"role": "user", "content": "Repeat my name and my cat's name"},
+         {
+             "role": "assistant",
+             "content": "Your name is Niansuh and your cat's name is Lucky.",
+         },
+         {"role": "user", "content": "summarize our conversation"},
+     ]
+     api.chat_completions(messages)
+
+     # python -m tests.openai
vercel.json ADDED
@@ -0,0 +1,17 @@
+ {
+   "builds": [
+     {
+       "src": "apis/chat_api.py",
+       "use": "@vercel/python"
+     }
+   ],
+   "routes": [
+     {
+       "src": "/(.*)",
+       "dest": "/apis/chat_api.py"
+     }
+   ],
+   "env": {
+     "APP_MODULE": "apis.chat_api:app"
+   }
+ }