Commit 210500b
Parent(s): 0d49ac1
Update main.py

main.py CHANGED
@@ -3,7 +3,7 @@ import json
 import markdown
 import uvicorn
 from fastapi import HTTPException
-from fastapi.responses import HTMLResponse
+from fastapi.responses import HTMLResponse
 from fastapi.middleware.cors import CORSMiddleware
 from sse_starlette.sse import EventSourceResponse
 from ctransformers import AutoModelForCausalLM
@@ -14,7 +14,7 @@ from typing import List, Dict, Any
 llm = AutoModelForCausalLM.from_pretrained("TheBloke/WizardCoder-15B-1.0-GGML",
                                            model_file="WizardCoder-15B-1.0.ggmlv3.q4_0.bin",
                                            model_type="starcoder")
-app = fastapi.FastAPI(title="WizardCoder")
+app = fastapi.FastAPI(title="🪄WizardCoder💫")
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -59,7 +59,7 @@ async def completion(request: ChatCompletionRequest, response_mode=None):
     response = llm(request.prompt)
     return response

-@app.post("/
+@app.post("/v1/chat/completions")
 async def chat(request: ChatCompletionRequestV2):
     tokens = llm.tokenize([message.content for message in request.messages])

@@ -84,7 +84,7 @@ async def chat(request: ChatCompletionRequestV2):

     return format_response(chat_chunks)

-@app.post("/
+@app.post("/v0/chat/completions")
 async def chat(request: ChatCompletionRequest, response_mode=None):
     tokens = llm.tokenize(request.prompt)
     async def server_sent_events(chat_chunks, llm):