matthoffner committed
Commit 9e9fcb0 • 1 Parent(s): 463d78b
Update main.py
main.py
CHANGED
@@ -92,35 +92,44 @@ async def chat(request: ChatCompletionRequest):
 
     return StreamingResponse(format_response(chat_chunks), media_type="text/event-stream")
 
-
+async def stream_response(send: Callable) -> None:
+    async with send:
+        try:
+            iterator: Generator = llm.generate(tokens)
+            async for chat_chunk in iterator:
+                response = {
+                    'choices': [
+                        {
+                            'message': {
+                                'role': 'system',
+                                'content': llm.detokenize(chat_chunk)
+                            },
+                            'finish_reason': 'stop' if llm.detokenize(chat_chunk) == "[DONE]" else 'unknown'
+                        }
+                    ]
+                }
+                await send({
+                    "type": "http.response.body",
+                    "body": f"data: {json.dumps(response)}\n\n".encode("utf-8"),
+                    "more_body": True,
+                })
+            await send({
+                "type": "http.response.body",
+                "body": b"event: done\ndata: {}\n\n",
+                "more_body": False,
+            })
+        except Exception as e:
+            print(f"Exception in event publisher: {str(e)}")
+
 async def chatV2(request: Request, body: ChatCompletionRequest):
     combined_messages = ' '.join([message.content for message in body.messages])
     tokens = llm.tokenize(combined_messages)
 
-
-
-
-
-
-            iterator: Generator = await run_sync(llm.generate, tokens)
-            for chat_chunk in iterator:
-                response = {
-                    'choices': [
-                        {
-                            'message': {
-                                'role': 'system',
-                                'content': llm.detokenize(chat_chunk)
-                            },
-                            'finish_reason': 'stop' if llm.detokenize(chat_chunk) == "[DONE]" else 'unknown'
-                        }
-                    ]
-                }
-                await inner_send_chan.send(f"data: {json.dumps(response)}\n\n")
-            await inner_send_chan.send("event: done\ndata: {}\n\n")
-        except Exception as e:
-            print(f"Exception in event publisher: {str(e)}")
-
-    return StreamingResponse(recv_chan, media_type="text/event-stream", data_sender_callable=partial(event_publisher, send_chan))
+    return ResponseGenerator(stream_response)
+
+@app.post("/v2/chat/completions")
+async def chatV2(request: Request, body: ChatCompletionRequest):
+    return await chatV2(request, body)
 
 @app.post("/v0/chat/completions")
 async def chat(request: ChatCompletionRequestV0, response_mode=None):
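
The "data: {json}\n\n" chunks and the closing "event: done" message written by stream_response are ordinary server-sent-events framing, emitted here as raw ASGI "http.response.body" messages ("more_body": True keeps the response open for the next chunk; False closes it). ResponseGenerator and the hand-rolled send() loop are particular to this app's setup; as a rough sketch only, the same wire format can be produced on stock FastAPI/Starlette with an async generator and a plain StreamingResponse. Here fake_generate is a hypothetical stand-in for the commit's llm.generate plus llm.detokenize token stream:

# sketch.py — run with: uvicorn sketch:app
# Minimal sketch only; fake_generate is hypothetical and stands in for
# the commit's llm.generate / llm.detokenize pipeline.
import json
from typing import AsyncIterator

from fastapi import FastAPI
from starlette.responses import StreamingResponse

app = FastAPI()

async def fake_generate() -> AsyncIterator[str]:
    # Pretend token stream; a real app would yield detokenized model output.
    for piece in ("Hello", " world", "[DONE]"):
        yield piece

async def sse_events() -> AsyncIterator[bytes]:
    async for piece in fake_generate():
        response = {
            "choices": [
                {
                    "message": {"role": "system", "content": piece},
                    "finish_reason": "stop" if piece == "[DONE]" else "unknown",
                }
            ]
        }
        # One SSE chunk per token — same framing as the commit's first send().
        yield f"data: {json.dumps(response)}\n\n".encode("utf-8")
    # Terminal event, matching the commit's final send() with more_body=False.
    yield b"event: done\ndata: {}\n\n"

@app.post("/v2/chat/completions")
async def chat_v2() -> StreamingResponse:
    # StreamingResponse drives the ASGI send() loop the commit writes by hand.
    return StreamingResponse(sse_events(), media_type="text/event-stream")

Each yield becomes one HTTP body chunk, which is exactly what the explicit "more_body": True messages in the committed stream_response accomplish manually.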
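A hypothetical client for either variant, assuming the server listens on localhost:8000 and that ChatCompletionRequest accepts an OpenAI-style messages list (the handler joins the message.content fields, so that shape fits):

import httpx

payload = {"messages": [{"role": "user", "content": "Hello"}]}
with httpx.stream("POST", "http://localhost:8000/v2/chat/completions",
                  json=payload, timeout=None) as resp:
    for line in resp.iter_lines():
        # Chunks arrive as "data: {...}" lines; "event: done" ends the stream.
        if line.startswith("data: "):
            print(line[len("data: "):])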