matthoffner committed
Commit 9e9fcb0 • 1 Parent(s): 463d78b
Update main.py
main.py
CHANGED
@@ -92,35 +92,44 @@ async def chat(request: ChatCompletionRequest):
 
     return StreamingResponse(format_response(chat_chunks), media_type="text/event-stream")
 
-
+async def stream_response(send: Callable) -> None:
+    async with send:
+        try:
+            iterator: Generator = llm.generate(tokens)
+            async for chat_chunk in iterator:
+                response = {
+                    'choices': [
+                        {
+                            'message': {
+                                'role': 'system',
+                                'content': llm.detokenize(chat_chunk)
+                            },
+                            'finish_reason': 'stop' if llm.detokenize(chat_chunk) == "[DONE]" else 'unknown'
+                        }
+                    ]
+                }
+                await send({
+                    "type": "http.response.body",
+                    "body": f"data: {json.dumps(response)}\n\n".encode("utf-8"),
+                    "more_body": True,
+                })
+            await send({
+                "type": "http.response.body",
+                "body": b"event: done\ndata: {}\n\n",
+                "more_body": False,
+            })
+        except Exception as e:
+            print(f"Exception in event publisher: {str(e)}")
+
 async def chatV2(request: Request, body: ChatCompletionRequest):
     combined_messages = ' '.join([message.content for message in body.messages])
     tokens = llm.tokenize(combined_messages)
 
-
-
-
-
-
-            iterator: Generator = await run_sync(llm.generate, tokens)
-            for chat_chunk in iterator:
-                response = {
-                    'choices': [
-                        {
-                            'message': {
-                                'role': 'system',
-                                'content': llm.detokenize(chat_chunk)
-                            },
-                            'finish_reason': 'stop' if llm.detokenize(chat_chunk) == "[DONE]" else 'unknown'
-                        }
-                    ]
-                }
-                await inner_send_chan.send(f"data: {json.dumps(response)}\n\n")
-            await inner_send_chan.send("event: done\ndata: {}\n\n")
-        except Exception as e:
-            print(f"Exception in event publisher: {str(e)}")
-
-    return StreamingResponse(recv_chan, media_type="text/event-stream", data_sender_callable=partial(event_publisher, send_chan))
+    return ResponseGenerator(stream_response)
+
+@app.post("/v2/chat/completions")
+async def chatV2(request: Request, body: ChatCompletionRequest):
+    return await chatV2(request, body)
 
 @app.post("/v0/chat/completions")
 async def chat(request: ChatCompletionRequestV0, response_mode=None):
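
The "data: {json}\n\n" chunks and the closing "event: done" message written by stream_response are ordinary server-sent-events framing, emitted here as raw ASGI "http.response.body" messages ("more_body": True keeps the response open for the next chunk; False closes it). ResponseGenerator and the hand-rolled send() loop are particular to this app's setup; as a rough sketch only, the same wire format can be produced on stock FastAPI/Starlette with an async generator and a plain StreamingResponse. Here fake_generate is a hypothetical stand-in for the commit's llm.generate plus llm.detokenize token stream:

# sketch.py — run with: uvicorn sketch:app
# Minimal sketch only; fake_generate is hypothetical and stands in for
# the commit's llm.generate / llm.detokenize pipeline.
import json
from typing import AsyncIterator

from fastapi import FastAPI
from starlette.responses import StreamingResponse

app = FastAPI()

async def fake_generate() -> AsyncIterator[str]:
    # Pretend token stream; a real app would yield detokenized model output.
    for piece in ("Hello", " world", "[DONE]"):
        yield piece

async def sse_events() -> AsyncIterator[bytes]:
    async for piece in fake_generate():
        response = {
            "choices": [
                {
                    "message": {"role": "system", "content": piece},
                    "finish_reason": "stop" if piece == "[DONE]" else "unknown",
                }
            ]
        }
        # One SSE chunk per token — same framing as the commit's first send().
        yield f"data: {json.dumps(response)}\n\n".encode("utf-8")
    # Terminal event, matching the commit's final send() with more_body=False.
    yield b"event: done\ndata: {}\n\n"

@app.post("/v2/chat/completions")
async def chat_v2() -> StreamingResponse:
    # StreamingResponse drives the ASGI send() loop the commit writes by hand.
    return StreamingResponse(sse_events(), media_type="text/event-stream")

Each yield becomes one HTTP body chunk, which is exactly what the explicit "more_body": True messages in the committed stream_response accomplish manually.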
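A hypothetical client for either variant, assuming the server listens on localhost:8000 and that ChatCompletionRequest accepts an OpenAI-style messages list (the handler joins the message.content fields, so that shape fits):

import httpx

payload = {"messages": [{"role": "user", "content": "Hello"}]}
with httpx.stream("POST", "http://localhost:8000/v2/chat/completions",
                  json=payload, timeout=None) as resp:
    for line in resp.iter_lines():
        # Chunks arrive as "data: {...}" lines; "event: done" ends the stream.
        if line.startswith("data: "):
            print(line[len("data: "):])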