import asyncio
import json
import time

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from gradio_client import Client

app = FastAPI()

# Connect to the Gradio Space that serves the model.
client = Client("AWeirdDev/mistral-7b-instruct-v0.2")


async def stream(iterator):
    # Pull items from a blocking iterator without stalling the event loop:
    # each __next__() call runs in a worker thread.
    while True:
        try:
            value = await asyncio.to_thread(iterator.__next__)
            yield value
        except StopIteration:
            break
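
# Note: the helper above is not used by the route below, which streams with a
# plain sync generator. A sketch of how it could wrap a gradio_client job from
# async code (illustrative assumption, not part of the original):
#
#   async def astreamer():
#       job = client.submit("Hello!!", 0.9, 4096, 0.9, 1, api_name="/chat")
#       async for item in stream(iter(job)):
#           ...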


def make_chunk_obj(i, delta, fr):
    return {
        "id": str(time.time_ns()),
        "object": "chat.completion.chunk",
        "created": round(time.time()),
        "model": "mistral-7b-instruct-v0.2",
        "system_fingerprint": "wtf",
        "choices": [
            {
                "index": i,
                "delta": {
                    "content": delta
                },
                "finish_reason": fr
            }
        ]
    }


@app.get('/chat/completions')
async def index():
    def streamer():
        text = ""
        # Start a streaming prediction on the Space's /chat endpoint
        # (the prompt followed by the Space's generation parameters).
        result = client.submit(
            "Hello!!",
            0.9,
            4096,
            0.9,
            1,
            api_name="/chat"
        )
        for i, item in enumerate(result):
            # Each item is the full text generated so far; forward only
            # the newly added part as an OpenAI-style delta.
            delta = item[len(text):]
            yield "data: " + json.dumps(
                make_chunk_obj(i, delta, None)
            ) + "\n\n"
            text = item

        # Close the stream: a final chunk with a finish reason, then the marker.
        yield "data: " + json.dumps(make_chunk_obj(i, delta, "stop")) + "\n\n"
        yield "data: [END]\n\n"

    return StreamingResponse(streamer(), media_type="text/event-stream")
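
# --- How to try it (a sketch, not part of the server itself) ---
# Assuming this file is saved as main.py, it can be served with:
#
#   uvicorn main:app
#
# and the SSE stream read with a small client; the use of httpx below and the
# manual "data: " parsing are illustrative choices:
#
#   import json
#   import httpx
#
#   with httpx.stream("GET", "http://127.0.0.1:8000/chat/completions") as r:
#       for line in r.iter_lines():
#           if line.startswith("data: ") and line != "data: [END]":
#               chunk = json.loads(line[len("data: "):])
#               print(chunk["choices"][0]["delta"]["content"], end="", flush=True)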