# Page header captured together with the file (not part of the program):
#   AWeirdDev's picture
#   Create app.py
#   5bb4c9e verified
#   raw
#   history blame
#   1.78 kB
import asyncio
import json
import time

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from gradio_client import Client
# ASGI application object; the route handler below registers itself on it
# through the @app.get decorator.
app = FastAPI()
# Shared gradio_client handle used by the endpoint to submit jobs to
# "AWeirdDev/mistral-7b-instruct-v0.2" (presumably a hosted Gradio app —
# confirm). NOTE(review): it is constructed at import time, so any failure
# in Client(...) would surface when this module is loaded — verify.
client = Client("AWeirdDev/mistral-7b-instruct-v0.2")
async def stream(iter):
    """Asynchronously yield every item produced by a blocking iterator.

    Each ``next()`` call is executed in a worker thread through
    ``asyncio.to_thread`` so that waiting for the next item does not block
    the event loop.

    Args:
        iter: An iterator (any object implementing ``__next__``). The name
            shadows the builtin but is kept so existing callers keep working.

    Yields:
        The items of ``iter``, in order, until it is exhausted.
    """
    # Exhaustion is signalled with a private sentinel instead of catching
    # StopIteration: a StopIteration raised inside the worker thread cannot
    # be delivered through an asyncio future, so an ``except StopIteration``
    # around the ``await`` never fires.
    exhausted = object()
    while True:
        value = await asyncio.to_thread(next, iter, exhausted)
        if value is exhausted:
            return
        yield value
def make_chunk_obj(i, delta, fr):
    """Build one ``chat.completion.chunk`` payload as a plain dict.

    Args:
        i: Value stored under ``choices[0]["index"]``.
        delta: Text stored under ``choices[0]["delta"]["content"]``.
        fr: Value stored under ``choices[0]["finish_reason"]``.

    Returns:
        A dict with the keys ``id``, ``object``, ``created``, ``model``,
        ``system_fingerprint`` and ``choices`` (a one-element list).
    """
    # The id is the current nanosecond clock reading rendered as a string;
    # ``created`` is the wall clock rounded to whole seconds.
    chunk_id = str(time.time_ns())
    created_at = round(time.time())

    choice = {
        "index": i,
        "delta": {"content": delta},
        "finish_reason": fr,
    }

    payload = {"id": chunk_id}
    payload["object"] = "chat.completion.chunk"
    payload["created"] = created_at
    payload["model"] = "mistral-7b-instruct-v0.2"
    payload["system_fingerprint"] = "wtf"
    payload["choices"] = [choice]
    return payload
@app.get('/chat/completions')
async def index():
    """Stream a completion for a fixed prompt as server-sent events.

    Submits the hard-coded prompt "Hello!!" to the module-level ``client``
    and relays the output as ``data: <json>`` events, one chunk object per
    update, followed by a chunk whose ``finish_reason`` is "stop" and a final
    ``data: [END]`` marker.

    Returns:
        A ``StreamingResponse`` with media type ``text/event-stream``.
    """

    def streamer():
        """Yield SSE-framed chunk events, the stop chunk, then the end marker."""
        text = ""
        # Bound before the loop so the closing "stop" chunk is well-defined
        # even if the job yields nothing.
        i = 0
        result = client.submit(
            "Hello!!",
            0.9,  # float (numeric value between 0.0 and 1.0) in 'Temperature' Slider component
            # NOTE(review): 4096 is outside the 0-1048 range quoted below — confirm.
            4096,  # float (numeric value between 0 and 1048) in 'Max new tokens' Slider component
            .9,  # float (numeric value between 0.0 and 1) in 'Top-p (nucleus sampling)' Slider component
            1,  # float (numeric value between 1.0 and 2.0) in 'Repetition penalty' Slider component
            api_name="/chat"
        )
        for i, item in enumerate(result):
            # Assumes each item is the full text generated so far, so the
            # new part is whatever follows the previously seen text.
            delta = item[len(text):]
            # Every SSE event must end with a blank line, otherwise
            # consecutive events run together on the wire.
            yield "data: " + json.dumps(make_chunk_obj(i, delta, None)) + "\n\n"
            text = item
        # The closing chunk carries no new text; re-sending the last delta
        # would make clients that concatenate deltas duplicate it.
        yield "data: " + json.dumps(make_chunk_obj(i, "", "stop")) + "\n\n"
        # NOTE(review): OpenAI-style clients expect "[DONE]" as the terminator;
        # "[END]" is kept because existing consumers may depend on it.
        yield "data: [END]\n\n"

    return StreamingResponse(streamer(), media_type="text/event-stream")