Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Fedir Zadniprovskyi
committed on
Commit
·
b20cbad
1
Parent(s):
3a14175
feat: ollama-like ps endpoints
Browse files
faster_whisper_server/main.py
CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
3 |
import asyncio
|
4 |
from collections import OrderedDict
|
5 |
from contextlib import asynccontextmanager
|
|
|
6 |
from io import BytesIO
|
7 |
import time
|
8 |
from typing import TYPE_CHECKING, Annotated, Literal
|
@@ -107,6 +108,29 @@ def health() -> Response:
|
|
107 |
return Response(status_code=200, content="OK")
|
108 |
|
109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
@app.get("/v1/models")
|
111 |
def get_models() -> ModelListResponse:
|
112 |
models = huggingface_hub.list_models(library="ctranslate2", tags="automatic-speech-recognition", cardData=True)
|
|
|
3 |
import asyncio
|
4 |
from collections import OrderedDict
|
5 |
from contextlib import asynccontextmanager
|
6 |
+
import gc
|
7 |
from io import BytesIO
|
8 |
import time
|
9 |
from typing import TYPE_CHECKING, Annotated, Literal
|
|
|
108 |
return Response(status_code=200, content="OK")
|
109 |
|
110 |
|
111 |
+
@app.get("/api/ps", tags=["experimental"], summary="Get a list of loaded models.")
def get_running_models() -> dict[str, list[str]]:
    """List the models that are currently loaded.

    Returns:
        A dict with a single ``"models"`` key whose value is the list of
        keys currently present in ``loaded_models``.
    """
    # Iterating the mapping yields its keys, in the mapping's own order.
    loaded_names = [name for name in loaded_models.keys()]
    return {"models": loaded_names}
|
114 |
+
|
115 |
+
|
116 |
+
@app.post("/api/ps/{model_name:path}", tags=["experimental"], summary="Load a model into memory.")
def load_model_route(model_name: str) -> Response:
    """Load a model into memory unless it is already loaded.

    Args:
        model_name: Name of the model, taken from the URL path. The route
            uses the ``:path`` converter, so the name may contain ``/``.

    Returns:
        A 201 response after ``load_model`` has been called, or a 409
        response with the body ``"Model already loaded"`` when the name is
        already a key of ``loaded_models``.
    """
    already_loaded = model_name in loaded_models
    if not already_loaded:
        load_model(model_name)
        return Response(status_code=201)
    return Response(status_code=409, content="Model already loaded")
|
122 |
+
|
123 |
+
|
124 |
+
@app.delete("/api/ps/{model_name:path}", tags=["experimental"], summary="Unload a model from memory.")
def stop_running_model(model_name: str) -> Response:
    """Unload a model from memory.

    Args:
        model_name: Name of the model, taken from the URL path. The route
            uses the ``:path`` converter, so the name may contain ``/``.

    Returns:
        A 204 response when the entry was removed from ``loaded_models``,
        or a 404 response when no entry with that name exists.
    """
    if model_name not in loaded_models:
        return Response(status_code=404)
    # Remove the entry without binding the model to a local name. A local
    # reference would keep the object alive while gc.collect() runs, making
    # the explicit collection pointless.
    del loaded_models[model_name]
    gc.collect()
    return Response(status_code=204)
|
132 |
+
|
133 |
+
|
134 |
@app.get("/v1/models")
|
135 |
def get_models() -> ModelListResponse:
|
136 |
models = huggingface_hub.list_models(library="ctranslate2", tags="automatic-speech-recognition", cardData=True)
|