Fedir Zadniprovskyi committed on
Commit
b20cbad
·
1 Parent(s): 3a14175

feat: ollama-like ps endpoints

Browse files
Files changed (1) hide show
  1. faster_whisper_server/main.py +24 -0
faster_whisper_server/main.py CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
3
  import asyncio
4
  from collections import OrderedDict
5
  from contextlib import asynccontextmanager
 
6
  from io import BytesIO
7
  import time
8
  from typing import TYPE_CHECKING, Annotated, Literal
@@ -107,6 +108,29 @@ def health() -> Response:
107
  return Response(status_code=200, content="OK")
108
 
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  @app.get("/v1/models")
111
  def get_models() -> ModelListResponse:
112
  models = huggingface_hub.list_models(library="ctranslate2", tags="automatic-speech-recognition", cardData=True)
 
3
  import asyncio
4
  from collections import OrderedDict
5
  from contextlib import asynccontextmanager
6
+ import gc
7
  from io import BytesIO
8
  import time
9
  from typing import TYPE_CHECKING, Annotated, Literal
 
108
  return Response(status_code=200, content="OK")
109
 
110
 
111
@app.get("/api/ps", tags=["experimental"], summary="Get a list of loaded models.")
def get_running_models() -> dict[str, list[str]]:
    """Return the keys of ``loaded_models`` under the ``"models"`` key.

    The keys are presumably model names/ids (they are matched against the
    ``model_name`` path parameter of the sibling ``/api/ps/...`` routes).
    """
    loaded_names = [*loaded_models]
    return {"models": loaded_names}
114
+
115
+
116
@app.post("/api/ps/{model_name:path}", tags=["experimental"], summary="Load a model into memory.")
def load_model_route(model_name: str) -> Response:
    """Load ``model_name`` via ``load_model`` unless it is already loaded.

    Returns:
        409 with the body ``"Model already loaded"`` when ``model_name`` is
        already a key of ``loaded_models``; otherwise 201 after
        ``load_model(model_name)`` has returned.
    """
    already_loaded = model_name in loaded_models
    if not already_loaded:
        load_model(model_name)
        return Response(status_code=201)
    return Response(status_code=409, content="Model already loaded")
122
+
123
+
124
@app.delete("/api/ps/{model_name:path}", tags=["experimental"], summary="Unload a model from memory.")
def stop_running_model(model_name: str) -> Response:
    """Remove ``model_name`` from ``loaded_models`` and run a garbage collection.

    Returns:
        204 when an entry for ``model_name`` existed and was removed,
        404 when there is no such entry (or its stored value is ``None``).
    """
    # Deliberately do NOT bind the model object to a local variable: a live
    # local reference would keep the object alive while gc.collect() runs,
    # so the collection could not reclaim the model being unloaded.
    if loaded_models.get(model_name) is None:
        return Response(status_code=404)
    del loaded_models[model_name]
    gc.collect()
    return Response(status_code=204)
132
+
133
+
134
  @app.get("/v1/models")
135
  def get_models() -> ModelListResponse:
136
  models = huggingface_hub.list_models(library="ctranslate2", tags="automatic-speech-recognition", cardData=True)