chore: fix ruff errors
Fedir Zadniprovskyi committed · Commit dc4f25f · Parent(s): 8ad4ca5
Changed files:
- faster_whisper_server/asr.py (+2 -2)
- faster_whisper_server/audio.py (+8 -6)
- faster_whisper_server/config.py (+4 -4)
- faster_whisper_server/core.py (+15 -35)
- faster_whisper_server/gradio_app.py (+11 -26)
- faster_whisper_server/logger.py (+1 -3)
- faster_whisper_server/main.py (+38 -79)
- faster_whisper_server/server_models.py (+11 -21)
- faster_whisper_server/transcriber.py (+6 -2)
- pyproject.toml (+24 -7)
- tests/api_model_test.py (+4 -6)
- tests/app_test.py (+9 -13)
- tests/conftest.py (+4 -7)
- tests/sse_test.py (+9 -19)
faster_whisper_server/asr.py CHANGED
@@ -1,6 +1,6 @@
 import asyncio
+from collections.abc import Iterable
 import time
-from typing import Iterable
 
 from faster_whisper import transcribe
 
@@ -45,7 +45,7 @@ class FasterWhisperASR:
         audio: Audio,
         prompt: str | None = None,
     ) -> tuple[Transcription, transcribe.TranscriptionInfo]:
-        """Wrapper around _transcribe so it can be used in async context"""
+        """Wrapper around _transcribe so it can be used in async context."""
         # is this the optimal way to execute a blocking call in an async context?
         # TODO: verify performance when running inference on a CPU
         return await asyncio.get_running_loop().run_in_executor(
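Review note: the `Iterable` import moves from `typing` to `collections.abc`, which is what ruff's UP035 (deprecated-import) asks for on Python 3.9+; the new position of the import also matches isort's sort-within-sections ordering. A minimal sketch of the preferred style (the helper function is hypothetical, not from this repo):

from collections.abc import Iterable  # preferred over typing.Iterable (ruff UP035)

def total_duration(durations: Iterable[float]) -> float:
    # Iterable works as an annotation exactly as typing.Iterable did.
    return sum(durations)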
faster_whisper_server/audio.py CHANGED
@@ -1,15 +1,19 @@
 from __future__ import annotations
 
 import asyncio
-from typing import AsyncGenerator, BinaryIO
+from typing import TYPE_CHECKING, BinaryIO
 
 import numpy as np
 import soundfile as sf
-from numpy.typing import NDArray
 
 from faster_whisper_server.config import SAMPLES_PER_SECOND
 from faster_whisper_server.logger import logger
 
+if TYPE_CHECKING:
+    from collections.abc import AsyncGenerator
+
+    from numpy.typing import NDArray
+
 
 def audio_samples_from_file(file: BinaryIO) -> NDArray[np.float32]:
     audio_and_sample_rate = sf.read(
@@ -22,7 +26,7 @@ def audio_samples_from_file(file: BinaryIO) -> NDArray[np.float32]:
         endian="LITTLE",
     )
     audio = audio_and_sample_rate[0]
-    return audio  #
+    return audio  # pyright: ignore[reportReturnType]
 
 
 class Audio:
@@ -78,9 +82,7 @@ class AudioStream(Audio):
         self.modify_event.set()
         logger.info("AudioStream closed")
 
-    async def chunks(
-        self, min_duration: float
-    ) -> AsyncGenerator[NDArray[np.float32], None]:
+    async def chunks(self, min_duration: float) -> AsyncGenerator[NDArray[np.float32], None]:
         i = 0.0  # end time of last chunk
         while True:
             await self.modify_event.wait()
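Review note: `AsyncGenerator` and `NDArray` are only used in annotations, so they move under an `if TYPE_CHECKING:` block; combined with `from __future__ import annotations`, the names never have to exist at runtime, so the imports cost nothing when the module loads. A standalone sketch of the pattern (module and function are hypothetical):

from __future__ import annotations  # annotations become strings, evaluated lazily

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Imported only while type checking; skipped entirely at runtime.
    from collections.abc import AsyncGenerator

async def countdown(n: int) -> AsyncGenerator[int, None]:
    while n > 0:
        yield n
        n -= 1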
faster_whisper_server/config.py CHANGED
@@ -15,7 +15,7 @@ class ResponseFormat(enum.StrEnum):
     TEXT = "text"
     JSON = "json"
     VERBOSE_JSON = "verbose_json"
-    # NOTE: While inspecting outputs of these formats with `curl`, I noticed there's one or two "\n" inserted at the end of the response.
+    # NOTE: While inspecting outputs of these formats with `curl`, I noticed there's one or two "\n" inserted at the end of the response.  # noqa: E501
 
     # VTT = "vtt"  # TODO
     # 1
@@ -185,8 +185,8 @@ class WhisperConfig(BaseModel):
 
 
 class Config(BaseSettings):
-    """
-
+    """Configuration for the application. Values can be set via environment variables.
+
     Pydantic will automatically handle mapping uppercased environment variables to the corresponding fields.
     To populate nested, the environment should be prefixed with the nested field name and an underscore. For example,
     the environment variable `LOG_LEVEL` will be mapped to `log_level`, `WHISPER_MODEL` to `whisper.model`, etc.
@@ -208,7 +208,7 @@ class Config(BaseSettings):
     max_inactivity_seconds: float = 5.0
     """
    Max allowed audio duration without any speech being detected before transcription is finilized and connection is closed.
-    """
+    """  # noqa: E501
     inactivity_window_seconds: float = 10.0
     """
     Controls how many latest seconds of audio are being passed through VAD.
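Review note: the new `Config` docstring summary describes the env-var mapping. A hedged sketch of how that mapping typically looks with pydantic-settings (the repo's actual `model_config` and field set are not part of this diff, so the wiring below is illustrative):

import os

from pydantic import BaseModel
from pydantic_settings import BaseSettings, SettingsConfigDict

class WhisperConfig(BaseModel):
    model: str = "Systran/faster-whisper-tiny.en"

class Config(BaseSettings):
    # With a nested delimiter of "_", WHISPER_MODEL populates whisper.model.
    model_config = SettingsConfigDict(env_nested_delimiter="_")

    log_level: str = "info"
    whisper: WhisperConfig = WhisperConfig()

os.environ["LOG_LEVEL"] = "debug"
os.environ["WHISPER_MODEL"] = "Systran/faster-distil-whisper-large-v3"
config = Config()
print(config.log_level, config.whisper.model)  # debug Systran/faster-distil-whisper-large-v3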
faster_whisper_server/core.py CHANGED
@@ -1,8 +1,8 @@
 # TODO: rename module
 from __future__ import annotations
 
-import re
 from dataclasses import dataclass
+import re
 
 from faster_whisper_server.config import config
 
@@ -18,10 +18,7 @@ class Segment:
     def is_eos(self) -> bool:
         if self.text.endswith("..."):
             return False
-        for punctuation_symbol in ".?!":
-            if self.text.endswith(punctuation_symbol):
-                return True
-        return False
+        return any(self.text.endswith(punctuation_symbol) for punctuation_symbol in ".?!")
 
     def offset(self, seconds: float) -> None:
         self.start += seconds
@@ -36,11 +33,7 @@ class Word(Segment):
     @classmethod
     def common_prefix(cls, a: list[Word], b: list[Word]) -> list[Word]:
         i = 0
-        while (
-            i < len(a)
-            and i < len(b)
-            and canonicalize_word(a[i].text) == canonicalize_word(b[i].text)
-        ):
+        while i < len(a) and i < len(b) and canonicalize_word(a[i].text) == canonicalize_word(b[i].text):
             i += 1
         return a[:i]
 
@@ -67,9 +60,7 @@ class Transcription:
         return self.end - self.start
 
     def after(self, seconds: float) -> Transcription:
-        return Transcription(
-            words=[word for word in self.words if word.start > seconds]
-        )
+        return Transcription(words=[word for word in self.words if word.start > seconds])
 
     def extend(self, words: list[Word]) -> None:
         self._ensure_no_word_overlap(words)
@@ -77,21 +68,16 @@ class Transcription:
 
     def _ensure_no_word_overlap(self, words: list[Word]) -> None:
         if len(self.words) > 0 and len(words) > 0:
-            if (
-                words[0].start + config.word_timestamp_error_margin
-                <= self.words[-1].end
-            ):
+            if words[0].start + config.word_timestamp_error_margin <= self.words[-1].end:
                 raise ValueError(
-                    f"Words overlap: {self.words[-1]} and {words[0]}. Error margin: {config.word_timestamp_error_margin}"
+                    f"Words overlap: {self.words[-1]} and {words[0]}. Error margin: {config.word_timestamp_error_margin}"  # noqa: E501
                 )
         for i in range(1, len(words)):
             if words[i].start + config.word_timestamp_error_margin <= words[i - 1].end:
-                raise ValueError(
-                    f"Words overlap: {words[i - 1]} and {words[i]}. All words: {words}"
-                )
+                raise ValueError(f"Words overlap: {words[i - 1]} and {words[i]}. All words: {words}")
 
 
-def test_segment_is_eos():
+def test_segment_is_eos() -> None:
     assert not Segment("Hello").is_eos
     assert not Segment("Hello...").is_eos
     assert Segment("Hello.").is_eos
@@ -117,16 +103,14 @@ def to_full_sentences(words: list[Word]) -> list[Segment]:
     return sentences
 
 
-def tests_to_full_sentences():
+def tests_to_full_sentences() -> None:
     assert to_full_sentences([]) == []
     assert to_full_sentences([Word(text="Hello")]) == []
     assert to_full_sentences([Word(text="Hello..."), Word(" world")]) == []
-    assert to_full_sentences([Word(text="Hello..."), Word(" world.")]) == [
+    assert to_full_sentences([Word(text="Hello..."), Word(" world.")]) == [Segment(text="Hello... world.")]
+    assert to_full_sentences([Word(text="Hello..."), Word(" world."), Word(" How")]) == [
         Segment(text="Hello... world.")
     ]
-    assert to_full_sentences(
-        [Word(text="Hello..."), Word(" world."), Word(" How")]
-    ) == [Segment(text="Hello... world.")]
 
 
 def to_text(words: list[Word]) -> str:
@@ -144,7 +128,7 @@ def canonicalize_word(text: str) -> str:
     return text.lower().strip().strip(".,?!")
 
 
-def test_canonicalize_word():
+def test_canonicalize_word() -> None:
     assert canonicalize_word("ABC") == "abc"
     assert canonicalize_word("...ABC?") == "abc"
     assert canonicalize_word("... AbC ...") == "abc"
@@ -152,16 +136,12 @@ def test_canonicalize_word():
 
 def common_prefix(a: list[Word], b: list[Word]) -> list[Word]:
     i = 0
-    while (
-        i < len(a)
-        and i < len(b)
-        and canonicalize_word(a[i].text) == canonicalize_word(b[i].text)
-    ):
+    while i < len(a) and i < len(b) and canonicalize_word(a[i].text) == canonicalize_word(b[i].text):
         i += 1
     return a[:i]
 
 
-def test_common_prefix():
+def test_common_prefix() -> None:
     def word(text: str) -> Word:
         return Word(text=text, start=0.0, end=0.0, probability=0.0)
 
@@ -194,7 +174,7 @@ def test_common_prefix():
     assert common_prefix(a, b) == []
 
 
-def test_common_prefix_and_canonicalization():
+def test_common_prefix_and_canonicalization() -> None:
     def word(text: str) -> Word:
         return Word(text=text, start=0.0, end=0.0, probability=0.0)
 
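Review note: collapsing the `is_eos` loop into `any()` is the rewrite ruff's SIM110 suggests (use a builtin instead of a for-loop that returns True); behavior is unchanged. A quick standalone equivalence check:

def is_eos(text: str) -> bool:
    # Same logic as Segment.is_eos above, extracted for a standalone check.
    if text.endswith("..."):
        return False
    return any(text.endswith(punctuation_symbol) for punctuation_symbol in ".?!")

assert not is_eos("Hello")
assert not is_eos("Hello...")
assert is_eos("Hello.")
assert is_eos("Hello?")
assert is_eos("Hello!")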
faster_whisper_server/gradio_app.py CHANGED
@@ -1,5 +1,5 @@
+from collections.abc import Generator
 import os
-from typing import Generator
 
 import gradio as gr
 import httpx
@@ -13,26 +13,20 @@ TRANSLATION_ENDPOINT = "/v1/audio/translations"
 
 def create_gradio_demo(config: Config) -> gr.Blocks:
     host = os.getenv("UVICORN_HOST", "0.0.0.0")
-    port = os.getenv("UVICORN_PORT", 8000)
+    port = int(os.getenv("UVICORN_PORT", "8000"))
     # NOTE: worth looking into generated clients
     http_client = httpx.Client(base_url=f"http://{host}:{port}", timeout=None)
 
-    def handler(
-        file_path: str, model: str, task: Task, temperature: float, stream: bool
-    ) -> Generator[str, None, None]:
+    def handler(file_path: str, model: str, task: Task, temperature: float, stream: bool) -> Generator[str, None, None]:
         if stream:
             previous_transcription = ""
-            for transcription in transcribe_audio_streaming(
-                file_path, task, temperature, model
-            ):
+            for transcription in transcribe_audio_streaming(file_path, task, temperature, model):
                 previous_transcription += transcription
                 yield previous_transcription
         else:
             yield transcribe_audio(file_path, task, temperature, model)
 
-    def transcribe_audio(
-        file_path: str, task: Task, temperature: float, model: str
-    ) -> str:
+    def transcribe_audio(file_path: str, task: Task, temperature: float, model: str) -> str:
         if task == Task.TRANSCRIBE:
             endpoint = TRANSCRIPTION_ENDPOINT
         elif task == Task.TRANSLATE:
@@ -65,11 +59,7 @@ def create_gradio_demo(config: Config) -> gr.Blocks:
                 "stream": True,
             },
         }
-        endpoint = (
-            TRANSCRIPTION_ENDPOINT
-            if task == Task.TRANSCRIBE
-            else TRANSLATION_ENDPOINT
-        )
+        endpoint = TRANSCRIPTION_ENDPOINT if task == Task.TRANSCRIBE else TRANSLATION_ENDPOINT
         with connect_sse(http_client, "POST", endpoint, **kwargs) as event_source:
             for event in event_source.iter_sse():
                 yield event.data
@@ -79,18 +69,15 @@ def create_gradio_demo(config: Config) -> gr.Blocks:
         res_data = res.json()
         models: list[str] = [model["id"] for model in res_data]
         assert config.whisper.model in models
-        recommended_models = set(
-            model for model in models if model.startswith("Systran")
-        )
+        recommended_models = {model for model in models if model.startswith("Systran")}
         other_models = [model for model in models if model not in recommended_models]
         models = list(recommended_models) + other_models
-        model_dropdown = gr.Dropdown(
+        return gr.Dropdown(
             # no idea why it's complaining
-            choices=models,  #
+            choices=models,  # pyright: ignore[reportArgumentType]
             label="Model",
             value=config.whisper.model,
         )
-        return model_dropdown
 
     model_dropdown = gr.Dropdown(
         choices=[config.whisper.model],
@@ -102,13 +89,11 @@ def create_gradio_demo(config: Config) -> gr.Blocks:
         label="Task",
         value=Task.TRANSCRIBE,
     )
-    temperature_slider = gr.Slider(
-        minimum=0.0, maximum=1.0, step=0.1, label="Temperature", value=0.0
-    )
+    temperature_slider = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, label="Temperature", value=0.0)
    stream_checkbox = gr.Checkbox(label="Stream", value=True)
    with gr.Interface(
        title="Whisper Playground",
-        description="""Consider supporting the project by starring the <a href="https://github.com/fedirz/faster-whisper-server">repository on GitHub</a>.""",
+        description="""Consider supporting the project by starring the <a href="https://github.com/fedirz/faster-whisper-server">repository on GitHub</a>.""",  # noqa: E501
        inputs=[
            gr.Audio(type="filepath"),
            model_dropdown,
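Review note: the `port` change is a genuine fix, not just formatting. `os.getenv("UVICORN_PORT", 8000)` returns the `int` default when the variable is unset but a `str` when it is set; ruff flags the non-string default (likely PLW1508), and the rewrite yields an `int` either way:

import os

# Before: the type depends on whether the env var is set (str from the env, int from the default).
port_before = os.getenv("UVICORN_PORT", 8000)

# After: always an int, and the default is a string, as os.getenv expects.
port_after = int(os.getenv("UVICORN_PORT", "8000"))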
faster_whisper_server/logger.py CHANGED
@@ -8,6 +8,4 @@ root_logger = logging.getLogger()
 root_logger.setLevel(logging.CRITICAL)
 logger = logging.getLogger(__name__)
 logger.setLevel(config.log_level.upper())
-logging.basicConfig(
-    format="%(asctime)s:%(levelname)s:%(name)s:%(funcName)s:%(message)s"
-)
+logging.basicConfig(format="%(asctime)s:%(levelname)s:%(name)s:%(funcName)s:%(message)s")
faster_whisper_server/main.py CHANGED
@@ -1,12 +1,11 @@
 from __future__ import annotations
 
 import asyncio
-import time
+from collections import OrderedDict
 from io import BytesIO
-from typing import Annotated, Generator, Iterable, Literal, OrderedDict
+import time
+from typing import TYPE_CHECKING, Annotated, Literal
 
-import gradio as gr
-import huggingface_hub
 from fastapi import (
     FastAPI,
     Form,
@@ -21,9 +20,9 @@ from fastapi import (
 from fastapi.responses import StreamingResponse
 from fastapi.websockets import WebSocketState
 from faster_whisper import WhisperModel
-from faster_whisper.transcribe import Segment, TranscriptionInfo
 from faster_whisper.vad import VadOptions, get_speech_timestamps
-from huggingface_hub.hf_api import ModelInfo
+import gradio as gr
+import huggingface_hub
 from pydantic import AfterValidator
 
 from faster_whisper_server import utils
@@ -45,6 +44,12 @@ from faster_whisper_server.server_models import (
 )
 from faster_whisper_server.transcriber import audio_transcriber
 
+if TYPE_CHECKING:
+    from collections.abc import Generator, Iterable
+
+    from faster_whisper.transcribe import Segment, TranscriptionInfo
+    from huggingface_hub.hf_api import ModelInfo
+
 loaded_models: OrderedDict[str, WhisperModel] = OrderedDict()
 
 
@@ -54,9 +59,7 @@ def load_model(model_name: str) -> WhisperModel:
         return loaded_models[model_name]
     if len(loaded_models) >= config.max_models:
         oldest_model_name = next(iter(loaded_models))
-        logger.info(
-            f"Max models ({config.max_models}) reached. Unloading the oldest model: {oldest_model_name}"
-        )
+        logger.info(f"Max models ({config.max_models}) reached. Unloading the oldest model: {oldest_model_name}")
         del loaded_models[oldest_model_name]
     logger.debug(f"Loading {model_name}...")
     start = time.perf_counter()
@@ -67,7 +70,7 @@ def load_model(model_name: str) -> WhisperModel:
         compute_type=config.whisper.compute_type,
     )
     logger.info(
-        f"Loaded {model_name} loaded in {time.perf_counter() - start:.2f} seconds. {config.whisper.inference_device}({config.whisper.compute_type}) will be used for inference."
+        f"Loaded {model_name} loaded in {time.perf_counter() - start:.2f} seconds. {config.whisper.inference_device}({config.whisper.compute_type}) will be used for inference."  # noqa: E501
     )
     loaded_models[model_name] = whisper
     return whisper
@@ -102,9 +105,7 @@ def get_models() -> list[ModelObject]:
 def get_model(
     model_name: Annotated[str, Path(example="Systran/faster-distil-whisper-large-v3")],
 ) -> ModelObject:
-    models = list(
-        huggingface_hub.list_models(model_name=model_name, library="ctranslate2")
-    )
+    models = list(huggingface_hub.list_models(model_name=model_name, library="ctranslate2"))
     if len(models) == 0:
         raise HTTPException(status_code=404, detail="Model doesn't exists")
     exact_match: ModelInfo | None = None
@@ -132,14 +133,12 @@ def segments_to_response(
     response_format: ResponseFormat,
 ) -> str | TranscriptionJsonResponse | TranscriptionVerboseJsonResponse:
     segments = list(segments)
-    if response_format == ResponseFormat.TEXT:
+    if response_format == ResponseFormat.TEXT:  # noqa: RET503
         return utils.segments_text(segments)
     elif response_format == ResponseFormat.JSON:
         return TranscriptionJsonResponse.from_segments(segments)
     elif response_format == ResponseFormat.VERBOSE_JSON:
-        return TranscriptionVerboseJsonResponse.from_segments(
-            segments, transcription_info
-        )
+        return TranscriptionVerboseJsonResponse.from_segments(segments, transcription_info)
 
 
 def format_as_sse(data: str) -> str:
@@ -156,26 +155,21 @@ def segments_to_streaming_response(
         if response_format == ResponseFormat.TEXT:
             data = segment.text
         elif response_format == ResponseFormat.JSON:
-            data = TranscriptionJsonResponse.from_segments(
-                [segment]
-            ).model_dump_json()
+            data = TranscriptionJsonResponse.from_segments([segment]).model_dump_json()
         elif response_format == ResponseFormat.VERBOSE_JSON:
-            data = TranscriptionVerboseJsonResponse.from_segment(
-                segment, transcription_info
-            ).model_dump_json()
+            data = TranscriptionVerboseJsonResponse.from_segment(segment, transcription_info).model_dump_json()
         yield format_as_sse(data)
 
     return StreamingResponse(segment_responses(), media_type="text/event-stream")
 
 
 def handle_default_openai_model(model_name: str) -> str:
-    """
+    """Exists because some callers may not be able override the default("whisper-1") model name.
+
     For example, https://github.com/open-webui/open-webui/issues/2248#issuecomment-2162997623.
     """
     if model_name == "whisper-1":
-        logger.info(
-            f"{model_name} is not a valid model name. Using {config.whisper.model} instead."
-        )
+        logger.info(f"{model_name} is not a valid model name. Using {config.whisper.model} instead.")
         return config.whisper.model
     return model_name
 
@@ -194,12 +188,7 @@ def translate_file(
     response_format: Annotated[ResponseFormat, Form()] = config.default_response_format,
     temperature: Annotated[float, Form()] = 0.0,
     stream: Annotated[bool, Form()] = False,
-) -> (
-    str
-    | TranscriptionJsonResponse
-    | TranscriptionVerboseJsonResponse
-    | StreamingResponse
-):
+) -> str | TranscriptionJsonResponse | TranscriptionVerboseJsonResponse | StreamingResponse:
     whisper = load_model(model)
     segments, transcription_info = whisper.transcribe(
         file.file,
@@ -210,9 +199,7 @@ def translate_file(
     )
 
     if stream:
-        return segments_to_streaming_response(
-            segments, transcription_info, response_format
-        )
+        return segments_to_streaming_response(segments, transcription_info, response_format)
     else:
         return segments_to_response(segments, transcription_info, response_format)
 
@@ -231,16 +218,11 @@ def transcribe_file(
     response_format: Annotated[ResponseFormat, Form()] = config.default_response_format,
     temperature: Annotated[float, Form()] = 0.0,
     timestamp_granularities: Annotated[
-        list[Literal["segment"] | Literal["word"]],
+        list[Literal["segment", "word"]],
         Form(alias="timestamp_granularities[]"),
     ] = ["segment"],
     stream: Annotated[bool, Form()] = False,
-) -> (
-    str
-    | TranscriptionJsonResponse
-    | TranscriptionVerboseJsonResponse
-    | StreamingResponse
-):
+) -> str | TranscriptionJsonResponse | TranscriptionVerboseJsonResponse | StreamingResponse:
     whisper = load_model(model)
     segments, transcription_info = whisper.transcribe(
         file.file,
@@ -253,9 +235,7 @@ def transcribe_file(
     )
 
     if stream:
-        return segments_to_streaming_response(
-            segments, transcription_info, response_format
-        )
+        return segments_to_streaming_response(segments, transcription_info, response_format)
     else:
         return segments_to_response(segments, transcription_info, response_format)
 
@@ -263,39 +243,28 @@ def transcribe_file(
 async def audio_receiver(ws: WebSocket, audio_stream: AudioStream) -> None:
     try:
         while True:
-            bytes_ = await asyncio.wait_for(
-                ws.receive_bytes(), timeout=config.max_no_data_seconds
-            )
+            bytes_ = await asyncio.wait_for(ws.receive_bytes(), timeout=config.max_no_data_seconds)
             logger.debug(f"Received {len(bytes_)} bytes of audio data")
             audio_samples = audio_samples_from_file(BytesIO(bytes_))
             audio_stream.extend(audio_samples)
             if audio_stream.duration - config.inactivity_window_seconds >= 0:
-                audio = audio_stream.after(
-                    audio_stream.duration - config.inactivity_window_seconds
-                )
+                audio = audio_stream.after(audio_stream.duration - config.inactivity_window_seconds)
                 vad_opts = VadOptions(min_silence_duration_ms=500, speech_pad_ms=0)
                 # NOTE: This is a synchronous operation that runs every time new data is received.
-                # This shouldn't be an issue unless data is being received in tiny chunks or the user's machine is a potato.
+                # This shouldn't be an issue unless data is being received in tiny chunks or the user's machine is a potato.  # noqa: E501
                 timestamps = get_speech_timestamps(audio.data, vad_opts)
                 if len(timestamps) == 0:
-                    logger.info(
-                        f"No speech detected in the last {config.inactivity_window_seconds} seconds."
-                    )
+                    logger.info(f"No speech detected in the last {config.inactivity_window_seconds} seconds.")
                     break
                 elif (
                     # last speech end time
-                    config.inactivity_window_seconds
-                    - timestamps[-1]["end"] / SAMPLES_PER_SECOND
+                    config.inactivity_window_seconds - timestamps[-1]["end"] / SAMPLES_PER_SECOND
                     >= config.max_inactivity_seconds
                 ):
-                    logger.info(
-                        f"Not enough speech in the last {config.inactivity_window_seconds} seconds."
-                    )
+                    logger.info(f"Not enough speech in the last {config.inactivity_window_seconds} seconds.")
                     break
-    except asyncio.TimeoutError:
-        logger.info(
-            f"No data received in {config.max_no_data_seconds} seconds. Closing the connection."
-        )
+    except TimeoutError:
+        logger.info(f"No data received in {config.max_no_data_seconds} seconds. Closing the connection.")
     except WebSocketDisconnect as e:
         logger.info(f"Client disconnected: {e}")
     audio_stream.close()
@@ -306,9 +275,7 @@ async def transcribe_stream(
     ws: WebSocket,
     model: Annotated[ModelName, Query()] = config.whisper.model,
     language: Annotated[Language | None, Query()] = config.default_language,
-    response_format: Annotated[
-        ResponseFormat, Query()
-    ] = config.default_response_format,
+    response_format: Annotated[ResponseFormat, Query()] = config.default_response_format,
     temperature: Annotated[float, Query()] = 0.0,
 ) -> None:
     await ws.accept()
@@ -331,19 +298,11 @@ async def transcribe_stream(
         if response_format == ResponseFormat.TEXT:
             await ws.send_text(transcription.text)
         elif response_format == ResponseFormat.JSON:
-            await ws.send_json(
-                TranscriptionJsonResponse.from_transcription(
-                    transcription
-                ).model_dump()
-            )
+            await ws.send_json(TranscriptionJsonResponse.from_transcription(transcription).model_dump())
         elif response_format == ResponseFormat.VERBOSE_JSON:
-            await ws.send_json(
-                TranscriptionVerboseJsonResponse.from_transcription(
-                    transcription
-                ).model_dump()
-            )
+            await ws.send_json(TranscriptionVerboseJsonResponse.from_transcription(transcription).model_dump())
 
-    if not ws.client_state == WebSocketState.DISCONNECTED:
+    if ws.client_state != WebSocketState.DISCONNECTED:
         logger.info("Closing the connection.")
         await ws.close()
 
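Review note: since Python 3.11 `asyncio.TimeoutError` is an alias of the builtin `TimeoutError`, and ruff's UP041 rewrites handlers to catch the builtin, which is the change to `audio_receiver` above (the project targets py312 per pyproject.toml). A runnable sketch:

import asyncio

async def main() -> None:
    try:
        # On Python 3.11+ this raises the builtin TimeoutError
        # (asyncio.TimeoutError is the same class).
        await asyncio.wait_for(asyncio.sleep(10), timeout=0.01)
    except TimeoutError:
        print("timed out")

asyncio.run(main())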
faster_whisper_server/server_models.py CHANGED
@@ -1,12 +1,15 @@
 from __future__ import annotations
 
-from typing import Literal
+from typing import TYPE_CHECKING, Literal
 
-from faster_whisper.transcribe import Segment, TranscriptionInfo, Word
 from pydantic import BaseModel, ConfigDict, Field
 
 from faster_whisper_server import utils
-from faster_whisper_server.core import Transcription
+
+if TYPE_CHECKING:
+    from faster_whisper.transcribe import Segment, TranscriptionInfo, Word
+
+    from faster_whisper_server.core import Transcription
 
 
 # https://platform.openai.com/docs/api-reference/audio/json-object
@@ -18,9 +21,7 @@ class TranscriptionJsonResponse(BaseModel):
         return cls(text=utils.segments_text(segments))
 
     @classmethod
-    def from_transcription(
-        cls, transcription: Transcription
-    ) -> TranscriptionJsonResponse:
+    def from_transcription(cls, transcription: Transcription) -> TranscriptionJsonResponse:
         return cls(text=transcription.text)
 
 
@@ -78,18 +79,12 @@ class TranscriptionVerboseJsonResponse(BaseModel):
     segments: list[SegmentObject]
 
     @classmethod
-    def from_segment(
-        cls, segment: Segment, transcription_info: TranscriptionInfo
-    ) -> TranscriptionVerboseJsonResponse:
+    def from_segment(cls, segment: Segment, transcription_info: TranscriptionInfo) -> TranscriptionVerboseJsonResponse:
         return cls(
             language=transcription_info.language,
             duration=segment.end - segment.start,
             text=segment.text,
-            words=(
-                [WordObject.from_word(word) for word in segment.words]
-                if isinstance(segment.words, list)
-                else []
-            ),
+            words=([WordObject.from_word(word) for word in segment.words] if isinstance(segment.words, list) else []),
             segments=[SegmentObject.from_segment(segment)],
         )
 
@@ -102,16 +97,11 @@ class TranscriptionVerboseJsonResponse(BaseModel):
             duration=transcription_info.duration,
             text=utils.segments_text(segments),
             segments=[SegmentObject.from_segment(segment) for segment in segments],
-            words=[
-                WordObject.from_word(word)
-                for word in utils.words_from_segments(segments)
-            ],
+            words=[WordObject.from_word(word) for word in utils.words_from_segments(segments)],
         )
 
     @classmethod
-    def from_transcription(
-        cls, transcription: Transcription
-    ) -> TranscriptionVerboseJsonResponse:
+    def from_transcription(cls, transcription: Transcription) -> TranscriptionVerboseJsonResponse:
         return cls(
             language="english",  # FIX: hardcoded
             duration=transcription.duration,
faster_whisper_server/transcriber.py CHANGED
@@ -1,8 +1,7 @@
 from __future__ import annotations
 
-from typing import AsyncGenerator
+from typing import TYPE_CHECKING
 
-from faster_whisper_server.asr import FasterWhisperASR
 from faster_whisper_server.audio import Audio, AudioStream
 from faster_whisper_server.config import config
 from faster_whisper_server.core import (
@@ -13,6 +12,11 @@ from faster_whisper_server.core import (
 )
 from faster_whisper_server.logger import logger
 
+if TYPE_CHECKING:
+    from collections.abc import AsyncGenerator
+
+    from faster_whisper_server.asr import FasterWhisperASR
+
 
 class LocalAgreement:
     def __init__(self) -> None:
pyproject.toml CHANGED
@@ -28,18 +28,35 @@ target-version = "py312"
 [tool.ruff.lint]
 select = ["ALL"]
 ignore = [
-    "
+    "FIX",
+    "TD",  # disable todo warnings
     "ERA",  # allow commented out code
-    "
-    "FIX002",  # disable TODO warnings
+    "PTH",
 
+    "ANN003",  # missing kwargs
+    "ANN101",  # missing self type
+    "ANN102",  # missing cls
+    "B006",
+    "B008",
     "COM812",  # trailing comma
-    "
+    "D10",  # disabled required docstrings
+    "D401",
+    "EM102",
+    "FBT001",
+    "FBT002",
+    "PLR0913",
+    "PLR2004",  # magic
+    "RET504",
+    "RET505",
+    "RET508",
     "S101",  # allow assert
-    "
+    "S104",
     "S603",  # subprocess untrusted input
-
-    "
+    "SIM102",
+    "T201",  # print
+    "TRY003",
+    "W505",
+    "ISC001"  # recommended to disable for formatting
 ]
 
 [tool.ruff.lint.isort]
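Review note: this config enables every ruff rule (`select = ["ALL"]`) and then opts out per rule or rule family; a bare prefix in `ignore` silences the whole family, which is why `"FIX"` replaces the narrower `"FIX002"` entry. An illustrative excerpt (hypothetical, showing how prefixes expand):

[tool.ruff.lint]
select = ["ALL"]
ignore = [
    "FIX",  # whole family: FIX001-FIX004 (flags FIXME/TODO/XXX/HACK comments)
    "D10",  # whole family: D100-D107 (missing-docstring checks)
]

Most of the mechanical changes in this commit are the kind `ruff check --fix` applies automatically; the `# noqa: E501` markers cover lines deliberately kept over the length limit.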
tests/api_model_test.py CHANGED
@@ -4,9 +4,7 @@ from faster_whisper_server.server_models import ModelObject
 
 MODEL_THAT_EXISTS = "Systran/faster-whisper-tiny.en"
 MODEL_THAT_DOES_NOT_EXIST = "i-do-not-exist"
-MIN_EXPECTED_NUMBER_OF_MODELS = (
-    200  # At the time of the test creation there are 228 models
-)
+MIN_EXPECTED_NUMBER_OF_MODELS = 200  # At the time of the test creation there are 228 models
 
 
 # HACK: because ModelObject(**data) doesn't work
@@ -19,20 +17,20 @@ def model_dict_to_object(model_dict: dict) -> ModelObject:
     )
 
 
-def test_list_models(client: TestClient):
+def test_list_models(client: TestClient) -> None:
     response = client.get("/v1/models")
     data = response.json()
     models = [model_dict_to_object(model_dict) for model_dict in data]
     assert len(models) > MIN_EXPECTED_NUMBER_OF_MODELS
 
 
-def test_model_exists(client: TestClient):
+def test_model_exists(client: TestClient) -> None:
     response = client.get(f"/v1/models/{MODEL_THAT_EXISTS}")
     data = response.json()
     model = model_dict_to_object(data)
     assert model.id == MODEL_THAT_EXISTS
 
 
-def test_model_does_not_exist(client: TestClient):
+def test_model_does_not_exist(client: TestClient) -> None:
     response = client.get(f"/v1/models/{MODEL_THAT_DOES_NOT_EXIST}")
     assert response.status_code == 404
tests/app_test.py CHANGED
@@ -1,10 +1,10 @@
+from collections.abc import Generator
 import json
 import os
 import time
-from typing import Generator
 
-import pytest
 from fastapi.testclient import TestClient
+import pytest
 from starlette.testclient import WebSocketTestSession
 
 from faster_whisper_server.config import BYTES_PER_SECOND
@@ -22,35 +22,31 @@ def ws(client: TestClient) -> Generator[WebSocketTestSession, None, None]:
     yield ws
 
 
-def get_audio_file_paths():
-    file_paths = []
+def get_audio_file_paths() -> list[str]:
+    file_paths: list[str] = []
     directory = "tests/data"
     for filename in sorted(os.listdir(directory)[:AUDIO_FILES_LIMIT]):
-        file_paths.append(os.path.join(directory, filename))
+        file_paths.append(os.path.join(directory, filename))  # noqa: PERF401
     return file_paths
 
 
 file_paths = get_audio_file_paths()
 
 
-def stream_audio_data(
-    ws: WebSocketTestSession, data: bytes, *, chunk_size: int = 4000, speed: float = 1.0
-):
+def stream_audio_data(ws: WebSocketTestSession, data: bytes, *, chunk_size: int = 4000, speed: float = 1.0) -> None:
     for i in range(0, len(data), chunk_size):
         ws.send_bytes(data[i : i + chunk_size])
         delay = len(data[i : i + chunk_size]) / BYTES_PER_SECOND / speed
         time.sleep(delay)
 
 
-def transcribe_audio_data(
-    client: TestClient, data: bytes
-) -> TranscriptionVerboseJsonResponse:
+def transcribe_audio_data(client: TestClient, data: bytes) -> TranscriptionVerboseJsonResponse:
     response = client.post(
         TRANSCRIBE_ENDPOINT,
         files={"file": ("audio.raw", data, "audio/raw")},
     )
     data = json.loads(response.json())  # TODO: figure this out
-    return TranscriptionVerboseJsonResponse(**data)  #
+    return TranscriptionVerboseJsonResponse(**data)  # pyright: ignore[reportCallIssue]
 
 
 # @pytest.mark.parametrize("file_path", file_paths)
@@ -60,7 +56,7 @@ def transcribe_audio_data(
 # with open(file_path, "rb") as file:
 #     data = file.read()
 #
-#     streaming_transcription: TranscriptionVerboseJsonResponse = None  # type: ignore
+#     streaming_transcription: TranscriptionVerboseJsonResponse = None  # type: ignore  # noqa: PGH003
 # thread = threading.Thread(
 #     target=stream_audio_data, args=(ws, data), kwargs={"speed": 4.0}
 # )
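Review note: the `# noqa: PERF401` keeps the explicit append loop; the comprehension ruff would otherwise suggest is equivalent. A sketch, with the limit parameterized since `AUDIO_FILES_LIMIT` is defined elsewhere in the file:

import os

def get_audio_file_paths(directory: str = "tests/data", limit: int | None = None) -> list[str]:
    # PERF401's suggested form: build the list in a single comprehension.
    return [os.path.join(directory, filename) for filename in sorted(os.listdir(directory)[:limit])]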
tests/conftest.py CHANGED
@@ -1,18 +1,15 @@
+from collections.abc import Generator
 import logging
-import os
-from typing import Generator
 
-import pytest
 from fastapi.testclient import TestClient
+import pytest
 
-
-os.environ["WHISPER_MODEL"] = "Systran/faster-whisper-tiny.en"
-from faster_whisper_server.main import app  # noqa: E402
+from faster_whisper_server.main import app
 
 disable_loggers = ["multipart.multipart", "faster_whisper"]
 
 
-def pytest_configure():
+def pytest_configure() -> None:
     for logger_name in disable_loggers:
         logger = logging.getLogger(logger_name)
         logger.disabled = True
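Review note: the old conftest set `WHISPER_MODEL` before importing the app, which forced a mid-module import that needed `# noqa: E402`; the new version drops both, so the tiny test model presumably gets selected some other way (not visible in this diff). The removed pattern, for reference:

# Pattern removed above: the env var must be set before the app module is
# imported, presumably because the config reads the environment at import time.
import os

os.environ["WHISPER_MODEL"] = "Systran/faster-whisper-tiny.en"

from faster_whisper_server.main import app  # noqa: E402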
tests/sse_test.py CHANGED
@@ -1,9 +1,9 @@
 import json
 import os
 
-import pytest
 from fastapi.testclient import TestClient
 from httpx_sse import connect_sse
+import pytest
 
 from faster_whisper_server.server_models import (
     TranscriptionJsonResponse,
@@ -17,15 +17,11 @@ ENDPOINTS = [
 ]
 
 
-parameters = [
-    (file_path, endpoint) for endpoint in ENDPOINTS for file_path in FILE_PATHS
-]
+parameters = [(file_path, endpoint) for endpoint in ENDPOINTS for file_path in FILE_PATHS]
 
 
-@pytest.mark.parametrize("file_path,endpoint", parameters)
-def test_streaming_transcription_text(
-    client: TestClient, file_path: str, endpoint: str
-):
+@pytest.mark.parametrize(("file_path", "endpoint"), parameters)
+def test_streaming_transcription_text(client: TestClient, file_path: str, endpoint: str) -> None:
     extension = os.path.splitext(file_path)[1]
     with open(file_path, "rb") as f:
         data = f.read()
@@ -36,15 +32,11 @@ def test_streaming_transcription_text(
     with connect_sse(client, "POST", endpoint, **kwargs) as event_source:
         for event in event_source.iter_sse():
             print(event)
-            assert (
-                len(event.data) > 1
-            )  # HACK: 1 because of the space character that's always prepended
+            assert len(event.data) > 1  # HACK: 1 because of the space character that's always prepended
 
 
-@pytest.mark.parametrize("file_path,endpoint", parameters)
-def test_streaming_transcription_json(
-    client: TestClient, file_path: str, endpoint: str
-):
+@pytest.mark.parametrize(("file_path", "endpoint"), parameters)
+def test_streaming_transcription_json(client: TestClient, file_path: str, endpoint: str) -> None:
     extension = os.path.splitext(file_path)[1]
     with open(file_path, "rb") as f:
         data = f.read()
@@ -57,10 +49,8 @@ def test_streaming_transcription_json(
             TranscriptionJsonResponse(**json.loads(event.data))
 
 
-@pytest.mark.parametrize("file_path,endpoint", parameters)
-def test_streaming_transcription_verbose_json(
-    client: TestClient, file_path: str, endpoint: str
-):
+@pytest.mark.parametrize(("file_path", "endpoint"), parameters)
+def test_streaming_transcription_verbose_json(client: TestClient, file_path: str, endpoint: str) -> None:
     extension = os.path.splitext(file_path)[1]
     with open(file_path, "rb") as f:
         data = f.read()
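Review note: `@pytest.mark.parametrize(("file_path", "endpoint"), ...)` is ruff PT006's preferred spelling; pytest accepts both forms, but the tuple avoids packing argument names into one comma-separated string. Minimal sketch:

import pytest

# PT006: pass argnames as a tuple of strings instead of "x,y" in one string.
@pytest.mark.parametrize(("x", "y"), [(1, 2), (3, 4)])
def test_sum_is_positive(x: int, y: int) -> None:
    assert x + y > 0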