# include:
#   - compose.observability.yaml
services:
  # CUDA variant of the `speaches` service: the definition is pulled in from
  # compose.yaml via `extends`, and the keys below are set on top of it.
  speaches:
    extends:
      file: compose.yaml
      service: speaches
    image: ghcr.io/speaches-ai/speaches:latest-cuda
    build:
      args:
        # NVIDIA CUDA 12.6.2 + cuDNN runtime image on Ubuntu 24.04.
        # NOTE(review): presumably consumed as the Dockerfile's base image;
        # confirm against the Dockerfile.
        BASE_IMAGE: nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
    environment:
      WHISPER__MODEL: Systran/faster-whisper-large-v3
    volumes:
      # Mounts the `hf-hub-cache` named volume at the Hugging Face hub cache
      # path in the container (presumably so downloaded models survive
      # container re-creation; verify).
      - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
    deploy:
      resources:
        reservations:
          devices:
            # Reserve a device exposing the `gpu` capability; no `driver`,
            # `count` or `device_ids` is specified here.
            - capabilities:
                - gpu
volumes:
  # Named volume with no explicit options; it is mounted by the `speaches`
  # service at its Hugging Face hub cache path.
  hf-hub-cache: {}