# Compose override that runs the `speaches` service from a CUDA image with
# GPU access. It layers on top of the base definition in compose.yaml.

# Optional: uncomment to also load the observability compose file.
# include:
#   - compose.observability.yaml

services:
  speaches:
    # Inherit the base `speaches` service definition from compose.yaml;
    # the keys below add to it or override it.
    extends:
      file: compose.yaml
      service: speaches
    image: ghcr.io/speaches-ai/speaches:latest-cuda
    build:
      args:
        # Base image handed to the build as the BASE_IMAGE build argument.
        BASE_IMAGE: nvidia/cuda:12.6.2-cudnn-runtime-ubuntu24.04
    environment:
      - WHISPER__MODEL=Systran/faster-whisper-large-v3
    volumes:
      # Named volume (declared at the bottom of this file) mounted over the
      # Hugging Face hub cache directory inside the container.
      # NOTE(review): presumably so downloaded models survive container
      # recreation -- confirm the image runs as the `ubuntu` user.
      - hf-hub-cache:/home/ubuntu/.cache/huggingface/hub
    deploy:
      resources:
        reservations:
          devices:
            # Reserve GPU device(s) for this service. No `count` or
            # `device_ids` is given, so Compose's default applies.
            - capabilities: ["gpu"]

volumes:
  # Named volume with no options set (Compose defaults apply).
  hf-hub-cache: