# syntax=docker/dockerfile:1.4
ARG TARGETPLATFORM
ARG BUILDPLATFORM
# Other build arguments
ARG PYTHON_VERSION=3.10
# Base stage with system dependencies
FROM python:${PYTHON_VERSION}-slim AS base
# Declare ARG variables again within the build stage
ARG INSTALL_TYPE=basic
ARG ENABLE_GPU=false
ARG TARGETPLATFORM
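# Example build invocation (a sketch; the image tag crawl4ai:local is an assumed
# name, not defined by this repo -- adjust tag and build args as needed):
#   docker buildx build --platform linux/amd64 \
#     --build-arg INSTALL_TYPE=all \
#     -t crawl4ai:local .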
# Platform-specific labels
LABEL maintainer="unclecode"
LABEL description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper"
LABEL version="1.0"
# Environment setup
ENV PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1 \
PIP_NO_CACHE_DIR=1 \
PIP_DISABLE_PIP_VERSION_CHECK=1 \
PIP_DEFAULT_TIMEOUT=100 \
DEBIAN_FRONTEND=noninteractive
# Install system dependencies as root
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
curl \
wget \
gnupg \
git \
cmake \
pkg-config \
python3-dev \
libjpeg-dev \
libpng-dev \
&& rm -rf /var/lib/apt/lists/*
# Playwright system dependencies for Linux
RUN apt-get update && apt-get install -y --no-install-recommends \
libglib2.0-0 \
libnss3 \
libnspr4 \
libatk1.0-0 \
libatk-bridge2.0-0 \
libcups2 \
libdrm2 \
libdbus-1-3 \
libxcb1 \
libxkbcommon0 \
libx11-6 \
libxcomposite1 \
libxdamage1 \
libxext6 \
libxfixes3 \
libxrandr2 \
libgbm1 \
libpango-1.0-0 \
libcairo2 \
libasound2 \
libatspi2.0-0 \
&& rm -rf /var/lib/apt/lists/*
# GPU support if enabled and architecture is supported
RUN if [ "$ENABLE_GPU" = "true" ] && [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
apt-get update && apt-get install -y --no-install-recommends \
nvidia-cuda-toolkit \
&& rm -rf /var/lib/apt/lists/* ; \
else \
echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \
fi
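# Example GPU-enabled build (a sketch; the tag crawl4ai:gpu is an assumed name;
# the CUDA toolkit step above only runs for linux/amd64 targets):
#   docker buildx build --platform linux/amd64 --build-arg ENABLE_GPU=true -t crawl4ai:gpu .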
# Add a non-root user
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"
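# Note: running pip as this non-root user falls back to user installs under
# ~/.local, which is why ~/.local/bin is added to PATH above.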
# Create and set working directory
WORKDIR /app
# Copy the entire project with correct ownership
COPY --chown=user . .
# Install base requirements
RUN pip install --no-cache-dir -r requirements.txt
# Install required libraries for the FastAPI server
RUN pip install fastapi uvicorn psutil
# Install ML dependencies first for better layer caching
RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
pip install --no-cache-dir \
torch \
torchvision \
torchaudio \
scikit-learn \
nltk \
transformers \
tokenizers && \
python -m nltk.downloader punkt stopwords ; \
fi
# Install the package
RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
pip install ".[all]" && \
python -m crawl4ai.model_loader ; \
elif [ "$INSTALL_TYPE" = "torch" ] ; then \
pip install ".[torch]" ; \
elif [ "$INSTALL_TYPE" = "transformer" ] ; then \
pip install ".[transformer]" && \
python -m crawl4ai.model_loader ; \
else \
pip install "." ; \
fi
# Install MkDocs and required plugins
RUN pip install --no-cache-dir \
mkdocs \
mkdocs-material \
mkdocs-terminal \
pymdown-extensions
# Build MkDocs documentation
RUN mkdocs build
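# MkDocs writes the generated static site to ./site by default (or to the
# site_dir configured in mkdocs.yml).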
# Install the Playwright Chromium browser
RUN python -m playwright install chromium
# Expose ports
EXPOSE 8000 11235 9222 8080
# Start the FastAPI server
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "11235"]
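# Example run (a sketch; the image tag is the assumed name from the build
# example above, and only the API port is mapped here):
#   docker run --rm -p 11235:11235 crawl4ai:local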