# syntax=docker/dockerfile:1.4

# The parser directive above only takes effect as the first line of the file,
# before any other comment, blank line, or instruction.

# --- Disabled copy of the recipe ---------------------------------------------
# Commented-out copy of the instructions used further down. Only the first
# line of each multi-line instruction is present here.
#
# ARG TARGETPLATFORM
# ARG BUILDPLATFORM
#
# ARG PYTHON_VERSION=3.10
#
# FROM python:${PYTHON_VERSION}-slim as base
#
# ARG INSTALL_TYPE=basic
# ARG ENABLE_GPU=false
#
# LABEL maintainer="unclecode"
# LABEL description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper"
# LABEL version="1.0"
#
# ENV PYTHONUNBUFFERED=1 \
#
# RUN apt-get update && apt-get install -y --no-install-recommends \
#
# RUN apt-get update && apt-get install -y --no-install-recommends \
#
# RUN if [ "$ENABLE_GPU" = "true" ] && [ "$TARGETPLATFORM" = "linux/amd64" ] ; then \
#
# WORKDIR /app
#
# COPY . .
#
# RUN pip install --no-cache-dir -r requirements.txt
#
# RUN pip install fastapi uvicorn psutil
#
# RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
#
# RUN if [ "$INSTALL_TYPE" = "all" ] ; then \
#
# RUN pip install --no-cache-dir \
#
# RUN mkdocs build
#
# RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
#
# EXPOSE 8000 11235 9222 8080
#
# CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "11235"]
# -----------------------------------------------------------------------------

# Platform arguments. Declared here they are in the global scope; a build stage
# has to redeclare them with ARG before any of its RUN steps can read them.
ARG TARGETPLATFORM
ARG BUILDPLATFORM

# Python version of the base image (override with --build-arg PYTHON_VERSION=...).
ARG PYTHON_VERSION=3.10

# Base stage with system dependencies
FROM python:${PYTHON_VERSION}-slim AS base

# Build-time switches. They are declared inside the stage so the RUN steps
# below can read them.
#   ENABLE_GPU:   "true" requests the CUDA toolkit step
#   INSTALL_TYPE: selects which optional extras get installed ("basic" default)
ARG ENABLE_GPU=false
ARG INSTALL_TYPE=basic

# Image metadata
LABEL maintainer="unclecode" \
      description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper" \
      version="1.0"

# Build-only setting: keeps apt/debconf non-interactive during the RUN steps of
# this stage. As an ARG it is visible to RUN but is not persisted into the
# environment of containers started from the image.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment for Python and pip:
#   PYTHONUNBUFFERED / PYTHONDONTWRITEBYTECODE - unbuffered output, no .pyc files
#   PIP_*                                      - no pip cache, no version check,
#                                                100 s default network timeout
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_DEFAULT_TIMEOUT=100

# Toolchain and general-purpose system packages (installed as root).
# The apt package index is removed in the same layer to keep the layer small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        curl \
        git \
        gnupg \
        libjpeg-dev \
        libpng-dev \
        pkg-config \
        python3-dev \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Shared libraries the Playwright-managed browser needs on Linux.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libasound2 \
        libatk-bridge2.0-0 \
        libatk1.0-0 \
        libatspi2.0-0 \
        libcairo2 \
        libcups2 \
        libdbus-1-3 \
        libdrm2 \
        libgbm1 \
        libglib2.0-0 \
        libnspr4 \
        libnss3 \
        libpango-1.0-0 \
        libx11-6 \
        libxcb1 \
        libxcomposite1 \
        libxdamage1 \
        libxext6 \
        libxfixes3 \
        libxkbcommon0 \
        libxrandr2 \
    && rm -rf /var/lib/apt/lists/*

# GPU support: install the CUDA toolkit only when ENABLE_GPU=true and the image
# is being built for linux/amd64.
#
# TARGETPLATFORM has to be redeclared inside the stage: an ARG that is only
# declared before FROM is not visible to RUN, which would leave the variable
# empty and make the platform test below always fail.
# NOTE(review): nvidia-cuda-toolkit is expected to come from the base image's
# apt sources — confirm the required repository component is enabled there.
ARG TARGETPLATFORM
RUN if [ "$ENABLE_GPU" = "true" ] && [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
        apt-get update \
        && apt-get install -y --no-install-recommends nvidia-cuda-toolkit \
        && rm -rf /var/lib/apt/lists/*; \
    else \
        echo "Skipping NVIDIA CUDA Toolkit installation (unsupported platform or GPU disabled)"; \
    fi

# Create an unprivileged account (UID 1000, with a home directory) and switch
# to it for all following instructions. Its per-user bin directory is put first
# on PATH so executables found there take precedence.
RUN useradd --create-home --uid 1000 user
USER user
ENV PATH="/home/user/.local/bin:${PATH}"

# Working directory for the application (created if it does not exist).
WORKDIR /app

# Copy only the dependency manifest first, so the dependency layers below stay
# cached until requirements.txt itself changes rather than on every source edit.
# NOTE(review): assumes requirements.txt does not reference other files from
# the build context (e.g. "-r other.txt" or local paths) — confirm.
COPY --chown=user requirements.txt ./

# Base requirements
RUN pip install --no-cache-dir -r requirements.txt

# Libraries required by the FastAPI server
RUN pip install --no-cache-dir fastapi uvicorn psutil

# Optional ML stack, only for INSTALL_TYPE=all. It is installed before the
# project sources are copied so this layer is reused across source changes.
# The NLTK "punkt" and "stopwords" data sets are downloaded in the same step.
RUN if [ "$INSTALL_TYPE" = "all" ]; then \
        pip install --no-cache-dir \
            nltk \
            scikit-learn \
            tokenizers \
            torch \
            torchaudio \
            torchvision \
            transformers \
        && python -m nltk.downloader punkt stopwords; \
    fi

# Copy the entire project with correct ownership. This is the most frequently
# changing input, so it comes after the dependency layers.
COPY --chown=user . .

# Install the project itself from the working directory, with the extras
# selected by INSTALL_TYPE:
#   all          -> ".[all]", then run crawl4ai.model_loader
#   torch        -> ".[torch]"
#   transformer  -> ".[transformer]", then run crawl4ai.model_loader
#   anything else (including the default "basic") -> plain "."
RUN case "$INSTALL_TYPE" in \
        all) \
            pip install ".[all]" \
            && python -m crawl4ai.model_loader ;; \
        torch) \
            pip install ".[torch]" ;; \
        transformer) \
            pip install ".[transformer]" \
            && python -m crawl4ai.model_loader ;; \
        *) \
            pip install "." ;; \
    esac

# Documentation toolchain: MkDocs plus its theme and extension packages.
RUN pip install --no-cache-dir mkdocs mkdocs-material mkdocs-terminal pymdown-extensions

# Render the documentation from the project in the working directory.
RUN mkdocs build

# Download the Chromium build used by Playwright. The command is the same on
# every platform, so it runs once and unconditionally.
RUN python -m playwright install chromium

# Ports the image documents as listening (EXPOSE does not publish them).
# The server started below binds to 11235.
# NOTE(review): the consumers of 8000, 9222 and 8080 are not visible in this
# file — confirm they are still needed.
EXPOSE 8000 11235 9222 8080

# Start the FastAPI server (exec form, so uvicorn receives signals directly).
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "11235"]