# syntax=docker/dockerfile:1.6
#
# Two-stage build:
#   1. `builder` installs pip deps and pre-downloads the three large model
#      assets into HF / spacy cache dirs.
#   2. final image copies only the populated caches + site-packages + repo,
#      leaving the apt build deps (build-essential, git) behind in `builder`.
#
# Models pre-baked:
#   - sentence-transformers/all-MiniLM-L6-v2  (~90 MB)
#   - cross-encoder/nli-deberta-v3-xsmall     (~80 MB)
#   - spacy en_core_web_sm                    (~12 MB)
#
# Artifacts (chroma index, intent model, drift timeline) are COPYed in so
# the first request is instant — no cold-start build of the index.

# Python version tag used to pick the base image of both stages
# (python:<PY>-slim). Declared before the first FROM, so it is only in scope
# for FROM lines unless a stage re-declares it.
ARG PY=3.11

# =========================================================================
FROM python:${PY}-slim AS builder
# =========================================================================

# Build-stage environment: disable pip's cache and version check, skip .pyc
# generation, and route the Hugging Face / sentence-transformers caches to
# /opt/hf-cache so they can be copied out of this stage as a single tree.
ENV HF_HOME=/opt/hf-cache \
    HF_HUB_DISABLE_TELEMETRY=1 \
    SENTENCE_TRANSFORMERS_HOME=/opt/hf-cache/sentence-transformers \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_NO_CACHE_DIR=1 \
    PYTHONDONTWRITEBYTECODE=1

# Compiler toolchain + git for dependencies that may need a source build
# (chromadb -> hnswlib). torch ships its own wheel, so gcc is usually not
# needed for it, but build-essential stays available for platforms without
# prebuilt wheels. The apt lists are removed in the same layer.
RUN <<'EOF'
set -e
apt-get update
apt-get install -y --no-install-recommends build-essential git
rm -rf /var/lib/apt/lists/*
EOF

WORKDIR /app

# Python dependencies. torch is installed first from the CPU-only wheel index
# to avoid the multi-GB CUDA pull; the rest follows from requirements.txt,
# and gunicorn is added as the production server.
COPY requirements.txt ./
RUN <<'EOF'
set -e
pip install --upgrade pip
pip install --extra-index-url https://download.pytorch.org/whl/cpu "torch==2.5.1+cpu"
pip install -r requirements.txt
pip install gunicorn==23.0.0
EOF

# Bake the models into the cache now so the runtime image has no network
# dependency: each model is instantiated once at build time.
RUN python - <<'PY'
import sentence_transformers as st

print("downloading all-MiniLM-L6-v2 ...")
st.SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
print("downloading nli-deberta-v3-xsmall ...")
st.CrossEncoder("cross-encoder/nli-deberta-v3-xsmall")
PY

# spaCy's small English pipeline, fetched via spaCy's own downloader.
RUN python -m spacy download en_core_web_sm

# =========================================================================
FROM python:${PY}-slim AS final
# =========================================================================

# Re-declare the global build arg: an ARG defined before the first FROM is
# only visible to FROM lines until a stage declares it again. It is used
# below to locate site-packages, so PY must be a MAJOR.MINOR version
# (e.g. 3.11, 3.12) matching the interpreter's lib directory name.
ARG PY

# Runtime environment: unbuffered logs, no .pyc writes, the same cache
# locations the builder populated, and the HF libraries forced offline so
# they only read the pre-baked models. PORT is the default listen port.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    HF_HOME=/opt/hf-cache \
    SENTENCE_TRANSFORMERS_HOME=/opt/hf-cache/sentence-transformers \
    HF_HUB_DISABLE_TELEMETRY=1 \
    TRANSFORMERS_OFFLINE=1 \
    HF_HUB_OFFLINE=1 \
    PORT=7860

# Runtime-only system libs (no compilers needed at runtime).
RUN apt-get update && apt-get install -y --no-install-recommends \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Non-root user for hosts that enforce it (HF Spaces, Cloud Run, etc.).
# Created BEFORE the COPY steps so files can receive their final owner via
# COPY --chown; a later `chown -R` would store every touched file (model
# caches, artifacts) a second time in an extra layer. /app is created here
# so the directory itself is owned by appuser as well.
RUN useradd --create-home --uid 1000 appuser \
    && mkdir -p /app \
    && chown appuser:appuser /app

# Pull installed site-packages, console scripts, and model caches from
# the builder layer. This keeps the final image free of apt build deps.
# The site-packages path follows ${PY} so it stays in sync with the base
# image chosen in FROM.
COPY --from=builder /usr/local/lib/python${PY}/site-packages /usr/local/lib/python${PY}/site-packages
COPY --from=builder /usr/local/bin/gunicorn /usr/local/bin/gunicorn
COPY --from=builder --chown=appuser:appuser /opt/hf-cache /opt/hf-cache

WORKDIR /app

# Code + artifacts. Copying artifacts/ means the chroma index, intent model,
# and drift timeline are already present — first hit is instant.
COPY --chown=appuser:appuser src/ ./src/
COPY --chown=appuser:appuser templates/ ./templates/
COPY --chown=appuser:appuser app.py run_all.py ./
COPY --chown=appuser:appuser artifacts/ ./artifacts/

USER appuser

EXPOSE 7860

# HF Spaces routes traffic to $app_port (7860 from the README front matter).
# Other hosts override via $PORT at runtime; we default to 7860 so the Spaces
# health check finds a listener even if PORT is not injected.
# A shell is needed to expand $PORT; `exec` replaces that shell with gunicorn
# so gunicorn runs as PID 1 and receives SIGTERM from `docker stop` directly.
CMD ["sh", "-c", "exec gunicorn app:app --workers=2 --timeout=120 --bind 0.0.0.0:${PORT:-7860}"]