# Multi-stage Dockerfile for tau-rag
# ---------------------------------------------------------------------------
# STAGE 1: slim — slim Python + minimal deps (runs mock + extractive).
# STAGE 2: full — adds torch, faiss, sentence-transformers (AlephBERT).
#
# Usage:
#   docker build -t tau-rag:slim --target slim .
#   docker build -t tau-rag:full --target full .
#   docker run -p 8000:8000 tau-rag:slim
# ---------------------------------------------------------------------------

# Stage `slim`: Python 3.11 slim base with the lightweight dependency set and
# the application served by uvicorn on port 8000.
FROM python:3.11-slim AS slim

WORKDIR /app

# Install third-party dependencies BEFORE copying the source tree: the list
# below does not read pyproject.toml or any project file, so this layer stays
# cached when only application code changes.
# Only what we need for the zero-dep pipeline + FastAPI.
RUN pip install --no-cache-dir \
    "fastapi>=0.111" \
    "numpy>=1.26" \
    "pydantic>=2.6" \
    "pypdf>=3.0" \
    "rank-bm25>=0.2.2" \
    "uvicorn[standard]>=0.29"

# Application code: the whole build context becomes the `tau_rag` package
# directory under /app (made importable via PYTHONPATH below).
COPY pyproject.toml /app/
COPY . /app/tau_rag

ENV PYTHONPATH=/app

# NOTE(review): the container runs as root. Adding a USER directive here would
# also apply to the `full` stage (it builds FROM this one and runs pip install),
# so a non-root user needs to be introduced in both stages together.

# EXPOSE is documentation only; publish with `docker run -p 8000:8000`.
EXPOSE 8000
CMD ["uvicorn", "tau_rag.api.fastapi_app:app", \
     "--host", "0.0.0.0", "--port", "8000", "--workers", "2"]

# ---------------------------------------------------------------------------
FROM slim AS full
# Adds AlephBERT + FAISS — ~3GB image but real semantic retrieval.
# Everything else (WORKDIR, PYTHONPATH, EXPOSE and the uvicorn CMD) is
# inherited unchanged from the `slim` stage, so it is not repeated here.
RUN pip install --no-cache-dir \
    "faiss-cpu>=1.8" \
    "sentence-transformers>=2.6" \
    "torch>=2.0" \
    "transformers>=4.40"

# Pre-warm AlephBERT at build time to reduce cold-start work at runtime.
# NOTE(review): presumably this downloads the model weights into the image's
# cache — confirm against make_transformers_embedder.
# The step is best-effort (e.g. a build without network access must still
# succeed), but a failure is reported on stderr instead of being swallowed.
RUN python -c "from tau_rag.core.embedders import make_transformers_embedder; \
    make_transformers_embedder('onlplab/alephbert-base')" \
    || echo "WARNING: AlephBERT pre-warm failed; continuing without a pre-warmed model" >&2