# Multi-stage Dockerfile for tau-rag
# ---------------------------------------------------------------------------
# STAGE 1: slim — slim Python + minimal deps (runs mock + extractive).
# STAGE 2: full — adds torch, faiss, sentence-transformers (AlephBERT).
#
# Usage:
#   docker build -t tau-rag:slim --target slim .
#   docker build -t tau-rag:full --target full .
#   docker run -p 8000:8000 tau-rag:slim
#
# Both stages run the server as the unprivileged user `app` (UID 10001).
# ---------------------------------------------------------------------------

FROM python:3.11-slim AS slim

WORKDIR /app

# Unprivileged runtime user with a stable numeric UID/GID so orchestrators can
# verify non-root execution. /app is handed to that user so the application
# can still write next to its code if it needs to.
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app --create-home --home-dir /home/app app \
    && chown app:app /app

# Only what we need for the zero-dep pipeline + FastAPI.
# Installed BEFORE the source is copied so this layer stays cached when only
# application code changes.
RUN pip install --no-cache-dir \
        "pydantic>=2.6" \
        "rank-bm25>=0.2.2" \
        "numpy>=1.26" \
        "fastapi>=0.111" \
        "uvicorn[standard]>=0.29" \
        "pypdf>=3.0"

# The build context root is the package itself: it lands in /app/tau_rag and
# is importable as `tau_rag` through PYTHONPATH.
ENV PYTHONPATH=/app

# NOTE(review): `COPY .` ships the whole build context; keep a .dockerignore
# next to this file (.git, caches, local indexes, .env) to keep the image small.
COPY --chown=app:app pyproject.toml /app/
COPY --chown=app:app . /app/tau_rag

USER app

# Documentation only; publish with `docker run -p 8000:8000 ...`.
EXPOSE 8000

CMD ["uvicorn", "tau_rag.api.fastapi_app:app", \
     "--host", "0.0.0.0", "--port", "8000", "--workers", "2"]

# ---------------------------------------------------------------------------
FROM slim AS full

# Adds AlephBERT + FAISS — ~3GB image but real semantic retrieval.
# pip writes to the system site-packages, so switch back to root for this step.
USER root
RUN pip install --no-cache-dir \
        "torch>=2.0" \
        "transformers>=4.40" \
        "sentence-transformers>=2.6" \
        "faiss-cpu>=1.8"

# Pin the Hugging Face cache location so the model downloaded during the build
# is the one found at runtime.
# NOTE(review): assumes the embedder loads through the Hugging Face hub cache — confirm.
ENV HF_HOME=/home/app/.cache/huggingface

# Drop privileges before pre-warming so the cache is owned by the runtime user.
USER app

# Pre-warm AlephBERT so cold-start at runtime is just a memmap open.
# Best-effort: a failed download must not fail the build, but it is reported
# on stderr instead of being silently swallowed.
RUN python -c "from tau_rag.core.embedders import make_transformers_embedder; \
make_transformers_embedder('onlplab/alephbert-base')" \
    || echo "WARNING: AlephBERT pre-warm failed; the model will be fetched on first use" >&2

# WORKDIR, PYTHONPATH, EXPOSE and CMD are inherited unchanged from the `slim` stage.