# syntax=docker/dockerfile:1.6
# Unified DriftCall Space — single image serving:
#   /                    static React site
#   /reset … /healthz    OpenEnv API
#   /demo                Gradio voice demo (mounted, not iframed)
#   /env /lora /source   server-rendered HTML pages
#
# Heavier than the env-only Space because we bake the demo's model deps
# (transformers + peft + unsloth + torch + audio) so /demo runs locally.
# HF_HUB_OFFLINE is OFF so /demo can pull Gemma-3n + the LoRA on demand.

# -------- builder --------
# Installs the Python dependencies into an isolated prefix (/install) and
# pre-downloads the small audio models into /weights. Only those two
# directories are carried over into the runtime stage.
FROM python:3.11-slim AS builder

ENV PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PYTHONDONTWRITEBYTECODE=1

WORKDIR /build

RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ffmpeg \
        git \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# Copy only the manifest first so the dependency layer stays cached until
# requirements.txt itself changes.
COPY requirements.txt ./
RUN pip install --prefix=/install -r requirements.txt

# Pre-pull TTS / ASR weights so the audio path is offline-fast.
# The Gemma base + LoRA stay online — too big to bake; they'll cache on
# first /demo session.
# PYTHONPATH points at the --prefix install above so huggingface_hub is
# importable by the builder's interpreter.
RUN PYTHONPATH=/install/lib/python3.11/site-packages \
    python -c "from huggingface_hub import snapshot_download; \
    snapshot_download('hexgrad/Kokoro-82M', cache_dir='/weights'); \
    snapshot_download('Systran/faster-whisper-small', cache_dir='/weights')"

# -------- runtime --------
FROM python:3.11-slim AS runtime

ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    HF_HOME=/root/.cache/huggingface \
    WANDB_PROJECT=driftcall \
    WANDB_MODE=disabled \
    GRADIO_ANALYTICS_ENABLED=False \
    GRADIO_SERVER_NAME=0.0.0.0

# gcc + python3-dev are needed at runtime (not just build) because Triton
# JIT-compiles a CUDA helper on first GPU call (Inductor backend). Without a
# C compiler the GRPO subprocess crashes during model load with:
#   torch._inductor.exc.InductorError: Failed to find C compiler
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        ffmpeg \
        g++ \
        gcc \
        libsndfile1 \
        python3-dev \
    && rm -rf /var/lib/apt/lists/*

ENV CC=gcc CXX=g++

COPY --from=builder /install /usr/local

# snapshot_download(cache_dir='/weights') writes the models--<org>--<name>/
# folders directly under /weights. With only HF_HOME set, huggingface_hub
# resolves its cache to $HF_HOME/hub, so the baked weights must land in the
# hub/ subdirectory; copied to $HF_HOME itself they are never found and get
# re-downloaded on first use.
# NOTE(review): if application code passes cache_dir=/root/.cache/huggingface
# explicitly, point it at the hub/ subdirectory (or drop the argument).
COPY --from=builder /weights /root/.cache/huggingface/hub

WORKDIR /app
COPY cells/ ./cells/
COPY data/ ./data/
COPY scripts/ ./scripts/
COPY app.py demo_app.py openenv.yaml unified_app.py online_trainer.py ./
COPY BLOG.md ./BLOG.md
COPY site/ ./site/

# NOTE(review): there is no USER directive, so the server runs as root and
# HF_HOME lives under /root. Switching to a non-root user would require
# relocating HF_HOME and the baked weights and making any directories the app
# writes to owned by that user — deliberately left unchanged here.

EXPOSE 7860

# Probe with the Python stdlib because the slim image ships no curl/wget.
# urlopen raises on connection failure or an HTTP error status, which makes
# the probe exit non-zero.
HEALTHCHECK --interval=30s --timeout=5s --start-period=60s \
    CMD python -c "import urllib.request; \
    urllib.request.urlopen('http://127.0.0.1:7860/healthz', timeout=4).read()" \
    || exit 1

# Single worker — both Gradio (stateful UI) and our SessionPool prefer
# single-process; horizontal scaling is for multi-instance deployments.
CMD ["uvicorn", "unified_app:app", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--workers", "1", \
     "--timeout-keep-alive", "30", \
     "--log-level", "info"]