# Slim Python 3.11 base image; later steps add system packages with apt-get.
FROM python:3.11-slim

# PYTHONDONTWRITEBYTECODE: do not write .pyc files into the image/container.
ENV PYTHONDONTWRITEBYTECODE=1
# PYTHONUNBUFFERED: unbuffered stdout/stderr so logs show up immediately.
ENV PYTHONUNBUFFERED=1

# Relative paths in later COPY / RUN / CMD instructions resolve against /app.
WORKDIR /app

# System dependencies:
# - postgresql / postgresql-contrib / redis-server: internal services for the Space
# - build-essential: compiler toolchain for Python packages with native extensions
#   (libpq-dev is not installed; add it if a dependency must build against libpq)
# - curl / ca-certificates / zstd: Ollama manual install
# - libgomp1: common runtime dependency for ML libraries such as FAISS / torch stack
RUN set -eu; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        libgomp1 \
        postgresql \
        postgresql-contrib \
        redis-server \
        zstd; \
    rm -rf /var/lib/apt/lists/*

# Install Ollama manually by streaming the release tarball into /usr.
# Do not use systemd inside the Space container; start Ollama from start-space.sh.
# The pipeline runs under bash with pipefail so that a failed download (curl)
# or decompression (zstd) aborts the build; with the default /bin/sh only the
# exit status of the last command (tar) would be checked.
RUN ["/bin/bash", "-o", "pipefail", "-c", "curl -fsSL https://ollama.com/download/ollama-linux-amd64.tar.zst | zstd -dc | tar -x -C /usr"]

# Ollama runtime/model configuration.
# Server-side settings: listen address, model storage directory, and
# context-length / keep-alive / parallelism values passed via the environment.
ENV OLLAMA_HOST=127.0.0.1:11434 \
    OLLAMA_MODELS=/opt/ollama/models \
    OLLAMA_CONTEXT_LENGTH=8192 \
    OLLAMA_KEEP_ALIVE=5m \
    OLLAMA_NUM_PARALLEL=1

# Client-side settings pointing at the local Ollama endpoint on the same port.
# NOTE(review): presumably read by the application code; confirm against app/.
ENV LOCAL_LLM_BASE_URL=http://127.0.0.1:11434/v1 \
    LOCAL_LLM_API_KEY=ollama \
    LOCAL_LLM_MODEL=qwen3:0.6b

# Create the Ollama model store and open its permissions to every user.
# NOTE(review): presumably needed because the runtime UID is not known at
# build time; confirm before tightening the mode.
RUN set -eu; \
    mkdir -p /opt/ollama/models; \
    chmod -R 777 /opt/ollama

# Pre-pull the small local LLM into the Docker image.
# This avoids downloading the model every time the Space starts.
#
# A temporary Ollama server is started for this RUN step only:
# - wait up to 120 seconds for its API to answer; on timeout, print the server
#   log and fail the build instead of failing with no diagnostics;
# - pull the model named by LOCAL_LLM_MODEL so the baked model always matches
#   the configured one;
# - stop the server and wait for it to exit before the layer is committed;
# - re-open permissions on the model store, because files created by the pull
#   did not exist when the directory was first chmod-ed;
# - remove the build log so it does not end up in the image.
RUN set -eux; \
    ollama serve > /tmp/ollama-build.log 2>&1 & \
    OLLAMA_PID="$!"; \
    ready=0; \
    for i in $(seq 1 120); do \
      if curl -fsS http://127.0.0.1:11434/api/tags >/dev/null 2>&1; then \
        ready=1; \
        echo "Ollama build server is ready."; \
        break; \
      fi; \
      sleep 1; \
    done; \
    if [ "${ready}" -ne 1 ]; then \
      echo "Ollama build server did not become ready within 120 seconds." >&2; \
      cat /tmp/ollama-build.log >&2 || true; \
      kill "${OLLAMA_PID}" || true; \
      exit 1; \
    fi; \
    ollama pull "${LOCAL_LLM_MODEL}"; \
    ollama list; \
    kill "${OLLAMA_PID}" || true; \
    wait "${OLLAMA_PID}" || true; \
    chmod -R a+rwX "${OLLAMA_MODELS}"; \
    rm -f /tmp/ollama-build.log

# Install Python dependencies once before copying the application, so this
# layer is reused from cache until requirements.txt changes.
# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
COPY requirements.txt ./requirements.txt
RUN python -m pip install --no-cache-dir --upgrade pip \
    && python -m pip install --no-cache-dir -r requirements.txt

# Copy application and deployment files.
# build_hf_space_bundle.py places Dockerfile, README.md, and start-space.sh
# at the root of build/hf-space, so these COPY paths are relative to that bundle.
# Single files go into the working directory in one instruction; each
# directory keeps its own COPY so its name is preserved at the destination.
COPY alembic.ini README.md start-space.sh ./
COPY alembic ./alembic
COPY app ./app
COPY sample_docs ./sample_docs
COPY scripts ./scripts

# Mark the container entrypoint and the service launch scripts as executable.
RUN chmod +x \
        ./start-space.sh \
        ./scripts/start-api.sh \
        ./scripts/start-worker.sh

# Hugging Face Space runtime configuration.
# FastAPI must listen on 7860. start-api.sh reads PORT first, then API_PORT,
# then falls back to 8000, so both PORT and API_PORT are set.
ENV APP_NAME="Smart Document QA" \
    APP_ENV=production \
    DEMO_MODE=true \
    SERVICE_ROLE=api \
    RUN_MIGRATIONS_ON_START=false \
    PORT=7860 \
    API_PORT=7860 \
    API_HOST=0.0.0.0

# Backing services on the loopback interface (PostgreSQL, Redis) and the
# Celery concurrency setting.
ENV DATABASE_URL=postgresql+psycopg://docqa:docqa@127.0.0.1:5432/docqa \
    REDIS_URL=redis://127.0.0.1:6379/0 \
    CELERY_CONCURRENCY=1

# OpenAI-related settings are defined but left empty in the image.
ENV OPENAI_BASE_URL="" \
    OPENAI_API_KEY="" \
    OPENAI_MODEL="" \
    DEFAULT_OPENAI_MODEL=""

# Embedding model configuration.
ENV EMBEDDING_MODEL=infgrad/Jasper-Token-Compression-600M \
    EMBEDDING_DEVICE=cpu \
    EMBEDDING_BATCH_SIZE=2 \
    EMBEDDING_TRUST_REMOTE_CODE=true \
    EMBEDDING_QUERY_PROMPT_NAME=query \
    EMBEDDING_COMPRESSION_RATIO=0.5 \
    EMBEDDING_TOKENIZER_PADDING_SIDE=left

# Chunking and retrieval parameters.
ENV CHUNK_TARGET_CHARS=800 \
    CHUNK_MAX_CHARS=900 \
    CHUNK_OVERLAP_CHARS=120 \
    TOP_K=6 \
    MAX_EVIDENCE_CHUNKS=4 \
    MAX_CONTEXT_CHARS=10000 \
    MIN_RETRIEVAL_SCORE=0.35

# Upload limits.
ENV MAX_UPLOAD_MB=15 \
    ALLOWED_EXTENSIONS=.pdf,.docx

# Documents the port the API listens on; same value as PORT / API_PORT.
EXPOSE 7860

# Exec-form CMD: start-space.sh (relative to WORKDIR /app) is executed directly,
# without a wrapping shell.
# NOTE(review): no USER instruction is visible in this file, so the image's
# default user applies unless the hosting platform overrides it; confirm.
CMD ["./start-space.sh"]