FROM python:3.11-slim

# Do not write .pyc files, and do not buffer stdout/stderr so output is emitted
# immediately.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

WORKDIR /app

# Run all following RUN steps under bash with pipefail, so that a failure in an
# upstream command of a pipeline (e.g. the curl in `curl | zstd | tar`) fails the
# build instead of being hidden by the exit status of the last command.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# System dependencies:
# - postgresql / postgresql-contrib / redis-server: internal services for the Space
# - build-essential: toolchain for Python packages with native extensions
# - curl / ca-certificates / zstd: Ollama manual install
# - libgomp1: common runtime dependency for ML libraries such as FAISS / torch stack
# NOTE(review): libpq-dev is NOT installed here; add it to the list if a
# requirement has to compile against libpq.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        libgomp1 \
        postgresql \
        postgresql-contrib \
        redis-server \
        zstd \
    && rm -rf /var/lib/apt/lists/*

# Install Ollama manually by unpacking the release archive under /usr.
# Do not use systemd inside the Space container; start Ollama from start-space.sh.
# With pipefail active, a failed download aborts the build here.
# NOTE(review): the URL is unversioned and amd64-only, so the installed Ollama
# release can change between builds.
RUN curl -fsSL https://ollama.com/download/ollama-linux-amd64.tar.zst \
    | zstd -dc \
    | tar -x -C /usr

# Ollama runtime/model configuration.
ENV OLLAMA_HOST=127.0.0.1:11434 \
    OLLAMA_MODELS=/opt/ollama/models \
    OLLAMA_CONTEXT_LENGTH=8192 \
    OLLAMA_KEEP_ALIVE=5m \
    OLLAMA_NUM_PARALLEL=1 \
    LOCAL_LLM_BASE_URL=http://127.0.0.1:11434/v1 \
    LOCAL_LLM_API_KEY=ollama \
    LOCAL_LLM_MODEL=qwen3:0.6b

# Create the model store and make it world-writable.
# NOTE(review): presumably required because the container may not run as root at
# runtime; confirm, and tighten the mode if a fixed UID can be relied on.
RUN mkdir -p /opt/ollama/models \
    && chmod -R 777 /opt/ollama

# Pre-pull the small local LLM into the Docker image.
# This avoids downloading the model every time the Space starts.
# Steps: start a temporary `ollama serve` in the background, poll /api/tags for up
# to 120 seconds, pull the model named by LOCAL_LLM_MODEL (so the baked model always
# matches the runtime setting), then stop the temporary server.
RUN set -eux; \
    ollama serve > /tmp/ollama-build.log 2>&1 & \
    OLLAMA_PID="$!"; \
    for i in $(seq 1 120); do \
        if curl -fsS http://127.0.0.1:11434/api/tags >/dev/null 2>&1; then \
            echo "Ollama build server is ready."; \
            break; \
        fi; \
        sleep 1; \
    done; \
    # Final readiness check: if the server never came up, show its log and fail.
    if ! curl -fsS http://127.0.0.1:11434/api/tags >/dev/null; then \
        cat /tmp/ollama-build.log; \
        exit 1; \
    fi; \
    ollama pull "${LOCAL_LLM_MODEL}"; \
    ollama list; \
    kill "${OLLAMA_PID}" || true; \
    sleep 2

# Install Python dependencies once before copying the application, so this layer
# is reused until requirements.txt changes. --no-cache-dir keeps pip's download
# cache out of the image layer.
COPY requirements.txt ./requirements.txt
RUN python -m pip install --no-cache-dir --upgrade pip \
    && python -m pip install --no-cache-dir -r requirements.txt

# Copy application and deployment files.
# build_hf_space_bundle.py puts Dockerfile, README.md, and start-space.sh at the
# root of build/hf-space; every COPY source below is relative to that bundle root.
# Directories are copied one by one, then the individual top-level files together.
COPY alembic ./alembic
COPY app ./app
COPY sample_docs ./sample_docs
COPY scripts ./scripts
COPY alembic.ini start-space.sh README.md ./

# Mark the launcher scripts as executable.
RUN chmod +x \
        ./start-space.sh \
        ./scripts/start-api.sh \
        ./scripts/start-worker.sh

# Hugging Face Space runtime configuration.

# Application identity and HTTP server settings.
# FastAPI must listen on 7860. start-api.sh reads PORT first, then API_PORT, then
# falls back to 8000, so PORT and API_PORT are both set to 7860.
ENV APP_NAME="Smart Document QA" \
    APP_ENV=production \
    DEMO_MODE=true \
    SERVICE_ROLE=api \
    RUN_MIGRATIONS_ON_START=false \
    PORT=7860 \
    API_PORT=7860 \
    API_HOST=0.0.0.0

# Backing services reached over loopback, plus worker concurrency.
ENV DATABASE_URL=postgresql+psycopg://docqa:docqa@127.0.0.1:5432/docqa \
    REDIS_URL=redis://127.0.0.1:6379/0 \
    CELERY_CONCURRENCY=1

# OpenAI-related settings are defined but left empty in the image.
ENV OPENAI_BASE_URL= \
    OPENAI_API_KEY= \
    OPENAI_MODEL= \
    DEFAULT_OPENAI_MODEL=

# Embedding model settings.
ENV EMBEDDING_MODEL=infgrad/Jasper-Token-Compression-600M \
    EMBEDDING_DEVICE=cpu \
    EMBEDDING_BATCH_SIZE=2 \
    EMBEDDING_TRUST_REMOTE_CODE=true \
    EMBEDDING_QUERY_PROMPT_NAME=query \
    EMBEDDING_COMPRESSION_RATIO=0.5 \
    EMBEDDING_TOKENIZER_PADDING_SIDE=left

# Chunking, retrieval, and upload limits.
ENV CHUNK_TARGET_CHARS=800 \
    CHUNK_MAX_CHARS=900 \
    CHUNK_OVERLAP_CHARS=120 \
    TOP_K=6 \
    MAX_EVIDENCE_CHUNKS=4 \
    MAX_CONTEXT_CHARS=10000 \
    MIN_RETRIEVAL_SCORE=0.35 \
    MAX_UPLOAD_MB=15 \
    ALLOWED_EXTENSIONS=.pdf,.docx

# Documented listening port; matches PORT / API_PORT above.
EXPOSE 7860

# Exec-form entry point: the Space is started by start-space.sh in /app.
CMD ["./start-space.sh"]