services:
  nemotron-parse:
    # Build from the current directory as context and tag the result.
    build: .
    image: nemotron-parse:latest
    working_dir: /workspace

    # Reserve all NVIDIA GPUs for this container.
    deploy:
      resources:
        reservations:
          devices:
            - driver: nvidia
              count: all
              capabilities:
                - gpu

    volumes:
      # Bind-mount the project tree so edits made on the host show up in the
      # container right away.
      - .:/workspace
      # Anonymous volume that masks the host's .venv (wrong Python version),
      # so uv uses /opt/venv inside the container instead of the host-side
      # virtual environment.
      - /workspace/.venv
      # Named volume for the HuggingFace model cache: the RADIO encoder
      # weights survive container restarts and are not re-downloaded on
      # every run.
      - hf-cache:/root/.cache/huggingface

    environment:
      # Same path as the hf-cache mount target above.
      HF_HOME: /root/.cache/huggingface

    # Keep stdin open and allocate a TTY so the container stays alive for
    # interactive use (exec, attach, etc.).
    stdin_open: true
    tty: true

volumes:
  hf-cache: