# llama.cpp HTTP server image with a Gemma GGUF model, its draft model and its
# multimodal projector downloaded into the image at build time.
#
# The CMD at the bottom contains only arguments, so the executable must come
# from the base image's ENTRYPOINT.
# NOTE(review): confirm the base image's ENTRYPOINT is llama-server.
# NOTE(review): ":server" is a moving tag; pin a versioned tag or a digest if
# reproducible builds matter.
FROM ghcr.io/ggml-org/llama.cpp:server

# wget is only needed for the model downloads below. ca-certificates is listed
# explicitly because --no-install-recommends would otherwise skip it and the
# HTTPS downloads would fail certificate verification. The apt lists are removed
# in the same layer so they never end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Each file is fetched in its own layer so that changing one URL does not force
# the other (large) downloads to be repeated. --progress=dot:giga keeps the
# build log short for multi-gigabyte files.
# NOTE(review): the URLs track the repository's "main" revision and are not
# checksum-verified; consider pinning a commit hash and checking a sha256.

# Main model weights.
RUN wget --progress=dot:giga \
        "https://huggingface.co/unsloth/gemma-4-12B-it-qat-GGUF/resolve/main/gemma-4-12B-it-qat-UD-Q4_K_XL.gguf" \
        -O /gemma-4-12B-it-qat-UD-Q4_K_XL.gguf

# Draft model passed to --spec-draft-model below.
RUN wget --progress=dot:giga \
        "https://huggingface.co/unsloth/gemma-4-12B-it-qat-GGUF/resolve/main/mtp-gemma-4-12B-it.gguf" \
        -O /mtp-gemma-4-12B-it.gguf

# Multimodal projector passed to --mmproj below (stored under a shorter name).
RUN wget --progress=dot:giga \
        "https://huggingface.co/unsloth/gemma-4-12B-it-qat-GGUF/resolve/main/mmproj-F16.gguf" \
        -O /mmproj.gguf

# Documents the port given to --port below; EXPOSE does not publish it.
EXPOSE 7860

# Default server arguments (exec form, appended to the base image ENTRYPOINT).
# Flag meanings below are taken from llama-server's usual CLI and should be
# confirmed against `--help` of the binary in the base image:
#   -m / --spec-draft-model / --mmproj : the three files downloaded above
#   --host 0.0.0.0, --port 7860        : listen on all interfaces, port 7860
#   -t 2, -tb 2                        : presumably generation / batch thread counts
#   -c 4096                            : presumably context size in tokens
#   -fa on                             : presumably enables flash attention
#   -ub 128, -b 512                    : presumably physical / logical batch sizes
#   --spec-type draft-mtp,
#   --spec-draft-n-max 3               : presumably speculative decoding with at
#                                        most 3 drafted tokens per step
CMD [ \
    "-m", "/gemma-4-12B-it-qat-UD-Q4_K_XL.gguf", \
    "--spec-draft-model", "/mtp-gemma-4-12B-it.gguf", \
    "--mmproj", "/mmproj.gguf", \
    "--port", "7860", \
    "--host", "0.0.0.0", \
    "-t", "2", \
    "-tb", "2", \
    "-c", "4096", \
    "-fa", "on", \
    "-ub", "128", \
    "-b", "512", \
    "--spec-type", "draft-mtp", \
    "--spec-draft-n-max", "3" \
]