# Base image: the llama.cpp "server" image published on GitHub Container Registry.
# NOTE(review): "server" is a floating tag with no version component, so a
# rebuild may silently pick up a different llama.cpp build. Consider pinning
# by digest (ghcr.io/ggml-org/llama.cpp:server@sha256:...) for reproducible images.
# The CMD at the end of this file contains only flags, so it presumably relies
# on an ENTRYPOINT defined by this base image - confirm when changing the base.
FROM ghcr.io/ggml-org/llama.cpp:server

# Install wget, which the model download steps later in this file use.
# - apt-get instead of apt: apt's command-line interface is meant for
#   interactive use and is not guaranteed stable for scripts.
# - --no-install-recommends keeps the layer minimal; ca-certificates is listed
#   explicitly because wget only "recommends" it and the downloads are HTTPS.
# - The apt package lists are removed in the same layer so they never end up
#   in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Base URL of the Hugging Face repository all three GGUF files are fetched from.
# Exposed as a build argument so a mirror or a pinned revision can be selected
# with --build-arg; the default reproduces the original hard-coded location.
# NOTE(review): "resolve/main" follows the branch head, so the downloaded files
# can change between builds - consider pinning a commit hash instead of "main".
ARG MODEL_REPO_URL="https://huggingface.co/unsloth/gemma-4-12B-it-qat-GGUF/resolve/main"

# Each file is downloaded in its own layer so that one changed or failed
# download does not invalidate the cached layers of the others.
# --progress=dot:giga replaces wget's default progress bar, which otherwise
# floods the build log when fetching multi-gigabyte files.

# Main model weights (passed to the server with -m).
RUN wget --progress=dot:giga \
    "${MODEL_REPO_URL}/gemma-4-12B-it-qat-UD-Q4_K_XL.gguf" \
    -O /gemma-4-12B-it-qat-UD-Q4_K_XL.gguf

# Draft model used for speculative decoding (passed with --spec-draft-model).
RUN wget --progress=dot:giga \
    "${MODEL_REPO_URL}/mtp-gemma-4-12B-it.gguf" \
    -O /mtp-gemma-4-12B-it.gguf

# Multimodal projector (passed with --mmproj); stored under a shorter name.
RUN wget --progress=dot:giga \
    "${MODEL_REPO_URL}/mmproj-F16.gguf" \
    -O /mmproj.gguf

# The server is started on port 7860 (see --port below); declare it so that
# operators and tooling know which port the container listens on.
EXPOSE 7860

# Probe the server on the port it is actually configured to use.
# NOTE(review): the upstream server image is believed to ship a HEALTHCHECK
# that targets port 8080 - confirm. Because this file moves the server to
# 7860, an inherited probe on 8080 would mark the container unhealthy, so an
# explicit one is defined here. wget is available because this file installs it.
# The long start period leaves time for the model files to load.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
  CMD wget -q -O /dev/null http://127.0.0.1:7860/health || exit 1

# Default arguments for the server (exec form, so no shell expansion happens).
#   -m / --spec-draft-model / --mmproj : paths of the three files downloaded above
#   --port / --host                    : listen on 0.0.0.0:7860
#   -t / -tb                           : 2 threads for generation and for batch processing
#   -c                                 : context size of 4096 tokens
#   -fa on                             : enable flash attention
#   -ub / -b                           : physical (128) and logical (512) batch sizes
#   --spec-type / --spec-draft-n-max   : speculative decoding settings
#     NOTE(review): exact semantics of the --spec-* flags depend on the
#     llama.cpp build in the base image - confirm against its --help output.
CMD [ \
  "-m", "/gemma-4-12B-it-qat-UD-Q4_K_XL.gguf", \
  "--spec-draft-model", "/mtp-gemma-4-12B-it.gguf", \
  "--mmproj", "/mmproj.gguf", \
  "--port", "7860", \
  "--host", "0.0.0.0", \
  "-t", "2", \
  "-tb", "2", \
  "-c", "4096", \
  "-fa", "on", \
  "-ub", "128", \
  "-b", "512", \
  "--spec-type", "draft-mtp", \
  "--spec-draft-n-max", "3" \
]