| # Base image: slim Python 3.11; supplies the `python` and `pip` used by the |
| # later RUN/CMD instructions. |
| FROM python:3.11-slim |
|
| # Build-time only: keeps apt-get non-interactive while the image is built, |
| # without leaving DEBIAN_FRONTEND set in the running container's environment |
| # (an ENV value would persist into every container started from the image). |
| ARG DEBIAN_FRONTEND=noninteractive |
|
| # Model selection and on-disk location (presumably read by app.py -- confirm). |
| # NOTE(review): MODEL_DIR is named "gemma4-coder" while MODEL_REPO points at a |
| # MixTAO model; confirm the directory name is intentional before renaming it, |
| # since existing deployments may already have data stored under this path. |
| ENV MODEL_REPO=mixtao/MixTAO-7Bx2-MoE-v8.1-GGUF \ |
|     MODEL_FILE=mixtao-7bx2-moe-v8.1.Q4_K_M.gguf \ |
|     MODEL_DIR=/data/models/gemma4-coder |
|
| # llama.cpp release tag and install location. LLAMA_SERVER_BIN and |
| # LD_LIBRARY_PATH both point into LLAMA_DIR, where the release archive is |
| # unpacked by the download step. |
| ENV LLAMA_VERSION=b3800 \ |
|     LLAMA_DIR=/opt/llama.cpp \ |
|     LLAMA_SERVER_BIN=/opt/llama.cpp/llama-server \ |
|     LD_LIBRARY_PATH=/opt/llama.cpp |
|
| # Server bind address/port and inference settings. The names suggest |
| # llama-server options; how they are applied is up to app.py -- confirm there. |
| # The literal value "default" presumably means "do not override" -- confirm. |
| ENV LLAMA_HOST=0.0.0.0 \ |
|     LLAMA_PORT=7860 \ |
|     THREADS=4 \ |
|     CTX_SIZE=2048 \ |
|     BATCH_SIZE=default \ |
|     UBATCH_SIZE=default \ |
|     FLASH_ATTN=default \ |
|     CACHE_TYPE_K=default \ |
|     CACHE_TYPE_V=default \ |
|     GPU_LAYERS=0 |
|
| # Sampling defaults (presumably forwarded by app.py -- confirm). |
| ENV TEMPERATURE=0.2 \ |
|     TOP_P=0.95 \ |
|     TOP_K=64 \ |
|     REPEAT_PENALTY=1.08 |
|
| # Tooling switches: HF_XET_HIGH_PERFORMANCE is a huggingface_hub download |
| # setting; PYTHONUNBUFFERED=1 makes Python write stdout/stderr unbuffered so |
| # log lines show up immediately in container logs. |
| ENV HF_XET_HIGH_PERFORMANCE=1 \ |
|     PYTHONUNBUFFERED=1 |
|
|
| # Install OS packages, then drop the apt package index in the same layer so it |
| # is not kept in the image. curl and unzip are used by the llama.cpp download |
| # step; libgomp1 and libstdc++6 are presumably runtime libraries for the |
| # llama.cpp binaries -- confirm. `set -e` aborts the step on the first failure. |
| RUN set -e; \ |
|     apt-get update; \ |
|     apt-get install --yes --no-install-recommends \ |
|         ca-certificates \ |
|         curl \ |
|         libgomp1 \ |
|         libstdc++6 \ |
|         unzip; \ |
|     rm -rf /var/lib/apt/lists/* |
|
|
| # Download the prebuilt llama.cpp release and unpack it flat into LLAMA_DIR |
| # (unzip -j discards the archive's internal directory structure). |
| # The release tag is taken from the LLAMA_VERSION env var rather than being |
| # repeated literally in the URL, so changing that one value changes the |
| # download. The archive is deleted in the same layer to keep it out of the |
| # image, and the final chmod fails the build if the server binary is missing. |
| RUN mkdir -p "${LLAMA_DIR}" \ |
|     && curl -fL "https://github.com/ggml-org/llama.cpp/releases/download/${LLAMA_VERSION}/llama-${LLAMA_VERSION}-bin-ubuntu-x64.zip" -o /tmp/llama.zip \ |
|     && unzip -q -j /tmp/llama.zip -d "${LLAMA_DIR}" \ |
|     && rm /tmp/llama.zip \ |
|     && chmod +x "${LLAMA_SERVER_BIN}" |
|
|
|
|
|
|
| # Python dependency: huggingface_hub. --no-cache-dir keeps pip's download |
| # cache out of the image layer. |
| RUN pip install --no-cache-dir huggingface_hub |
|
|
| # Make /app the working directory and place the entry script in it; the COPY |
| # destination is relative to WORKDIR, so the file lands at /app/app.py. |
| WORKDIR /app |
| COPY app.py ./app.py |
|
|
| # Document the listening port from the LLAMA_PORT value set via ENV instead of |
| # repeating the number, so the two cannot drift apart. EXPOSE is metadata only; |
| # presumably app.py (or the server it starts) binds this port -- confirm. |
| EXPOSE ${LLAMA_PORT} |
| # Exec-form CMD: runs app.py with the image's Python, relative to WORKDIR. |
| CMD ["python", "app.py"] |
|
|