# www / Dockerfile
# Last commit: "Update Dockerfile" by zhengr (68d76cc, verified) - 1.31 kB
# (web file-viewer navigation residue: Raw / History / Blame / Contribute / Delete)
# Base image: slim variant of the official Python 3.11 image.
FROM python:3.11-slim

# Build-time only: stops apt/debconf from prompting during `docker build`.
# Declared as ARG rather than ENV so it is visible to the RUN steps below but
# is not persisted into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Which GGUF model to use and where it is stored.
# Presumably read by app.py (not visible in this file) - confirm there.
# NOTE(review): MODEL_DIR is named "gemma4-coder" while MODEL_REPO/MODEL_FILE
# point at a MixTAO model. The path is left unchanged because existing data may
# already live there - confirm whether the directory name is intentional.
ENV MODEL_REPO=mixtao/MixTAO-7Bx2-MoE-v8.1-GGUF \
    MODEL_FILE=mixtao-7bx2-moe-v8.1.Q4_K_M.gguf \
    MODEL_DIR=/data/models/gemma4-coder

# llama.cpp release tag and install directory.
ENV LLAMA_VERSION=b3800 \
    LLAMA_DIR=/opt/llama.cpp

# Paths derived from LLAMA_DIR. They live in their own ENV instruction because
# a variable set in an ENV instruction cannot be substituted within that same
# instruction. Resulting values: /opt/llama.cpp/llama-server and /opt/llama.cpp.
ENV LLAMA_SERVER_BIN=${LLAMA_DIR}/llama-server \
    LD_LIBRARY_PATH=${LLAMA_DIR}

# Server / inference settings. The literal value "default" is passed through
# as-is; how it is interpreted depends on the consumer (presumably app.py).
ENV LLAMA_HOST=0.0.0.0 \
    LLAMA_PORT=7860 \
    THREADS=4 \
    CTX_SIZE=2048 \
    BATCH_SIZE=default \
    UBATCH_SIZE=default \
    FLASH_ATTN=default \
    CACHE_TYPE_K=default \
    CACHE_TYPE_V=default \
    GPU_LAYERS=0

# Sampling defaults.
ENV TEMPERATURE=0.2 \
    TOP_P=0.95 \
    TOP_K=64 \
    REPEAT_PENALTY=1.08

# Tooling behaviour: HF_XET_HIGH_PERFORMANCE is presumably consumed by the
# Hugging Face download stack (verify); PYTHONUNBUFFERED=1 makes Python write
# stdout/stderr unbuffered so logs appear immediately.
ENV HF_XET_HIGH_PERFORMANCE=1 \
    PYTHONUNBUFFERED=1
# OS packages, installed and cleaned up in a single layer.
#   ca-certificates, curl, unzip - used by the llama.cpp download/unpack step.
#   libgomp1, libstdc++6         - presumably runtime libraries needed by the
#                                  prebuilt llama.cpp binaries (verify).
# `set -e` aborts the step on the first failing command; the apt package index
# is removed at the end so it does not remain in the image layer.
RUN set -e; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libgomp1 \
        libstdc++6 \
        unzip; \
    rm -rf /var/lib/apt/lists/*
# Download the prebuilt llama.cpp release selected by LLAMA_VERSION and unpack
# it into LLAMA_DIR.
#
# The release tag in the URL comes from ${LLAMA_VERSION}, so the declared
# version and the downloaded artifact cannot drift apart (the tag used to be
# hard-coded in the URL, leaving LLAMA_VERSION without effect).
#
# curl: -f fails on HTTP errors, -sS hides the progress meter but still prints
#       errors, -L follows redirects, --retry 3 retries transient failures.
# unzip -j: drops directory components stored in the archive, so every file
#       lands directly in LLAMA_DIR - where LLAMA_SERVER_BIN and
#       LD_LIBRARY_PATH point.
# The archive is deleted in the same layer so it does not bloat the image.
#
# NOTE(review): the download is not checksum-verified; consider pinning a
# sha256 for the chosen release.
RUN mkdir -p "${LLAMA_DIR}" \
    && curl -fsSL --retry 3 \
        "https://github.com/ggml-org/llama.cpp/releases/download/${LLAMA_VERSION}/llama-${LLAMA_VERSION}-bin-ubuntu-x64.zip" \
        -o /tmp/llama.zip \
    && unzip -q -j /tmp/llama.zip -d "${LLAMA_DIR}" \
    && rm /tmp/llama.zip \
    && chmod +x "${LLAMA_SERVER_BIN}"
# Python dependency: huggingface_hub. --no-cache-dir keeps pip's download cache
# out of the image layer.
# NOTE(review): the package is unpinned, so a rebuild may pick up a newer
# release - consider pinning a version for reproducible builds.
RUN pip install --no-cache-dir huggingface_hub
# Documented service port; same value as the LLAMA_PORT default set in ENV.
# EXPOSE is metadata only and does not publish the port.
EXPOSE 7860

# The application is a single script that lives in, and is started from, /app.
WORKDIR /app
COPY app.py ./

# NOTE(review): no USER instruction is set, so the container runs as the base
# image's default user - consider a dedicated non-root user once write access
# to MODEL_DIR has been confirmed.

# Exec form: python is started directly, without a wrapping shell.
CMD ["python", "app.py"]