# Image for running SpeechBrain-based workloads on a CUDA 12.1 runtime base.
# The default command only keeps the container alive by listening on port
# 7860; real workloads are started by overriding CMD.
FROM nvidia/cuda:12.1.1-runtime-ubuntu22.04

# Avoid interactive prompts during package installation.
# Declared as ARG (not ENV) so the setting is visible to the RUN steps below
# but is not baked into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Install Python 3.10 and system dependencies.
# SpeechBrain requires Python 3.10 and specific torch/transformers versions.
# Packages are listed alphabetically; the apt lists are removed in the same
# layer so they do not end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
        git \
        libsndfile1 \
        python3-pip \
        python3.10 \
        python3.10-dev \
        python3.10-venv \
    && rm -rf /var/lib/apt/lists/*

# Set Python 3.10 as the default for both `python3` and `python`.
RUN ln -sf /usr/bin/python3.10 /usr/bin/python3 && \
    ln -sf /usr/bin/python3 /usr/bin/python

# Allow pip to install packages system-wide.
ENV PIP_BREAK_SYSTEM_PACKAGES=1

# Set working directory.
WORKDIR /app

# Install the PyTorch ecosystem (cu121 wheels to match the CUDA 12.1 base).
# Kept in its own layer: it uses a different package index than the rest.
RUN pip install --no-cache-dir \
        torch==2.5.1 \
        torchaudio==2.5.1 \
        torchvision==0.20.1 \
        --index-url https://download.pytorch.org/whl/cu121

# Install the remaining Python dependencies in a single resolver run.
# The pins (datasets / transformers / accelerate, chosen for SpeechBrain
# compatibility) are stated in the same command as the unpinned packages so
# that pip cannot first pull newer versions as transitive dependencies and
# then have them replaced in a later layer. torch is already installed above.
RUN pip install --no-cache-dir \
        accelerate==0.30.1 \
        datasets==2.19.1 \
        evaluate \
        jiwer \
        librosa \
        num2words \
        peft \
        speechbrain \
        transformers==4.41.2

# Force the soundfile backend for datasets audio decoding
# (avoids torchcodec/FFmpeg issues).
ENV HF_AUDIO_DECODER_BACKEND=soundfile

# Copy the full repository. Done after dependency installation so source
# changes do not invalidate the dependency layers above.
# NOTE(review): no USER is set, so the container runs as root — confirm
# whether the target runtime expects a non-root user.
COPY . /app

# Default entrypoint: every command is interpreted by bash, so an overriding
# command such as `run_parakeet.sh` is executed as `bash run_parakeet.sh`.
ENTRYPOINT ["bash"]

# Keep-alive CMD so the Space runtime stays healthy. HF Jobs and `docker run`
# override this with their own command (e.g. `run_parakeet.sh`).
# `exec` makes the Python server replace the shell so it receives stop
# signals directly.
# NOTE(review): http.server serves the current directory, i.e. the contents
# of /app copied above — confirm nothing sensitive is included in the build
# context (or exclude it via .dockerignore).
EXPOSE 7860
CMD ["-c", "exec python3 -m http.server 7860"]