# Wan Studio — Phase 1 pinned dependencies
# Verified empirically against the ZeroGPU Blackwell MIG environment:
#   torch 2.11.0+cu130 / Python 3.12.12 / ZEROGPU_V2=true
#
# NOTE: keep exactly one requirement per line. pip treats everything after a
# `#` as a comment, so a requirement that shares a line with a preceding
# comment is silently ignored.

# Core ML
torch>=2.8.0,<2.12              # sm_120 Blackwell support, ZeroGPU runtime upper-bound
diffusers>=0.38.0               # load_into_transformer_2 for Wan 2.2 MoE LoRA (PR #12074)
transformers>=4.45              # UMT5EncoderModel, CLIPVisionModel
accelerate>=0.34
peft>=0.13
ftfy>=6.1                       # WanImageToVideoPipeline._clean_caption (prompt normalization)

# HF infrastructure
huggingface_hub>=1.0,<2.0       # v1.x API — note HfFolder removed
spaces>=0.50.2                  # ZeroGPU @spaces.GPU + AOTI API
gradio>=5.49.0                  # HfFolder import compat fix landed here
hf_xet>=1.0                     # Xet chunk-dedup + parallel transfer for >1GB safetensors
hf_transfer>=0.1.8              # accelerated HTTP fallback (HF_HUB_ENABLE_HF_TRANSFER=1)

# Quantization (ZeroGPU-only, guarded — not installed locally on MPS)
torchao>=0.7

# Video / image I/O
opencv-python-headless>=4.10
imageio>=2.36
imageio-ffmpeg>=0.5
Pillow>=11.0
numpy>=2.0
einops>=0.8

# Audio / vision (S2V mode)
librosa>=0.10
soundfile>=0.13
easydict>=1.9
# torchvision MUST pair with the torch pin above (torch>=2.8,<2.12 → tv 0.23–0.26).
# Do NOT floor at >=0.27 — that pairs with torch 2.12 and breaks Space resolution
# (the local dev venv happens to run torch 2.12+tv0.27, but the Space is torch 2.11).
torchvision>=0.23,<0.27
decord                          # no py3.12 wheel yet; import is guarded in wan/speech2video.py

# Animate preproc binary deps (Wan 2.2 Animate-14B — resolved: numpy-2/torch-2.12 safe)
onnxruntime>=1.19               # CPU EP for YOLOv10 + ViTPose ONNX inference
sam2>=1.1.0                     # SAM2 from PyPI; plain CPU import confirmed (no custom build)
hydra-core>=1.3.0               # SAM2 model-config loading
iopath>=0.1.9                   # SAM2 / fvcore dependency
fvcore>=0.1.5                   # SAM2 dependency
moviepy>=2.0                    # video I/O in process_pipepline.py
loguru>=0.7                     # logging in process_pipepline.py
matplotlib>=3.0                 # human_visualization.py draws skeleton frames

# Dev tooling
pytest>=8.0
ruff>=0.6