#!/usr/bin/env fish
#
# Download Qwen/Qwen-AgentWorld-35B-A3B from Hugging Face, convert the
# checkpoint to an F16 GGUF with llama.cpp's convert_hf_to_gguf.py, then
# quantize that GGUF to Q8_0 with llama-quantize.
#
# The convert script and ./build/bin/llama-quantize are referenced by relative
# path, so everything below must run from the llama.cpp checkout.
#
# fish has no `set -e` equivalent: every step is followed by `or exit 1` so a
# failure stops the pipeline instead of feeding missing/partial files into the
# next (long-running) step.

set llama_cpp_dir "$HOME/repos/github/ggml-org/llama.cpp"
set repo_path "$HOME/repos/huggingface.co/g0t4/Qwen-AgentWorld-35B-A3B-GGUF"

set model_dir $repo_path/Qwen-AgentWorld-35B-A3B
set f16_gguf $repo_path/Qwen-AgentWorld-35B-A3B-F16.gguf
set q8_gguf $repo_path/Qwen-AgentWorld-35B-A3B-Q8_0.gguf

# Relative paths below only resolve inside the llama.cpp checkout; bail out
# rather than run them from whatever directory we happened to start in.
cd $llama_cpp_dir
or exit 1

hf download Qwen/Qwen-AgentWorld-35B-A3B \
    --local-dir $model_dir
or exit 1

# FYI Q8_0 GGUF fails if you drop `--no-mtp` here... complains about layer 40
# IIRC (must be MTP head(s)?)
python convert_hf_to_gguf.py \
    $model_dir \
    --outfile $f16_gguf \
    --outtype f16 \
    --no-mtp
or exit 1

./build/bin/llama-quantize \
    $f16_gguf \
    $q8_gguf \
    Q8_0
or exit 1