#!/usr/bin/env fish
#
# Pipeline with three dependent steps:
#   1. `hf download` the Qwen/Qwen-AgentWorld-35B-A3B repo into a local dir.
#   2. Run llama.cpp's convert_hf_to_gguf.py on that dir to produce an F16 GGUF.
#   3. Run llama-quantize on the F16 GGUF to produce a Q8_0 GGUF.
#
# fish has no `set -e`, so every step is followed by `or exit`: the script
# stops at the first failure (propagating that command's status) instead of
# running later steps against missing or stale files.

# The converter script and ./build/bin/llama-quantize are referenced relative
# to this directory, so a failed cd must abort.
cd "$HOME/repos/github/ggml-org/llama.cpp"
or exit

set repo_path "$HOME/repos/huggingface.co/g0t4/Qwen-AgentWorld-35B-A3B-GGUF"
set model_dir "$repo_path/Qwen-AgentWorld-35B-A3B"
set f16_gguf "$repo_path/Qwen-AgentWorld-35B-A3B-F16.gguf"
set q8_gguf "$repo_path/Qwen-AgentWorld-35B-A3B-Q8_0.gguf"

hf download Qwen/Qwen-AgentWorld-35B-A3B \
    --local-dir $model_dir
or exit

# FYI: the Q8_0 GGUF step fails if `--no-mtp` is dropped here... it complains
# about layer 40 IIRC (presumably the MTP head(s)? -- unconfirmed).
python convert_hf_to_gguf.py \
    $model_dir \
    --outfile $f16_gguf \
    --outtype f16 \
    --no-mtp
or exit

./build/bin/llama-quantize \
    $f16_gguf \
    $q8_gguf \
    Q8_0
or exit