---
# Run with:
# timeout /t 3 /nobreak && mergekit-yaml C:\mergekit-main\moe_karcher.yaml C:\mergekit-main\moe_karcher --copy-tokenizer --allow-crimes --out-shard-size 5B --trust-remote-code --lazy-unpickle --random-seed 420 --cuda

# MoE_Karcher
# MoE-aware Karcher merge that:
#   1. Identifies expert weights by pattern matching
#   2. Blends corresponding experts across MoE models
#   3. Handles router weights separately with optional strategies

merge_method: moe_karcher

# Paths are single-quoted so the backslashes and drive-letter colon are
# always taken literally.
base_model: 'B:\8B\Meme-Trix-MoE-14B-A8B-v1'

models:
  - model: 'B:\8B\Meme-Trix-MoE-14B-A8B-v1'
  - model: 'B:\8B\Babsie--CrossroadsLoki-MoE-2x8B'

# Global parameters for the merge method.
parameters:
  max_iter: 1000
  # Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML
  # resolve a bare `1e-9` as a string, not a float.
  tol: 1.0e-9
  # Options: karcher, average, first, random_init
  router_strategy: karcher
  # Blend corresponding experts (expert[0] + expert[0], etc.)
  blend_experts: true

dtype: float32
out_dtype: bfloat16

tokenizer:
  source: union

# chat_template: auto

name: Trixster-MoE-Karcher-14B-A8B-v1