#!/usr/bin/env python3
"""Inject MTP head from 27B donor GGUF into 40B Deckard GGUF.

Raw binary copy — no KV re-serialization.
Inserts nextn_predict_layers and MTP tensor info entries,
copies everything else byte-for-byte.

NOTE(review): this file reached review collapsed onto three physical lines,
with two spans missing (the text between a '<' and a later '>' had been
stripped). The argument parsing, the donor tensor collection and the
header / KV / tensor-info writing below were reconstructed from the
surviving comments and the GGUF v3 layout. The command-line flag names in
particular are a reconstruction -- confirm them against the author's copy
before relying on this script.
"""

import argparse
import struct
import sys
import os
import io
from pathlib import Path

try:
    from gguf import GGUFReader
except ImportError:  # reported by main(); keeps the helpers importable
    GGUFReader = None

GGUF_MAGIC = 0x46554747
ALIGNMENT = 32

# GGUF v3 header: magic(4) + version(4) + n_tensors(8) + n_kv(8)
HEADER_SIZE = 24
# GGUF metadata value type id for an unsigned 32-bit integer.
GGUF_TYPE_UINT32 = 4
# Size of one read/write step when streaming bytes between files.
COPY_CHUNK = 64 * 1024 * 1024
# Progress is reported each time the copy crosses a 0.5 GiB boundary.
HALF_GIB = 512 * 1024 * 1024


def align(x, a=ALIGNMENT):
    """Round *x* up to the next multiple of *a* (*a* must be a power of two)."""
    return (x + a - 1) & ~(a - 1)


def write_string(f, s):
    """Write *s* to *f* as a GGUF string: uint64 LE byte length, then UTF-8."""
    encoded = s.encode('utf-8')
    f.write(struct.pack('<Q', len(encoded)))
    f.write(encoded)


def _die(message):
    """Print an error line and terminate with exit status 1."""
    print(f"ERROR: {message}")
    sys.exit(1)


def _tensor_info_size(name, n_dims):
    """Return the serialized size in bytes of one tensor-info entry.

    Layout: name length (8) + name + n_dims (4) + dims (8 each)
    + ggml type (4) + data offset (8).
    """
    return 8 + len(name.encode('utf-8')) + 4 + n_dims * 8 + 4 + 8


def _copy_bytes(src, dst, count, progress=False):
    """Stream exactly *count* bytes from *src* to *dst*.

    Raises:
        EOFError: if *src* ends before *count* bytes were read. Stopping
            silently would leave a truncated, unusable output file.
    """
    copied = 0
    while copied < count:
        chunk = src.read(min(COPY_CHUNK, count - copied))
        if not chunk:
            raise EOFError(
                f"source ended after {copied:,} of {count:,} bytes")
        dst.write(chunk)
        before = copied
        copied += len(chunk)
        if progress and copied // HALF_GIB > before // HALF_GIB:
            print(f" {copied / (1024**3):.1f} GB...")
    return copied


def _collect_mtp_tensors(donor_reader, source_layer, dest_layer):
    """Collect every donor tensor of block *source_layer*, renamed to *dest_layer*.

    Returns a list of dicts with keys ``new_name``, ``shape``, ``type``,
    ``data`` (raw bytes) and ``rel_offset`` (filled in once the output
    layout is known).

    NOTE(review): reconstructed -- assumes the MTP head is exactly the set of
    donor tensors named ``blk.<source_layer>.*``; confirm against the
    original selection rule.
    """
    src_prefix = f"blk.{source_layer}."
    dst_prefix = f"blk.{dest_layer}."
    collected = []
    for tensor in donor_reader.tensors:
        if not tensor.name.startswith(src_prefix):
            continue
        new_name = dst_prefix + tensor.name[len(src_prefix):]
        collected.append({
            'new_name': new_name,
            'shape': [int(d) for d in tensor.shape],
            'type': int(tensor.tensor_type),
            'data': tensor.data.tobytes(),
            'rel_offset': 0,
        })
        print(f" {tensor.name} -> {new_name} ({tensor.data.nbytes:,} B)")
    return collected


def main():
    """Command-line entry point: build the output GGUF with the MTP head added."""
    # NOTE(review): flag names are reconstructed; only ``dest_layer`` is
    # attested by the surviving source.
    parser = argparse.ArgumentParser(
        description="Inject MTP head tensors from a donor GGUF into a target GGUF.")
    parser.add_argument('--donor', required=True,
                        help="GGUF file that contains the MTP head")
    parser.add_argument('--target', required=True,
                        help="GGUF file to receive the MTP head")
    parser.add_argument('--output', required=True,
                        help="path of the GGUF file to write")
    parser.add_argument('--source-layer', type=int, required=True,
                        help="block index of the MTP head in the donor")
    parser.add_argument('--dest-layer', type=int, required=True,
                        help="block index the MTP head gets in the output")
    args = parser.parse_args()

    if GGUFReader is None:
        _die("the 'gguf' package is required (pip install gguf)")

    donor_path = Path(args.donor)
    target_path = Path(args.target)
    output_path = Path(args.output)
    for label, path in (("Donor", donor_path), ("Target", target_path)):
        if not path.is_file():
            _die(f"{label} file not found: {path}")
    # Opening the output with 'wb' truncates it, which would destroy an input.
    if output_path.resolve() in (donor_path.resolve(), target_path.resolve()):
        _die("Output path must differ from donor and target")
    target_size = target_path.stat().st_size

    # ── Read MTP tensors from donor ────────────────────────────────────────
    print(f"\n[1/4] Reading donor...")
    donor_reader = GGUFReader(str(donor_path))
    mtp_tensors = _collect_mtp_tensors(
        donor_reader, args.source_layer, args.dest_layer)
    print(f" Total: {len(mtp_tensors)}")
    if not mtp_tensors:
        _die("No MTP tensors!")

    # ── Analyze target structure ───────────────────────────────────────────
    print(f"\n[2/4] Analyzing target...")
    target_reader = GGUFReader(str(target_path))
    n_orig_tensors = len(target_reader.tensors)
    # Honor a non-default general.alignment when the reader exposes it.
    alignment = int(getattr(target_reader, 'alignment', ALIGNMENT))

    # Find architecture
    arch = "qwen35"
    arch_field = target_reader.fields.get("general.architecture")
    if arch_field is not None:
        arch = str(bytes(arch_field.parts[arch_field.data[0]]), 'utf-8')

    nextn_key = f"{arch}.nextn_predict_layers"
    has_nextn = any('nextn_predict_layers' in f.name
                    for f in target_reader.fields.values())

    print(f" Architecture: {arch}")
    print(f" Tensors: {n_orig_tensors}")

    # Appending entries whose names already exist would yield duplicate
    # tensor names in the output.
    dest_prefix = f"blk.{args.dest_layer}."
    if any(t.name.startswith(dest_prefix) for t in target_reader.tensors):
        _die(f"Target already has tensors under {dest_prefix}")

    # Compute section boundaries
    # KV end = last non-GGUF field offset + its total parts size
    kv_end = HEADER_SIZE
    for field in target_reader.fields.values():
        if field.name.startswith('GGUF.'):
            continue
        end = field.offset + sum(p.nbytes for p in field.parts)
        if end > kv_end:
            kv_end = end

    # TI size = sum of each tensor info entry
    ti_size = sum(_tensor_info_size(t.name, len(t.shape))
                  for t in target_reader.tensors)
    ti_end = kv_end + ti_size
    data_start = align(ti_end, alignment)
    orig_tensor_data_size = target_size - data_start

    print(f" KV end: {kv_end:,}")
    print(f" TI: {kv_end:,} to {ti_end:,} ({ti_size:,} B)")
    print(f" Data start: {data_start:,}")
    print(f" Data size: {orig_tensor_data_size:,} B")

    # The raw copy below is only valid if these boundaries are exact, so
    # cross-check against the reader's own data offset when available.
    reader_data_start = getattr(target_reader, 'data_offset', None)
    if reader_data_start is not None and int(reader_data_start) != data_start:
        _die(f"Computed data start {data_start:,} disagrees with "
             f"reader ({int(reader_data_start):,})")
    if orig_tensor_data_size < 0:
        _die("Target file is smaller than its metadata section")

    # Read original header counts
    # GGUF v3 header: magic(4) + version(4) + n_tensors(8) + n_kv(8)
    with open(target_path, 'rb') as f:
        header = f.read(HEADER_SIZE)
    if len(header) != HEADER_SIZE:
        _die("Target file is too short to hold a GGUF header")
    magic, version, orig_n_tensors, orig_n_kv = struct.unpack('<IIQQ', header)
    if magic != GGUF_MAGIC:
        _die(f"Target is not a GGUF file (magic 0x{magic:08x})")
    if orig_n_tensors != n_orig_tensors:
        _die(f"Header says {orig_n_tensors} tensors, reader found {n_orig_tensors}")
    print(f" Header: v{version}, {orig_n_tensors} tensors, {orig_n_kv} KV")

    # ── Plan the additions ─────────────────────────────────────────────────
    # NOTE(review): reconstructed section.
    print(f"\n[3/4] Planning layout...")
    extra_kv = io.BytesIO()
    if has_nextn:
        # The existing entry is copied verbatim with the rest of the KV block.
        print(f" nextn_predict_layers already present; keeping existing value")
    else:
        write_string(extra_kv, nextn_key)
        extra_kv.write(struct.pack('<I', GGUF_TYPE_UINT32))
        extra_kv.write(struct.pack('<I', 1))  # one MTP block is injected
        print(f" Adding KV: {nextn_key} = 1")

    # New tensors follow the original data, each at an aligned offset that is
    # relative to the start of the data section.
    rel = align(orig_tensor_data_size, alignment)
    for mt in mtp_tensors:
        mt['rel_offset'] = rel
        rel = align(rel + len(mt['data']), alignment)
    padded_data_size = rel

    extra_ti = io.BytesIO()
    for mt in mtp_tensors:
        write_string(extra_ti, mt['new_name'])
        extra_ti.write(struct.pack('<I', len(mt['shape'])))
        for dim in mt['shape']:
            extra_ti.write(struct.pack('<Q', dim))
        extra_ti.write(struct.pack('<I', mt['type']))
        extra_ti.write(struct.pack('<Q', mt['rel_offset']))

    new_n_tensors = orig_n_tensors + len(mtp_tensors)
    new_n_kv = orig_n_kv + (0 if has_nextn else 1)

    # ── Write output ───────────────────────────────────────────────────────
    print(f"\n[4/4] Writing output...")
    with open(output_path, 'wb') as out, open(target_path, 'rb') as src:
        out.write(struct.pack('<IIQQ', magic, version, new_n_tensors, new_n_kv))

        # Original KV block, verbatim, then the added KV entry (if any).
        src.seek(HEADER_SIZE)
        _copy_bytes(src, out, kv_end - HEADER_SIZE)
        out.write(extra_kv.getvalue())

        # Original tensor info, verbatim, then the MTP tensor info entries.
        src.seek(kv_end)
        _copy_bytes(src, out, ti_size)
        out.write(extra_ti.getvalue())

        pos = out.tell()
        new_data_start = align(pos, alignment)
        if new_data_start > pos:
            out.write(b'\x00' * (new_data_start - pos))
        print(f" New data section at: {new_data_start:,}")

        # The offsets stored in tensor info are relative to the data section
        # start. The TI block was copied verbatim, so those offsets stay
        # correct as long as the tensor data keeps the same relative
        # positions -- i.e. it is copied as one run from new_data_start.

        # Copy original tensor data
        print(f" Copying tensor data ({orig_tensor_data_size / (1024**3):.2f} GB)...")
        src.seek(data_start)
        copied = _copy_bytes(src, out, orig_tensor_data_size, progress=True)
        print(f" Copied: {copied:,} B")

        # MTP tensor data
        print(f" Writing MTP data...")
        for mt in mtp_tensors:
            target_pos = new_data_start + mt['rel_offset']
            cur = out.tell()
            if target_pos > cur:
                out.write(b'\x00' * (target_pos - cur))
            out.write(mt['data'])
            print(f" {mt['new_name']} ({len(mt['data']):,} B)")

        # Pad the last tensor too: readers that load the whole data blob in
        # one read expect its padded size to be present.
        end_pos = new_data_start + padded_data_size
        cur = out.tell()
        if end_pos > cur:
            out.write(b'\x00' * (end_pos - cur))

    # Measured after the file is closed so buffered bytes are included.
    final_size = os.path.getsize(output_path)
    print(f"\n Output: {final_size:,} B ({final_size / (1024**3):.2f} GB)")

    # Verify
    print(f"\nVerifying...")
    try:
        vr = GGUFReader(str(output_path))
        found = any('nextn_predict_layers' in f.name for f in vr.fields.values())
        # Prefix match with the trailing dot: a plain substring test would
        # also count e.g. blk.40.* when dest_layer is 4.
        mc = sum(1 for t in vr.tensors if t.name.startswith(dest_prefix))
        print(f" nextn_predict_layers: {'found' if found else 'MISSING'}")
        print(f" MTP tensors: {mc}")
        print(f" Total tensors: {len(vr.tensors)}")
        ok = found and mc == len(mtp_tensors) and len(vr.tensors) == new_n_tensors
        print(f" {'PASSED' if ok else 'FAILED'}")
    except Exception as e:
        # Best-effort check only: the Python reader may reject files that
        # llama.cpp accepts, so report and carry on.
        print(f" Python verify failed: {e}")
        print(f" Try llama-server to test.")


if __name__ == '__main__':
    main()