{ "quant_method": "exl3", "version": "0.0.32", "bits": 3.5, "head_bits": 6, "calibration": { "rows": 250, "cols": 2048 }, "out_scales": "always", "codebook": "mcg", "tensor_storage": { "fc": { "stored_tensors": { "fc.suh": { "shape": [ 10240 ], "n_bytes": 20480, "dtype": "torch.float16" }, "fc.svh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "fc.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "fc.trellis": { "shape": [ 640, 128, 64 ], "n_bytes": 10485760, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "hidden_norm": { "stored_tensors": { "hidden_norm.weight": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.bfloat16" } } }, "layers.0.input_layernorm": { "stored_tensors": { "layers.0.input_layernorm.weight": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.bfloat16" } } }, "layers.0.self_attn.q_proj": { "stored_tensors": { "layers.0.self_attn.q_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.0.self_attn.q_proj.svh": { "shape": [ 4096 ], "n_bytes": 8192, "dtype": "torch.float16" }, "layers.0.self_attn.q_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.0.self_attn.q_proj.trellis": { "shape": [ 128, 256, 64 ], "n_bytes": 4194304, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.0.self_attn.k_proj": { "stored_tensors": { "layers.0.self_attn.k_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.0.self_attn.k_proj.svh": { "shape": [ 512 ], "n_bytes": 1024, "dtype": "torch.float16" }, "layers.0.self_attn.k_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.0.self_attn.k_proj.trellis": { "shape": [ 128, 32, 64 ], "n_bytes": 524288, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.0.self_attn.v_proj": { "stored_tensors": { "layers.0.self_attn.v_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.0.self_attn.v_proj.svh": { "shape": [ 512 ], "n_bytes": 1024, "dtype": "torch.float16" }, "layers.0.self_attn.v_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.0.self_attn.v_proj.trellis": { "shape": [ 128, 32, 64 ], "n_bytes": 524288, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.0.self_attn.o_proj": { "stored_tensors": { "layers.0.self_attn.o_proj.suh": { "shape": [ 4096 ], "n_bytes": 8192, "dtype": "torch.float16" }, "layers.0.self_attn.o_proj.svh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.0.self_attn.o_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.0.self_attn.o_proj.trellis": { "shape": [ 256, 128, 64 ], "n_bytes": 4194304, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.0.self_attn.q_norm": { "stored_tensors": { "layers.0.self_attn.q_norm.weight": { "shape": [ 128 ], "n_bytes": 256, "dtype": "torch.bfloat16" } } }, "layers.0.self_attn.k_norm": { "stored_tensors": { "layers.0.self_attn.k_norm.weight": { "shape": [ 128 ], "n_bytes": 256, "dtype": "torch.bfloat16" } } }, "layers.0.post_attention_layernorm": { "stored_tensors": { "layers.0.post_attention_layernorm.weight": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.bfloat16" } } }, "layers.0.mlp.up_proj": { "stored_tensors": { "layers.0.mlp.up_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.0.mlp.up_proj.svh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.0.mlp.up_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.0.mlp.up_proj.trellis": { "shape": [ 128, 384, 64 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.0.mlp.gate_proj": { "stored_tensors": { "layers.0.mlp.gate_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.0.mlp.gate_proj.svh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.0.mlp.gate_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.0.mlp.gate_proj.trellis": { "shape": [ 128, 384, 64 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.0.mlp.down_proj": { "stored_tensors": { "layers.0.mlp.down_proj.suh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.0.mlp.down_proj.svh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.0.mlp.down_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.0.mlp.down_proj.trellis": { "shape": [ 384, 128, 64 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.1.input_layernorm": { "stored_tensors": { "layers.1.input_layernorm.weight": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.bfloat16" } } }, "layers.1.self_attn.q_proj": { "stored_tensors": { "layers.1.self_attn.q_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.1.self_attn.q_proj.svh": { "shape": [ 4096 ], "n_bytes": 8192, "dtype": "torch.float16" }, "layers.1.self_attn.q_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.1.self_attn.q_proj.trellis": { "shape": [ 128, 256, 64 ], "n_bytes": 4194304, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.1.self_attn.k_proj": { "stored_tensors": { "layers.1.self_attn.k_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.1.self_attn.k_proj.svh": { "shape": [ 512 ], "n_bytes": 1024, "dtype": "torch.float16" }, "layers.1.self_attn.k_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.1.self_attn.k_proj.trellis": { "shape": [ 128, 32, 64 ], "n_bytes": 524288, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.1.self_attn.v_proj": { "stored_tensors": { "layers.1.self_attn.v_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.1.self_attn.v_proj.svh": { "shape": [ 512 ], "n_bytes": 1024, "dtype": "torch.float16" }, "layers.1.self_attn.v_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.1.self_attn.v_proj.trellis": { "shape": [ 128, 32, 64 ], "n_bytes": 524288, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.1.self_attn.o_proj": { "stored_tensors": { "layers.1.self_attn.o_proj.suh": { "shape": [ 4096 ], "n_bytes": 8192, "dtype": "torch.float16" }, "layers.1.self_attn.o_proj.svh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.1.self_attn.o_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.1.self_attn.o_proj.trellis": { "shape": [ 256, 128, 64 ], "n_bytes": 4194304, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.1.self_attn.q_norm": { "stored_tensors": { "layers.1.self_attn.q_norm.weight": { "shape": [ 128 ], "n_bytes": 256, "dtype": "torch.bfloat16" } } }, "layers.1.self_attn.k_norm": { "stored_tensors": { "layers.1.self_attn.k_norm.weight": { "shape": [ 128 ], "n_bytes": 256, "dtype": "torch.bfloat16" } } }, "layers.1.post_attention_layernorm": { "stored_tensors": { "layers.1.post_attention_layernorm.weight": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.bfloat16" } } }, "layers.1.mlp.up_proj": { "stored_tensors": { "layers.1.mlp.up_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.1.mlp.up_proj.svh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.1.mlp.up_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.1.mlp.up_proj.trellis": { "shape": [ 128, 384, 64 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.1.mlp.gate_proj": { "stored_tensors": { "layers.1.mlp.gate_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.1.mlp.gate_proj.svh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.1.mlp.gate_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.1.mlp.gate_proj.trellis": { "shape": [ 128, 384, 64 ], "n_bytes": 6291456, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.1.mlp.down_proj": { "stored_tensors": { "layers.1.mlp.down_proj.suh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.1.mlp.down_proj.svh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.1.mlp.down_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.1.mlp.down_proj.trellis": { "shape": [ 384, 128, 48 ], "n_bytes": 4718592, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3, "mcg_multiplier": 3417055213 }, "layers.2.input_layernorm": { "stored_tensors": { "layers.2.input_layernorm.weight": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.bfloat16" } } }, "layers.2.self_attn.q_proj": { "stored_tensors": { "layers.2.self_attn.q_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.2.self_attn.q_proj.svh": { "shape": [ 4096 ], "n_bytes": 8192, "dtype": "torch.float16" }, "layers.2.self_attn.q_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.2.self_attn.q_proj.trellis": { "shape": [ 128, 256, 64 ], "n_bytes": 4194304, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.2.self_attn.k_proj": { "stored_tensors": { "layers.2.self_attn.k_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.2.self_attn.k_proj.svh": { "shape": [ 512 ], "n_bytes": 1024, "dtype": "torch.float16" }, "layers.2.self_attn.k_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.2.self_attn.k_proj.trellis": { "shape": [ 128, 32, 64 ], "n_bytes": 524288, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.2.self_attn.v_proj": { "stored_tensors": { "layers.2.self_attn.v_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.2.self_attn.v_proj.svh": { "shape": [ 512 ], "n_bytes": 1024, "dtype": "torch.float16" }, "layers.2.self_attn.v_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.2.self_attn.v_proj.trellis": { "shape": [ 128, 32, 64 ], "n_bytes": 524288, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.2.self_attn.o_proj": { "stored_tensors": { "layers.2.self_attn.o_proj.suh": { "shape": [ 4096 ], "n_bytes": 8192, "dtype": "torch.float16" }, "layers.2.self_attn.o_proj.svh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.2.self_attn.o_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.2.self_attn.o_proj.trellis": { "shape": [ 256, 128, 64 ], "n_bytes": 4194304, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.2.self_attn.q_norm": { "stored_tensors": { "layers.2.self_attn.q_norm.weight": { "shape": [ 128 ], "n_bytes": 256, "dtype": "torch.bfloat16" } } }, "layers.2.self_attn.k_norm": { "stored_tensors": { "layers.2.self_attn.k_norm.weight": { "shape": [ 128 ], "n_bytes": 256, "dtype": "torch.bfloat16" } } }, "layers.2.post_attention_layernorm": { "stored_tensors": { "layers.2.post_attention_layernorm.weight": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.bfloat16" } } }, "layers.2.mlp.up_proj": { "stored_tensors": { "layers.2.mlp.up_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.2.mlp.up_proj.svh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.2.mlp.up_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.2.mlp.up_proj.trellis": { "shape": [ 128, 384, 48 ], "n_bytes": 4718592, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3, "mcg_multiplier": 3417055213 }, "layers.2.mlp.gate_proj": { "stored_tensors": { "layers.2.mlp.gate_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.2.mlp.gate_proj.svh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.2.mlp.gate_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.2.mlp.gate_proj.trellis": { "shape": [ 128, 384, 48 ], "n_bytes": 4718592, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3, "mcg_multiplier": 3417055213 }, "layers.2.mlp.down_proj": { "stored_tensors": { "layers.2.mlp.down_proj.suh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.2.mlp.down_proj.svh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.2.mlp.down_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.2.mlp.down_proj.trellis": { "shape": [ 384, 128, 48 ], "n_bytes": 4718592, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3, "mcg_multiplier": 3417055213 }, "layers.3.input_layernorm": { "stored_tensors": { "layers.3.input_layernorm.weight": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.bfloat16" } } }, "layers.3.self_attn.q_proj": { "stored_tensors": { "layers.3.self_attn.q_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.3.self_attn.q_proj.svh": { "shape": [ 4096 ], "n_bytes": 8192, "dtype": "torch.float16" }, "layers.3.self_attn.q_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.3.self_attn.q_proj.trellis": { "shape": [ 128, 256, 64 ], "n_bytes": 4194304, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.3.self_attn.k_proj": { "stored_tensors": { "layers.3.self_attn.k_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.3.self_attn.k_proj.svh": { "shape": [ 512 ], "n_bytes": 1024, "dtype": "torch.float16" }, "layers.3.self_attn.k_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.3.self_attn.k_proj.trellis": { "shape": [ 128, 32, 64 ], "n_bytes": 524288, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.3.self_attn.v_proj": { "stored_tensors": { "layers.3.self_attn.v_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.3.self_attn.v_proj.svh": { "shape": [ 512 ], "n_bytes": 1024, "dtype": "torch.float16" }, "layers.3.self_attn.v_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.3.self_attn.v_proj.trellis": { "shape": [ 128, 32, 64 ], "n_bytes": 524288, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.3.self_attn.o_proj": { "stored_tensors": { "layers.3.self_attn.o_proj.suh": { "shape": [ 4096 ], "n_bytes": 8192, "dtype": "torch.float16" }, "layers.3.self_attn.o_proj.svh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.3.self_attn.o_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.3.self_attn.o_proj.trellis": { "shape": [ 256, 128, 64 ], "n_bytes": 4194304, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.3.self_attn.q_norm": { "stored_tensors": { "layers.3.self_attn.q_norm.weight": { "shape": [ 128 ], "n_bytes": 256, "dtype": "torch.bfloat16" } } }, "layers.3.self_attn.k_norm": { "stored_tensors": { "layers.3.self_attn.k_norm.weight": { "shape": [ 128 ], "n_bytes": 256, "dtype": "torch.bfloat16" } } }, "layers.3.post_attention_layernorm": { "stored_tensors": { "layers.3.post_attention_layernorm.weight": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.bfloat16" } } }, "layers.3.mlp.up_proj": { "stored_tensors": { "layers.3.mlp.up_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.3.mlp.up_proj.svh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.3.mlp.up_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.3.mlp.up_proj.trellis": { "shape": [ 128, 384, 48 ], "n_bytes": 4718592, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3, "mcg_multiplier": 3417055213 }, "layers.3.mlp.gate_proj": { "stored_tensors": { "layers.3.mlp.gate_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.3.mlp.gate_proj.svh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.3.mlp.gate_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.3.mlp.gate_proj.trellis": { "shape": [ 128, 384, 48 ], "n_bytes": 4718592, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3, "mcg_multiplier": 3417055213 }, "layers.3.mlp.down_proj": { "stored_tensors": { "layers.3.mlp.down_proj.suh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.3.mlp.down_proj.svh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.3.mlp.down_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.3.mlp.down_proj.trellis": { "shape": [ 384, 128, 48 ], "n_bytes": 4718592, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3, "mcg_multiplier": 3417055213 }, "layers.4.input_layernorm": { "stored_tensors": { "layers.4.input_layernorm.weight": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.bfloat16" } } }, "layers.4.self_attn.q_proj": { "stored_tensors": { "layers.4.self_attn.q_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.4.self_attn.q_proj.svh": { "shape": [ 4096 ], "n_bytes": 8192, "dtype": "torch.float16" }, "layers.4.self_attn.q_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.4.self_attn.q_proj.trellis": { "shape": [ 128, 256, 64 ], "n_bytes": 4194304, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.4.self_attn.k_proj": { "stored_tensors": { "layers.4.self_attn.k_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.4.self_attn.k_proj.svh": { "shape": [ 512 ], "n_bytes": 1024, "dtype": "torch.float16" }, "layers.4.self_attn.k_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.4.self_attn.k_proj.trellis": { "shape": [ 128, 32, 64 ], "n_bytes": 524288, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.4.self_attn.v_proj": { "stored_tensors": { "layers.4.self_attn.v_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.4.self_attn.v_proj.svh": { "shape": [ 512 ], "n_bytes": 1024, "dtype": "torch.float16" }, "layers.4.self_attn.v_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.4.self_attn.v_proj.trellis": { "shape": [ 128, 32, 64 ], "n_bytes": 524288, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.4.self_attn.o_proj": { "stored_tensors": { "layers.4.self_attn.o_proj.suh": { "shape": [ 4096 ], "n_bytes": 8192, "dtype": "torch.float16" }, "layers.4.self_attn.o_proj.svh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.4.self_attn.o_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.4.self_attn.o_proj.trellis": { "shape": [ 256, 128, 64 ], "n_bytes": 4194304, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.4.self_attn.q_norm": { "stored_tensors": { "layers.4.self_attn.q_norm.weight": { "shape": [ 128 ], "n_bytes": 256, "dtype": "torch.bfloat16" } } }, "layers.4.self_attn.k_norm": { "stored_tensors": { "layers.4.self_attn.k_norm.weight": { "shape": [ 128 ], "n_bytes": 256, "dtype": "torch.bfloat16" } } }, "layers.4.post_attention_layernorm": { "stored_tensors": { "layers.4.post_attention_layernorm.weight": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.bfloat16" } } }, "layers.4.mlp.up_proj": { "stored_tensors": { "layers.4.mlp.up_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.4.mlp.up_proj.svh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.4.mlp.up_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.4.mlp.up_proj.trellis": { "shape": [ 128, 384, 48 ], "n_bytes": 4718592, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3, "mcg_multiplier": 3417055213 }, "layers.4.mlp.gate_proj": { "stored_tensors": { "layers.4.mlp.gate_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.4.mlp.gate_proj.svh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.4.mlp.gate_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.4.mlp.gate_proj.trellis": { "shape": [ 128, 384, 48 ], "n_bytes": 4718592, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3, "mcg_multiplier": 3417055213 }, "layers.4.mlp.down_proj": { "stored_tensors": { "layers.4.mlp.down_proj.suh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.4.mlp.down_proj.svh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.4.mlp.down_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.4.mlp.down_proj.trellis": { "shape": [ 384, 128, 48 ], "n_bytes": 4718592, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3, "mcg_multiplier": 3417055213 }, "layers.5.input_layernorm": { "stored_tensors": { "layers.5.input_layernorm.weight": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.bfloat16" } } }, "layers.5.self_attn.q_proj": { "stored_tensors": { "layers.5.self_attn.q_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.5.self_attn.q_proj.svh": { "shape": [ 4096 ], "n_bytes": 8192, "dtype": "torch.float16" }, "layers.5.self_attn.q_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.5.self_attn.q_proj.trellis": { "shape": [ 128, 256, 64 ], "n_bytes": 4194304, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.5.self_attn.k_proj": { "stored_tensors": { "layers.5.self_attn.k_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.5.self_attn.k_proj.svh": { "shape": [ 512 ], "n_bytes": 1024, "dtype": "torch.float16" }, "layers.5.self_attn.k_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.5.self_attn.k_proj.trellis": { "shape": [ 128, 32, 64 ], "n_bytes": 524288, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.5.self_attn.v_proj": { "stored_tensors": { "layers.5.self_attn.v_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.5.self_attn.v_proj.svh": { "shape": [ 512 ], "n_bytes": 1024, "dtype": "torch.float16" }, "layers.5.self_attn.v_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.5.self_attn.v_proj.trellis": { "shape": [ 128, 32, 64 ], "n_bytes": 524288, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.5.self_attn.o_proj": { "stored_tensors": { "layers.5.self_attn.o_proj.suh": { "shape": [ 4096 ], "n_bytes": 8192, "dtype": "torch.float16" }, "layers.5.self_attn.o_proj.svh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.5.self_attn.o_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.5.self_attn.o_proj.trellis": { "shape": [ 256, 128, 64 ], "n_bytes": 4194304, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.5.self_attn.q_norm": { "stored_tensors": { "layers.5.self_attn.q_norm.weight": { "shape": [ 128 ], "n_bytes": 256, "dtype": "torch.bfloat16" } } }, "layers.5.self_attn.k_norm": { "stored_tensors": { "layers.5.self_attn.k_norm.weight": { "shape": [ 128 ], "n_bytes": 256, "dtype": "torch.bfloat16" } } }, "layers.5.post_attention_layernorm": { "stored_tensors": { "layers.5.post_attention_layernorm.weight": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.bfloat16" } } }, "layers.5.mlp.up_proj": { "stored_tensors": { "layers.5.mlp.up_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.5.mlp.up_proj.svh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.5.mlp.up_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.5.mlp.up_proj.trellis": { "shape": [ 128, 384, 48 ], "n_bytes": 4718592, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3, "mcg_multiplier": 3417055213 }, "layers.5.mlp.gate_proj": { "stored_tensors": { "layers.5.mlp.gate_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.5.mlp.gate_proj.svh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.5.mlp.gate_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.5.mlp.gate_proj.trellis": { "shape": [ 128, 384, 48 ], "n_bytes": 4718592, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3, "mcg_multiplier": 3417055213 }, "layers.5.mlp.down_proj": { "stored_tensors": { "layers.5.mlp.down_proj.suh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.5.mlp.down_proj.svh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.5.mlp.down_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.5.mlp.down_proj.trellis": { "shape": [ 384, 128, 48 ], "n_bytes": 4718592, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3, "mcg_multiplier": 3417055213 }, "layers.6.input_layernorm": { "stored_tensors": { "layers.6.input_layernorm.weight": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.bfloat16" } } }, "layers.6.self_attn.q_proj": { "stored_tensors": { "layers.6.self_attn.q_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.6.self_attn.q_proj.svh": { "shape": [ 4096 ], "n_bytes": 8192, "dtype": "torch.float16" }, "layers.6.self_attn.q_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.6.self_attn.q_proj.trellis": { "shape": [ 128, 256, 64 ], "n_bytes": 4194304, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.6.self_attn.k_proj": { "stored_tensors": { "layers.6.self_attn.k_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.6.self_attn.k_proj.svh": { "shape": [ 512 ], "n_bytes": 1024, "dtype": "torch.float16" }, "layers.6.self_attn.k_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.6.self_attn.k_proj.trellis": { "shape": [ 128, 32, 64 ], "n_bytes": 524288, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.6.self_attn.v_proj": { "stored_tensors": { "layers.6.self_attn.v_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.6.self_attn.v_proj.svh": { "shape": [ 512 ], "n_bytes": 1024, "dtype": "torch.float16" }, "layers.6.self_attn.v_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.6.self_attn.v_proj.trellis": { "shape": [ 128, 32, 64 ], "n_bytes": 524288, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.6.self_attn.o_proj": { "stored_tensors": { "layers.6.self_attn.o_proj.suh": { "shape": [ 4096 ], "n_bytes": 8192, "dtype": "torch.float16" }, "layers.6.self_attn.o_proj.svh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.6.self_attn.o_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.6.self_attn.o_proj.trellis": { "shape": [ 256, 128, 64 ], "n_bytes": 4194304, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.6.self_attn.q_norm": { "stored_tensors": { "layers.6.self_attn.q_norm.weight": { "shape": [ 128 ], "n_bytes": 256, "dtype": "torch.bfloat16" } } }, "layers.6.self_attn.k_norm": { "stored_tensors": { "layers.6.self_attn.k_norm.weight": { "shape": [ 128 ], "n_bytes": 256, "dtype": "torch.bfloat16" } } }, "layers.6.post_attention_layernorm": { "stored_tensors": { "layers.6.post_attention_layernorm.weight": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.bfloat16" } } }, "layers.6.mlp.up_proj": { "stored_tensors": { "layers.6.mlp.up_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.6.mlp.up_proj.svh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.6.mlp.up_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.6.mlp.up_proj.trellis": { "shape": [ 128, 384, 48 ], "n_bytes": 4718592, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3, "mcg_multiplier": 3417055213 }, "layers.6.mlp.gate_proj": { "stored_tensors": { "layers.6.mlp.gate_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.6.mlp.gate_proj.svh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.6.mlp.gate_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.6.mlp.gate_proj.trellis": { "shape": [ 128, 384, 48 ], "n_bytes": 4718592, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3, "mcg_multiplier": 3417055213 }, "layers.6.mlp.down_proj": { "stored_tensors": { "layers.6.mlp.down_proj.suh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.6.mlp.down_proj.svh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.6.mlp.down_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.6.mlp.down_proj.trellis": { "shape": [ 384, 128, 48 ], "n_bytes": 4718592, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3, "mcg_multiplier": 3417055213 }, "layers.7.input_layernorm": { "stored_tensors": { "layers.7.input_layernorm.weight": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.bfloat16" } } }, "layers.7.self_attn.q_proj": { "stored_tensors": { "layers.7.self_attn.q_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.7.self_attn.q_proj.svh": { "shape": [ 4096 ], "n_bytes": 8192, "dtype": "torch.float16" }, "layers.7.self_attn.q_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.7.self_attn.q_proj.trellis": { "shape": [ 128, 256, 64 ], "n_bytes": 4194304, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.7.self_attn.k_proj": { "stored_tensors": { "layers.7.self_attn.k_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.7.self_attn.k_proj.svh": { "shape": [ 512 ], "n_bytes": 1024, "dtype": "torch.float16" }, "layers.7.self_attn.k_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.7.self_attn.k_proj.trellis": { "shape": [ 128, 32, 64 ], "n_bytes": 524288, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.7.self_attn.v_proj": { "stored_tensors": { "layers.7.self_attn.v_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.7.self_attn.v_proj.svh": { "shape": [ 512 ], "n_bytes": 1024, "dtype": "torch.float16" }, "layers.7.self_attn.v_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.7.self_attn.v_proj.trellis": { "shape": [ 128, 32, 64 ], "n_bytes": 524288, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.7.self_attn.o_proj": { "stored_tensors": { "layers.7.self_attn.o_proj.suh": { "shape": [ 4096 ], "n_bytes": 8192, "dtype": "torch.float16" }, "layers.7.self_attn.o_proj.svh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.7.self_attn.o_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.7.self_attn.o_proj.trellis": { "shape": [ 256, 128, 64 ], "n_bytes": 4194304, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 4, "mcg_multiplier": 3417055213 }, "layers.7.self_attn.q_norm": { "stored_tensors": { "layers.7.self_attn.q_norm.weight": { "shape": [ 128 ], "n_bytes": 256, "dtype": "torch.bfloat16" } } }, "layers.7.self_attn.k_norm": { "stored_tensors": { "layers.7.self_attn.k_norm.weight": { "shape": [ 128 ], "n_bytes": 256, "dtype": "torch.bfloat16" } } }, "layers.7.post_attention_layernorm": { "stored_tensors": { "layers.7.post_attention_layernorm.weight": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.bfloat16" } } }, "layers.7.mlp.up_proj": { "stored_tensors": { "layers.7.mlp.up_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.7.mlp.up_proj.svh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.7.mlp.up_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.7.mlp.up_proj.trellis": { "shape": [ 128, 384, 48 ], "n_bytes": 4718592, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3, "mcg_multiplier": 3417055213 }, "layers.7.mlp.gate_proj": { "stored_tensors": { "layers.7.mlp.gate_proj.suh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.7.mlp.gate_proj.svh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.7.mlp.gate_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.7.mlp.gate_proj.trellis": { "shape": [ 128, 384, 48 ], "n_bytes": 4718592, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3, "mcg_multiplier": 3417055213 }, "layers.7.mlp.down_proj": { "stored_tensors": { "layers.7.mlp.down_proj.suh": { "shape": [ 6144 ], "n_bytes": 12288, "dtype": "torch.float16" }, "layers.7.mlp.down_proj.svh": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.float16" }, "layers.7.mlp.down_proj.mcg": { "shape": [], "n_bytes": 4, "dtype": "torch.int32" }, "layers.7.mlp.down_proj.trellis": { "shape": [ 384, 128, 48 ], "n_bytes": 4718592, "dtype": "torch.int16" } }, "quant_format": "exl3", "bits_per_weight": 3, "mcg_multiplier": 3417055213 }, "norm": { "stored_tensors": { "norm.weight": { "shape": [ 2048 ], "n_bytes": 4096, "dtype": "torch.bfloat16" } } } } }