Qwen3.6-35B-A3B-DFlash-exl3-6.00bpw / quantization_config.json
UnstableLlama's picture
Upload 5 files
54a9dc0 verified
Raw
History Blame Contribute Delete
76.8 kB
{
"quant_method": "exl3",
"version": "0.0.32",
"bits": 6.0,
"head_bits": 6,
"calibration": {
"rows": 250,
"cols": 2048
},
"out_scales": "always",
"codebook": "mcg",
"tensor_storage": {
"fc": {
"stored_tensors": {
"fc.suh": {
"shape": [
10240
],
"n_bytes": 20480,
"dtype": "torch.float16"
},
"fc.svh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"fc.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"fc.trellis": {
"shape": [
640,
128,
96
],
"n_bytes": 15728640,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"hidden_norm": {
"stored_tensors": {
"hidden_norm.weight": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.bfloat16"
}
}
},
"layers.0.input_layernorm": {
"stored_tensors": {
"layers.0.input_layernorm.weight": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.bfloat16"
}
}
},
"layers.0.self_attn.q_proj": {
"stored_tensors": {
"layers.0.self_attn.q_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.0.self_attn.q_proj.svh": {
"shape": [
4096
],
"n_bytes": 8192,
"dtype": "torch.float16"
},
"layers.0.self_attn.q_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.0.self_attn.q_proj.trellis": {
"shape": [
128,
256,
96
],
"n_bytes": 6291456,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.0.self_attn.k_proj": {
"stored_tensors": {
"layers.0.self_attn.k_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.0.self_attn.k_proj.svh": {
"shape": [
512
],
"n_bytes": 1024,
"dtype": "torch.float16"
},
"layers.0.self_attn.k_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.0.self_attn.k_proj.trellis": {
"shape": [
128,
32,
96
],
"n_bytes": 786432,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.0.self_attn.v_proj": {
"stored_tensors": {
"layers.0.self_attn.v_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.0.self_attn.v_proj.svh": {
"shape": [
512
],
"n_bytes": 1024,
"dtype": "torch.float16"
},
"layers.0.self_attn.v_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.0.self_attn.v_proj.trellis": {
"shape": [
128,
32,
96
],
"n_bytes": 786432,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.0.self_attn.o_proj": {
"stored_tensors": {
"layers.0.self_attn.o_proj.suh": {
"shape": [
4096
],
"n_bytes": 8192,
"dtype": "torch.float16"
},
"layers.0.self_attn.o_proj.svh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.0.self_attn.o_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.0.self_attn.o_proj.trellis": {
"shape": [
256,
128,
96
],
"n_bytes": 6291456,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.0.self_attn.q_norm": {
"stored_tensors": {
"layers.0.self_attn.q_norm.weight": {
"shape": [
128
],
"n_bytes": 256,
"dtype": "torch.bfloat16"
}
}
},
"layers.0.self_attn.k_norm": {
"stored_tensors": {
"layers.0.self_attn.k_norm.weight": {
"shape": [
128
],
"n_bytes": 256,
"dtype": "torch.bfloat16"
}
}
},
"layers.0.post_attention_layernorm": {
"stored_tensors": {
"layers.0.post_attention_layernorm.weight": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.bfloat16"
}
}
},
"layers.0.mlp.up_proj": {
"stored_tensors": {
"layers.0.mlp.up_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.0.mlp.up_proj.svh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.0.mlp.up_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.0.mlp.up_proj.trellis": {
"shape": [
128,
384,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.0.mlp.gate_proj": {
"stored_tensors": {
"layers.0.mlp.gate_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.0.mlp.gate_proj.svh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.0.mlp.gate_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.0.mlp.gate_proj.trellis": {
"shape": [
128,
384,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.0.mlp.down_proj": {
"stored_tensors": {
"layers.0.mlp.down_proj.suh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.0.mlp.down_proj.svh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.0.mlp.down_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.0.mlp.down_proj.trellis": {
"shape": [
384,
128,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.1.input_layernorm": {
"stored_tensors": {
"layers.1.input_layernorm.weight": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.bfloat16"
}
}
},
"layers.1.self_attn.q_proj": {
"stored_tensors": {
"layers.1.self_attn.q_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.1.self_attn.q_proj.svh": {
"shape": [
4096
],
"n_bytes": 8192,
"dtype": "torch.float16"
},
"layers.1.self_attn.q_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.1.self_attn.q_proj.trellis": {
"shape": [
128,
256,
96
],
"n_bytes": 6291456,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.1.self_attn.k_proj": {
"stored_tensors": {
"layers.1.self_attn.k_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.1.self_attn.k_proj.svh": {
"shape": [
512
],
"n_bytes": 1024,
"dtype": "torch.float16"
},
"layers.1.self_attn.k_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.1.self_attn.k_proj.trellis": {
"shape": [
128,
32,
96
],
"n_bytes": 786432,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.1.self_attn.v_proj": {
"stored_tensors": {
"layers.1.self_attn.v_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.1.self_attn.v_proj.svh": {
"shape": [
512
],
"n_bytes": 1024,
"dtype": "torch.float16"
},
"layers.1.self_attn.v_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.1.self_attn.v_proj.trellis": {
"shape": [
128,
32,
96
],
"n_bytes": 786432,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.1.self_attn.o_proj": {
"stored_tensors": {
"layers.1.self_attn.o_proj.suh": {
"shape": [
4096
],
"n_bytes": 8192,
"dtype": "torch.float16"
},
"layers.1.self_attn.o_proj.svh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.1.self_attn.o_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.1.self_attn.o_proj.trellis": {
"shape": [
256,
128,
96
],
"n_bytes": 6291456,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.1.self_attn.q_norm": {
"stored_tensors": {
"layers.1.self_attn.q_norm.weight": {
"shape": [
128
],
"n_bytes": 256,
"dtype": "torch.bfloat16"
}
}
},
"layers.1.self_attn.k_norm": {
"stored_tensors": {
"layers.1.self_attn.k_norm.weight": {
"shape": [
128
],
"n_bytes": 256,
"dtype": "torch.bfloat16"
}
}
},
"layers.1.post_attention_layernorm": {
"stored_tensors": {
"layers.1.post_attention_layernorm.weight": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.bfloat16"
}
}
},
"layers.1.mlp.up_proj": {
"stored_tensors": {
"layers.1.mlp.up_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.1.mlp.up_proj.svh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.1.mlp.up_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.1.mlp.up_proj.trellis": {
"shape": [
128,
384,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.1.mlp.gate_proj": {
"stored_tensors": {
"layers.1.mlp.gate_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.1.mlp.gate_proj.svh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.1.mlp.gate_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.1.mlp.gate_proj.trellis": {
"shape": [
128,
384,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.1.mlp.down_proj": {
"stored_tensors": {
"layers.1.mlp.down_proj.suh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.1.mlp.down_proj.svh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.1.mlp.down_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.1.mlp.down_proj.trellis": {
"shape": [
384,
128,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.2.input_layernorm": {
"stored_tensors": {
"layers.2.input_layernorm.weight": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.bfloat16"
}
}
},
"layers.2.self_attn.q_proj": {
"stored_tensors": {
"layers.2.self_attn.q_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.2.self_attn.q_proj.svh": {
"shape": [
4096
],
"n_bytes": 8192,
"dtype": "torch.float16"
},
"layers.2.self_attn.q_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.2.self_attn.q_proj.trellis": {
"shape": [
128,
256,
96
],
"n_bytes": 6291456,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.2.self_attn.k_proj": {
"stored_tensors": {
"layers.2.self_attn.k_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.2.self_attn.k_proj.svh": {
"shape": [
512
],
"n_bytes": 1024,
"dtype": "torch.float16"
},
"layers.2.self_attn.k_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.2.self_attn.k_proj.trellis": {
"shape": [
128,
32,
96
],
"n_bytes": 786432,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.2.self_attn.v_proj": {
"stored_tensors": {
"layers.2.self_attn.v_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.2.self_attn.v_proj.svh": {
"shape": [
512
],
"n_bytes": 1024,
"dtype": "torch.float16"
},
"layers.2.self_attn.v_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.2.self_attn.v_proj.trellis": {
"shape": [
128,
32,
96
],
"n_bytes": 786432,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.2.self_attn.o_proj": {
"stored_tensors": {
"layers.2.self_attn.o_proj.suh": {
"shape": [
4096
],
"n_bytes": 8192,
"dtype": "torch.float16"
},
"layers.2.self_attn.o_proj.svh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.2.self_attn.o_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.2.self_attn.o_proj.trellis": {
"shape": [
256,
128,
96
],
"n_bytes": 6291456,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.2.self_attn.q_norm": {
"stored_tensors": {
"layers.2.self_attn.q_norm.weight": {
"shape": [
128
],
"n_bytes": 256,
"dtype": "torch.bfloat16"
}
}
},
"layers.2.self_attn.k_norm": {
"stored_tensors": {
"layers.2.self_attn.k_norm.weight": {
"shape": [
128
],
"n_bytes": 256,
"dtype": "torch.bfloat16"
}
}
},
"layers.2.post_attention_layernorm": {
"stored_tensors": {
"layers.2.post_attention_layernorm.weight": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.bfloat16"
}
}
},
"layers.2.mlp.up_proj": {
"stored_tensors": {
"layers.2.mlp.up_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.2.mlp.up_proj.svh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.2.mlp.up_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.2.mlp.up_proj.trellis": {
"shape": [
128,
384,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.2.mlp.gate_proj": {
"stored_tensors": {
"layers.2.mlp.gate_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.2.mlp.gate_proj.svh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.2.mlp.gate_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.2.mlp.gate_proj.trellis": {
"shape": [
128,
384,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.2.mlp.down_proj": {
"stored_tensors": {
"layers.2.mlp.down_proj.suh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.2.mlp.down_proj.svh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.2.mlp.down_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.2.mlp.down_proj.trellis": {
"shape": [
384,
128,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.3.input_layernorm": {
"stored_tensors": {
"layers.3.input_layernorm.weight": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.bfloat16"
}
}
},
"layers.3.self_attn.q_proj": {
"stored_tensors": {
"layers.3.self_attn.q_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.3.self_attn.q_proj.svh": {
"shape": [
4096
],
"n_bytes": 8192,
"dtype": "torch.float16"
},
"layers.3.self_attn.q_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.3.self_attn.q_proj.trellis": {
"shape": [
128,
256,
96
],
"n_bytes": 6291456,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.3.self_attn.k_proj": {
"stored_tensors": {
"layers.3.self_attn.k_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.3.self_attn.k_proj.svh": {
"shape": [
512
],
"n_bytes": 1024,
"dtype": "torch.float16"
},
"layers.3.self_attn.k_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.3.self_attn.k_proj.trellis": {
"shape": [
128,
32,
96
],
"n_bytes": 786432,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.3.self_attn.v_proj": {
"stored_tensors": {
"layers.3.self_attn.v_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.3.self_attn.v_proj.svh": {
"shape": [
512
],
"n_bytes": 1024,
"dtype": "torch.float16"
},
"layers.3.self_attn.v_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.3.self_attn.v_proj.trellis": {
"shape": [
128,
32,
96
],
"n_bytes": 786432,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.3.self_attn.o_proj": {
"stored_tensors": {
"layers.3.self_attn.o_proj.suh": {
"shape": [
4096
],
"n_bytes": 8192,
"dtype": "torch.float16"
},
"layers.3.self_attn.o_proj.svh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.3.self_attn.o_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.3.self_attn.o_proj.trellis": {
"shape": [
256,
128,
96
],
"n_bytes": 6291456,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.3.self_attn.q_norm": {
"stored_tensors": {
"layers.3.self_attn.q_norm.weight": {
"shape": [
128
],
"n_bytes": 256,
"dtype": "torch.bfloat16"
}
}
},
"layers.3.self_attn.k_norm": {
"stored_tensors": {
"layers.3.self_attn.k_norm.weight": {
"shape": [
128
],
"n_bytes": 256,
"dtype": "torch.bfloat16"
}
}
},
"layers.3.post_attention_layernorm": {
"stored_tensors": {
"layers.3.post_attention_layernorm.weight": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.bfloat16"
}
}
},
"layers.3.mlp.up_proj": {
"stored_tensors": {
"layers.3.mlp.up_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.3.mlp.up_proj.svh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.3.mlp.up_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.3.mlp.up_proj.trellis": {
"shape": [
128,
384,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.3.mlp.gate_proj": {
"stored_tensors": {
"layers.3.mlp.gate_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.3.mlp.gate_proj.svh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.3.mlp.gate_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.3.mlp.gate_proj.trellis": {
"shape": [
128,
384,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.3.mlp.down_proj": {
"stored_tensors": {
"layers.3.mlp.down_proj.suh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.3.mlp.down_proj.svh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.3.mlp.down_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.3.mlp.down_proj.trellis": {
"shape": [
384,
128,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.4.input_layernorm": {
"stored_tensors": {
"layers.4.input_layernorm.weight": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.bfloat16"
}
}
},
"layers.4.self_attn.q_proj": {
"stored_tensors": {
"layers.4.self_attn.q_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.4.self_attn.q_proj.svh": {
"shape": [
4096
],
"n_bytes": 8192,
"dtype": "torch.float16"
},
"layers.4.self_attn.q_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.4.self_attn.q_proj.trellis": {
"shape": [
128,
256,
96
],
"n_bytes": 6291456,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.4.self_attn.k_proj": {
"stored_tensors": {
"layers.4.self_attn.k_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.4.self_attn.k_proj.svh": {
"shape": [
512
],
"n_bytes": 1024,
"dtype": "torch.float16"
},
"layers.4.self_attn.k_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.4.self_attn.k_proj.trellis": {
"shape": [
128,
32,
96
],
"n_bytes": 786432,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.4.self_attn.v_proj": {
"stored_tensors": {
"layers.4.self_attn.v_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.4.self_attn.v_proj.svh": {
"shape": [
512
],
"n_bytes": 1024,
"dtype": "torch.float16"
},
"layers.4.self_attn.v_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.4.self_attn.v_proj.trellis": {
"shape": [
128,
32,
96
],
"n_bytes": 786432,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.4.self_attn.o_proj": {
"stored_tensors": {
"layers.4.self_attn.o_proj.suh": {
"shape": [
4096
],
"n_bytes": 8192,
"dtype": "torch.float16"
},
"layers.4.self_attn.o_proj.svh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.4.self_attn.o_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.4.self_attn.o_proj.trellis": {
"shape": [
256,
128,
96
],
"n_bytes": 6291456,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.4.self_attn.q_norm": {
"stored_tensors": {
"layers.4.self_attn.q_norm.weight": {
"shape": [
128
],
"n_bytes": 256,
"dtype": "torch.bfloat16"
}
}
},
"layers.4.self_attn.k_norm": {
"stored_tensors": {
"layers.4.self_attn.k_norm.weight": {
"shape": [
128
],
"n_bytes": 256,
"dtype": "torch.bfloat16"
}
}
},
"layers.4.post_attention_layernorm": {
"stored_tensors": {
"layers.4.post_attention_layernorm.weight": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.bfloat16"
}
}
},
"layers.4.mlp.up_proj": {
"stored_tensors": {
"layers.4.mlp.up_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.4.mlp.up_proj.svh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.4.mlp.up_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.4.mlp.up_proj.trellis": {
"shape": [
128,
384,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.4.mlp.gate_proj": {
"stored_tensors": {
"layers.4.mlp.gate_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.4.mlp.gate_proj.svh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.4.mlp.gate_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.4.mlp.gate_proj.trellis": {
"shape": [
128,
384,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.4.mlp.down_proj": {
"stored_tensors": {
"layers.4.mlp.down_proj.suh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.4.mlp.down_proj.svh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.4.mlp.down_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.4.mlp.down_proj.trellis": {
"shape": [
384,
128,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.5.input_layernorm": {
"stored_tensors": {
"layers.5.input_layernorm.weight": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.bfloat16"
}
}
},
"layers.5.self_attn.q_proj": {
"stored_tensors": {
"layers.5.self_attn.q_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.5.self_attn.q_proj.svh": {
"shape": [
4096
],
"n_bytes": 8192,
"dtype": "torch.float16"
},
"layers.5.self_attn.q_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.5.self_attn.q_proj.trellis": {
"shape": [
128,
256,
96
],
"n_bytes": 6291456,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.5.self_attn.k_proj": {
"stored_tensors": {
"layers.5.self_attn.k_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.5.self_attn.k_proj.svh": {
"shape": [
512
],
"n_bytes": 1024,
"dtype": "torch.float16"
},
"layers.5.self_attn.k_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.5.self_attn.k_proj.trellis": {
"shape": [
128,
32,
96
],
"n_bytes": 786432,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.5.self_attn.v_proj": {
"stored_tensors": {
"layers.5.self_attn.v_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.5.self_attn.v_proj.svh": {
"shape": [
512
],
"n_bytes": 1024,
"dtype": "torch.float16"
},
"layers.5.self_attn.v_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.5.self_attn.v_proj.trellis": {
"shape": [
128,
32,
96
],
"n_bytes": 786432,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.5.self_attn.o_proj": {
"stored_tensors": {
"layers.5.self_attn.o_proj.suh": {
"shape": [
4096
],
"n_bytes": 8192,
"dtype": "torch.float16"
},
"layers.5.self_attn.o_proj.svh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.5.self_attn.o_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.5.self_attn.o_proj.trellis": {
"shape": [
256,
128,
96
],
"n_bytes": 6291456,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.5.self_attn.q_norm": {
"stored_tensors": {
"layers.5.self_attn.q_norm.weight": {
"shape": [
128
],
"n_bytes": 256,
"dtype": "torch.bfloat16"
}
}
},
"layers.5.self_attn.k_norm": {
"stored_tensors": {
"layers.5.self_attn.k_norm.weight": {
"shape": [
128
],
"n_bytes": 256,
"dtype": "torch.bfloat16"
}
}
},
"layers.5.post_attention_layernorm": {
"stored_tensors": {
"layers.5.post_attention_layernorm.weight": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.bfloat16"
}
}
},
"layers.5.mlp.up_proj": {
"stored_tensors": {
"layers.5.mlp.up_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.5.mlp.up_proj.svh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.5.mlp.up_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.5.mlp.up_proj.trellis": {
"shape": [
128,
384,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.5.mlp.gate_proj": {
"stored_tensors": {
"layers.5.mlp.gate_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.5.mlp.gate_proj.svh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.5.mlp.gate_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.5.mlp.gate_proj.trellis": {
"shape": [
128,
384,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.5.mlp.down_proj": {
"stored_tensors": {
"layers.5.mlp.down_proj.suh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.5.mlp.down_proj.svh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.5.mlp.down_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.5.mlp.down_proj.trellis": {
"shape": [
384,
128,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.6.input_layernorm": {
"stored_tensors": {
"layers.6.input_layernorm.weight": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.bfloat16"
}
}
},
"layers.6.self_attn.q_proj": {
"stored_tensors": {
"layers.6.self_attn.q_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.6.self_attn.q_proj.svh": {
"shape": [
4096
],
"n_bytes": 8192,
"dtype": "torch.float16"
},
"layers.6.self_attn.q_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.6.self_attn.q_proj.trellis": {
"shape": [
128,
256,
96
],
"n_bytes": 6291456,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.6.self_attn.k_proj": {
"stored_tensors": {
"layers.6.self_attn.k_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.6.self_attn.k_proj.svh": {
"shape": [
512
],
"n_bytes": 1024,
"dtype": "torch.float16"
},
"layers.6.self_attn.k_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.6.self_attn.k_proj.trellis": {
"shape": [
128,
32,
96
],
"n_bytes": 786432,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.6.self_attn.v_proj": {
"stored_tensors": {
"layers.6.self_attn.v_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.6.self_attn.v_proj.svh": {
"shape": [
512
],
"n_bytes": 1024,
"dtype": "torch.float16"
},
"layers.6.self_attn.v_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.6.self_attn.v_proj.trellis": {
"shape": [
128,
32,
96
],
"n_bytes": 786432,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.6.self_attn.o_proj": {
"stored_tensors": {
"layers.6.self_attn.o_proj.suh": {
"shape": [
4096
],
"n_bytes": 8192,
"dtype": "torch.float16"
},
"layers.6.self_attn.o_proj.svh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.6.self_attn.o_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.6.self_attn.o_proj.trellis": {
"shape": [
256,
128,
96
],
"n_bytes": 6291456,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.6.self_attn.q_norm": {
"stored_tensors": {
"layers.6.self_attn.q_norm.weight": {
"shape": [
128
],
"n_bytes": 256,
"dtype": "torch.bfloat16"
}
}
},
"layers.6.self_attn.k_norm": {
"stored_tensors": {
"layers.6.self_attn.k_norm.weight": {
"shape": [
128
],
"n_bytes": 256,
"dtype": "torch.bfloat16"
}
}
},
"layers.6.post_attention_layernorm": {
"stored_tensors": {
"layers.6.post_attention_layernorm.weight": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.bfloat16"
}
}
},
"layers.6.mlp.up_proj": {
"stored_tensors": {
"layers.6.mlp.up_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.6.mlp.up_proj.svh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.6.mlp.up_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.6.mlp.up_proj.trellis": {
"shape": [
128,
384,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.6.mlp.gate_proj": {
"stored_tensors": {
"layers.6.mlp.gate_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.6.mlp.gate_proj.svh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.6.mlp.gate_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.6.mlp.gate_proj.trellis": {
"shape": [
128,
384,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.6.mlp.down_proj": {
"stored_tensors": {
"layers.6.mlp.down_proj.suh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.6.mlp.down_proj.svh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.6.mlp.down_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.6.mlp.down_proj.trellis": {
"shape": [
384,
128,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.7.input_layernorm": {
"stored_tensors": {
"layers.7.input_layernorm.weight": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.bfloat16"
}
}
},
"layers.7.self_attn.q_proj": {
"stored_tensors": {
"layers.7.self_attn.q_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.7.self_attn.q_proj.svh": {
"shape": [
4096
],
"n_bytes": 8192,
"dtype": "torch.float16"
},
"layers.7.self_attn.q_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.7.self_attn.q_proj.trellis": {
"shape": [
128,
256,
96
],
"n_bytes": 6291456,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.7.self_attn.k_proj": {
"stored_tensors": {
"layers.7.self_attn.k_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.7.self_attn.k_proj.svh": {
"shape": [
512
],
"n_bytes": 1024,
"dtype": "torch.float16"
},
"layers.7.self_attn.k_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.7.self_attn.k_proj.trellis": {
"shape": [
128,
32,
96
],
"n_bytes": 786432,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.7.self_attn.v_proj": {
"stored_tensors": {
"layers.7.self_attn.v_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.7.self_attn.v_proj.svh": {
"shape": [
512
],
"n_bytes": 1024,
"dtype": "torch.float16"
},
"layers.7.self_attn.v_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.7.self_attn.v_proj.trellis": {
"shape": [
128,
32,
96
],
"n_bytes": 786432,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.7.self_attn.o_proj": {
"stored_tensors": {
"layers.7.self_attn.o_proj.suh": {
"shape": [
4096
],
"n_bytes": 8192,
"dtype": "torch.float16"
},
"layers.7.self_attn.o_proj.svh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.7.self_attn.o_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.7.self_attn.o_proj.trellis": {
"shape": [
256,
128,
96
],
"n_bytes": 6291456,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.7.self_attn.q_norm": {
"stored_tensors": {
"layers.7.self_attn.q_norm.weight": {
"shape": [
128
],
"n_bytes": 256,
"dtype": "torch.bfloat16"
}
}
},
"layers.7.self_attn.k_norm": {
"stored_tensors": {
"layers.7.self_attn.k_norm.weight": {
"shape": [
128
],
"n_bytes": 256,
"dtype": "torch.bfloat16"
}
}
},
"layers.7.post_attention_layernorm": {
"stored_tensors": {
"layers.7.post_attention_layernorm.weight": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.bfloat16"
}
}
},
"layers.7.mlp.up_proj": {
"stored_tensors": {
"layers.7.mlp.up_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.7.mlp.up_proj.svh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.7.mlp.up_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.7.mlp.up_proj.trellis": {
"shape": [
128,
384,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.7.mlp.gate_proj": {
"stored_tensors": {
"layers.7.mlp.gate_proj.suh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.7.mlp.gate_proj.svh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.7.mlp.gate_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.7.mlp.gate_proj.trellis": {
"shape": [
128,
384,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"layers.7.mlp.down_proj": {
"stored_tensors": {
"layers.7.mlp.down_proj.suh": {
"shape": [
6144
],
"n_bytes": 12288,
"dtype": "torch.float16"
},
"layers.7.mlp.down_proj.svh": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.float16"
},
"layers.7.mlp.down_proj.mcg": {
"shape": [],
"n_bytes": 4,
"dtype": "torch.int32"
},
"layers.7.mlp.down_proj.trellis": {
"shape": [
384,
128,
96
],
"n_bytes": 9437184,
"dtype": "torch.int16"
}
},
"quant_format": "exl3",
"bits_per_weight": 6,
"mcg_multiplier": 3417055213
},
"norm": {
"stored_tensors": {
"norm.weight": {
"shape": [
2048
],
"n_bytes": 4096,
"dtype": "torch.bfloat16"
}
}
}
}
}