Qwen3.5-397B-A17B-GGUF / logs /quantize-Qwen3.5-397B-A17B-Q8_0.log
ubergarm's picture
provide logs showing imatrix coverage
910390b
Raw
History Blame
158 kB
numactl -N ${SOCKET} -m ${SOCKET} \
./build/bin/llama-quantize \
--pure \
/mnt/data/models/ubergarm/Qwen3.5-397B-A17B-GGUF/Qwen3.5-397B-A17B-BF16-00001-of-00017.gguf \
/mnt/data/models/ubergarm/Qwen3.5-397B-A17B-GGUF/Qwen3.5-397B-A17B-Q8_0.gguf \
Q8_0 \
128
main: build = 8079 (2912b6c80)
main: built with GNU 13.3.0 for Linux x86_64
main: quantizing '/mnt/data/models/ubergarm/Qwen3.5-397B-A17B-GGUF/Qwen3.5-397B-A17B-BF16-00001-of-00017.gguf' to '/mnt/data/models/ubergarm/Qwen3.5-397B-A17B-GGUF/Qwen3.5-397B-A17B-Q8_0.gguf' as Q8_0 using 128 threads
llama_model_loader: additional 16 GGUFs metadata loaded.
llama_model_loader: loaded meta data with 46 key-value pairs and 1098 tensors from /mnt/data/models/ubergarm/Qwen3.5-397B-A17B-GGUF/Qwen3.5-397B-A17B-BF16-00001-of-00017.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = qwen35moe
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.sampling.top_k i32 = 20
llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000
llama_model_loader: - kv 4: general.sampling.temp f32 = 0.600000
llama_model_loader: - kv 5: general.name str = Qwen3.5 397B A17B
llama_model_loader: - kv 6: general.basename str = Qwen3.5
llama_model_loader: - kv 7: general.size_label str = 397B-A17B
llama_model_loader: - kv 8: general.license str = apache-2.0
llama_model_loader: - kv 9: general.license.link str = https://huggingface.co/Qwen/Qwen3.5-3...
llama_model_loader: - kv 10: general.tags arr[str,1] = ["image-text-to-text"]
llama_model_loader: - kv 11: qwen35moe.block_count u32 = 60
llama_model_loader: - kv 12: qwen35moe.context_length u32 = 262144
llama_model_loader: - kv 13: qwen35moe.embedding_length u32 = 4096
llama_model_loader: - kv 14: qwen35moe.attention.head_count u32 = 32
llama_model_loader: - kv 15: qwen35moe.attention.head_count_kv u32 = 2
llama_model_loader: - kv 16: qwen35moe.rope.dimension_sections arr[i32,4] = [11, 11, 10, 0]
llama_model_loader: - kv 17: qwen35moe.rope.freq_base f32 = 10000000.000000
llama_model_loader: - kv 18: qwen35moe.attention.layer_norm_rms_epsilon f32 = 0.000001
llama_model_loader: - kv 19: qwen35moe.expert_count u32 = 512
llama_model_loader: - kv 20: qwen35moe.expert_used_count u32 = 10
llama_model_loader: - kv 21: qwen35moe.attention.key_length u32 = 256
llama_model_loader: - kv 22: qwen35moe.attention.value_length u32 = 256
llama_model_loader: - kv 23: general.file_type u32 = 32
llama_model_loader: - kv 24: qwen35moe.expert_feed_forward_length u32 = 1024
llama_model_loader: - kv 25: qwen35moe.expert_shared_feed_forward_length u32 = 1024
llama_model_loader: - kv 26: qwen35moe.ssm.conv_kernel u32 = 4
llama_model_loader: - kv 27: qwen35moe.ssm.state_size u32 = 128
llama_model_loader: - kv 28: qwen35moe.ssm.group_count u32 = 16
llama_model_loader: - kv 29: qwen35moe.ssm.time_step_rank u32 = 64
llama_model_loader: - kv 30: qwen35moe.ssm.inner_size u32 = 8192
llama_model_loader: - kv 31: qwen35moe.full_attention_interval u32 = 4
llama_model_loader: - kv 32: qwen35moe.rope.dimension_count u32 = 64
llama_model_loader: - kv 33: general.quantization_version u32 = 2
llama_model_loader: - kv 34: tokenizer.ggml.model str = gpt2
llama_model_loader: - kv 35: tokenizer.ggml.pre str = qwen35
llama_model_loader: - kv 36: tokenizer.ggml.tokens arr[str,248320] = ["!", "\"", "#", "$", "%", "&", "'", ...
llama_model_loader: - kv 37: tokenizer.ggml.token_type arr[i32,248320] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv 38: tokenizer.ggml.merges arr[str,247587] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 248046
llama_model_loader: - kv 40: tokenizer.ggml.padding_token_id u32 = 248044
llama_model_loader: - kv 41: tokenizer.ggml.add_bos_token bool = false
llama_model_loader: - kv 42: tokenizer.chat_template str = {%- set image_count = namespace(value...
llama_model_loader: - kv 43: split.no u16 = 0
llama_model_loader: - kv 44: split.count u16 = 17
llama_model_loader: - kv 45: split.tensors.count i32 = 1098
llama_model_loader: - type f32: 451 tensors
llama_model_loader: - type bf16: 647 tensors
[ 1/1098] output.weight - [ 4096, 248320, 1, 1], type = bf16, converting to q8_0 .. size = 1940.00 MiB -> 1030.62 MiB
[ 2/1098] output_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 3/1098] token_embd.weight - [ 4096, 248320, 1, 1], type = bf16, converting to q8_0 .. size = 1940.00 MiB -> 1030.62 MiB
[ 4/1098] blk.0.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 5/1098] blk.0.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 6/1098] blk.0.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 7/1098] blk.0.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 8/1098] blk.0.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 9/1098] blk.0.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 10/1098] blk.0.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 11/1098] blk.0.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 12/1098] blk.0.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 13/1098] blk.0.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 14/1098] blk.0.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 15/1098] blk.0.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 16/1098] blk.0.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 17/1098] blk.0.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 18/1098] blk.0.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 19/1098] blk.0.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 20/1098] blk.0.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 21/1098] blk.0.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 22/1098] blk.0.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 23/1098] blk.1.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 24/1098] blk.1.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 25/1098] blk.1.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 26/1098] blk.1.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 27/1098] blk.1.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 28/1098] blk.1.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 29/1098] blk.1.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 30/1098] blk.1.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 31/1098] blk.1.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 32/1098] blk.1.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 33/1098] blk.1.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 34/1098] blk.1.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 35/1098] blk.1.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 36/1098] blk.1.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 37/1098] blk.1.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 38/1098] blk.1.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 39/1098] blk.1.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 40/1098] blk.1.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 41/1098] blk.1.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 42/1098] blk.2.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 43/1098] blk.2.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 44/1098] blk.2.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 45/1098] blk.2.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 46/1098] blk.2.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 47/1098] blk.2.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 48/1098] blk.2.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 49/1098] blk.2.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 50/1098] blk.2.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 51/1098] blk.2.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 52/1098] blk.2.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 53/1098] blk.2.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 54/1098] blk.2.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 55/1098] blk.2.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 56/1098] blk.2.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 57/1098] blk.2.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 58/1098] blk.2.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 59/1098] blk.2.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 60/1098] blk.2.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 61/1098] blk.3.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 62/1098] blk.3.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 63/1098] blk.3.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 64/1098] blk.3.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 65/1098] blk.3.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB
[ 66/1098] blk.3.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 67/1098] blk.3.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 68/1098] blk.3.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 69/1098] blk.3.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 70/1098] blk.3.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 71/1098] blk.3.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 72/1098] blk.3.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 73/1098] blk.3.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 74/1098] blk.3.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 75/1098] blk.3.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 76/1098] blk.3.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 77/1098] blk.4.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 78/1098] blk.4.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 79/1098] blk.4.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 80/1098] blk.4.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 81/1098] blk.4.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 82/1098] blk.4.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 83/1098] blk.4.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 84/1098] blk.4.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 85/1098] blk.4.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 86/1098] blk.4.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 87/1098] blk.4.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 88/1098] blk.4.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 89/1098] blk.4.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 90/1098] blk.4.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 91/1098] blk.4.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 92/1098] blk.4.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 93/1098] blk.4.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 94/1098] blk.4.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 95/1098] blk.4.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 96/1098] blk.5.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 97/1098] blk.5.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 98/1098] blk.5.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 99/1098] blk.5.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 100/1098] blk.5.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 101/1098] blk.5.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 102/1098] blk.5.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 103/1098] blk.5.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 104/1098] blk.5.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 105/1098] blk.5.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 106/1098] blk.5.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 107/1098] blk.5.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 108/1098] blk.5.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 109/1098] blk.5.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 110/1098] blk.5.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 111/1098] blk.5.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 112/1098] blk.5.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 113/1098] blk.5.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 114/1098] blk.5.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 115/1098] blk.6.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 116/1098] blk.6.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 117/1098] blk.6.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 118/1098] blk.6.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 119/1098] blk.6.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 120/1098] blk.6.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 121/1098] blk.6.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 122/1098] blk.6.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 123/1098] blk.6.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 124/1098] blk.6.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 125/1098] blk.6.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 126/1098] blk.6.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 127/1098] blk.6.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 128/1098] blk.6.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 129/1098] blk.6.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 130/1098] blk.6.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 131/1098] blk.6.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 132/1098] blk.6.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 133/1098] blk.6.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 134/1098] blk.7.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 135/1098] blk.7.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 136/1098] blk.7.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 137/1098] blk.7.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 138/1098] blk.7.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB
[ 139/1098] blk.7.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 140/1098] blk.7.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 141/1098] blk.7.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 142/1098] blk.7.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 143/1098] blk.7.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 144/1098] blk.7.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 145/1098] blk.7.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 146/1098] blk.7.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 147/1098] blk.7.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 148/1098] blk.7.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 149/1098] blk.7.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 150/1098] blk.8.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 151/1098] blk.8.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 152/1098] blk.8.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 153/1098] blk.8.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 154/1098] blk.8.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 155/1098] blk.8.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 156/1098] blk.8.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 157/1098] blk.8.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 158/1098] blk.8.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 159/1098] blk.8.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 160/1098] blk.8.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 161/1098] blk.8.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 162/1098] blk.8.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 163/1098] blk.8.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 164/1098] blk.8.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 165/1098] blk.8.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 166/1098] blk.8.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 167/1098] blk.8.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 168/1098] blk.8.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 169/1098] blk.9.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 170/1098] blk.9.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 171/1098] blk.9.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 172/1098] blk.9.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 173/1098] blk.9.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 174/1098] blk.9.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 175/1098] blk.9.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 176/1098] blk.9.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 177/1098] blk.9.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 178/1098] blk.9.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 179/1098] blk.9.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 180/1098] blk.9.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 181/1098] blk.9.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 182/1098] blk.9.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 183/1098] blk.9.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 184/1098] blk.9.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 185/1098] blk.9.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 186/1098] blk.9.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 187/1098] blk.9.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 188/1098] blk.10.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 189/1098] blk.10.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 190/1098] blk.10.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 191/1098] blk.10.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 192/1098] blk.10.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 193/1098] blk.10.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 194/1098] blk.10.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 195/1098] blk.10.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 196/1098] blk.10.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 197/1098] blk.10.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 198/1098] blk.10.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 199/1098] blk.10.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 200/1098] blk.10.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 201/1098] blk.10.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 202/1098] blk.10.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 203/1098] blk.10.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 204/1098] blk.10.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 205/1098] blk.10.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 206/1098] blk.10.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 207/1098] blk.11.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 208/1098] blk.11.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 209/1098] blk.11.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 210/1098] blk.11.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 211/1098] blk.11.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB
[ 212/1098] blk.11.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 213/1098] blk.11.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 214/1098] blk.11.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 215/1098] blk.11.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 216/1098] blk.11.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 217/1098] blk.11.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 218/1098] blk.11.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 219/1098] blk.11.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 220/1098] blk.11.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 221/1098] blk.11.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 222/1098] blk.11.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 223/1098] blk.12.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 224/1098] blk.12.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 225/1098] blk.12.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 226/1098] blk.12.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 227/1098] blk.12.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 228/1098] blk.12.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 229/1098] blk.12.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 230/1098] blk.12.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 231/1098] blk.12.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 232/1098] blk.12.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 233/1098] blk.12.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 234/1098] blk.12.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 235/1098] blk.12.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 236/1098] blk.12.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 237/1098] blk.12.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 238/1098] blk.12.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 239/1098] blk.12.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 240/1098] blk.12.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 241/1098] blk.12.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 242/1098] blk.13.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 243/1098] blk.13.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 244/1098] blk.13.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 245/1098] blk.13.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 246/1098] blk.13.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 247/1098] blk.13.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 248/1098] blk.13.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 249/1098] blk.13.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 250/1098] blk.13.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 251/1098] blk.13.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 252/1098] blk.13.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 253/1098] blk.13.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 254/1098] blk.13.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 255/1098] blk.13.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 256/1098] blk.13.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 257/1098] blk.13.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 258/1098] blk.13.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 259/1098] blk.13.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 260/1098] blk.13.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 261/1098] blk.14.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 262/1098] blk.14.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 263/1098] blk.14.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 264/1098] blk.14.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 265/1098] blk.14.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 266/1098] blk.14.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 267/1098] blk.14.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 268/1098] blk.14.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 269/1098] blk.14.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 270/1098] blk.14.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 271/1098] blk.14.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 272/1098] blk.14.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 273/1098] blk.14.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 274/1098] blk.14.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 275/1098] blk.14.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 276/1098] blk.14.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 277/1098] blk.14.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 278/1098] blk.14.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 279/1098] blk.14.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 280/1098] blk.15.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 281/1098] blk.15.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 282/1098] blk.15.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 283/1098] blk.15.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 284/1098] blk.15.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB
[ 285/1098] blk.15.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 286/1098] blk.15.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 287/1098] blk.15.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 288/1098] blk.15.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 289/1098] blk.15.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 290/1098] blk.15.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 291/1098] blk.15.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 292/1098] blk.15.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 293/1098] blk.15.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 294/1098] blk.15.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 295/1098] blk.15.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 296/1098] blk.16.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 297/1098] blk.16.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 298/1098] blk.16.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 299/1098] blk.16.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 300/1098] blk.16.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 301/1098] blk.16.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 302/1098] blk.16.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 303/1098] blk.16.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 304/1098] blk.16.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 305/1098] blk.16.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 306/1098] blk.16.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 307/1098] blk.16.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 308/1098] blk.16.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 309/1098] blk.16.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 310/1098] blk.16.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 311/1098] blk.16.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 312/1098] blk.16.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 313/1098] blk.16.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 314/1098] blk.16.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 315/1098] blk.17.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 316/1098] blk.17.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 317/1098] blk.17.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 318/1098] blk.17.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 319/1098] blk.17.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 320/1098] blk.17.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 321/1098] blk.17.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 322/1098] blk.17.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 323/1098] blk.17.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 324/1098] blk.17.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 325/1098] blk.17.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 326/1098] blk.17.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 327/1098] blk.17.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 328/1098] blk.17.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 329/1098] blk.17.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 330/1098] blk.17.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 331/1098] blk.17.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 332/1098] blk.17.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 333/1098] blk.17.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 334/1098] blk.18.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 335/1098] blk.18.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 336/1098] blk.18.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 337/1098] blk.18.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 338/1098] blk.18.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 339/1098] blk.18.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 340/1098] blk.18.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 341/1098] blk.18.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 342/1098] blk.18.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 343/1098] blk.18.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 344/1098] blk.18.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 345/1098] blk.18.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 346/1098] blk.18.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 347/1098] blk.18.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 348/1098] blk.18.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 349/1098] blk.18.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 350/1098] blk.18.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 351/1098] blk.18.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 352/1098] blk.18.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 353/1098] blk.19.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 354/1098] blk.19.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 355/1098] blk.19.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 356/1098] blk.19.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 357/1098] blk.19.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB
[ 358/1098] blk.19.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 359/1098] blk.19.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 360/1098] blk.19.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 361/1098] blk.19.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 362/1098] blk.19.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 363/1098] blk.19.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 364/1098] blk.19.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 365/1098] blk.19.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 366/1098] blk.19.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 367/1098] blk.19.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 368/1098] blk.19.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 369/1098] blk.20.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 370/1098] blk.20.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 371/1098] blk.20.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 372/1098] blk.20.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 373/1098] blk.20.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 374/1098] blk.20.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 375/1098] blk.20.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 376/1098] blk.20.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 377/1098] blk.20.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 378/1098] blk.20.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 379/1098] blk.20.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 380/1098] blk.20.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 381/1098] blk.20.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 382/1098] blk.20.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 383/1098] blk.20.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 384/1098] blk.20.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 385/1098] blk.20.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 386/1098] blk.20.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 387/1098] blk.20.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 388/1098] blk.21.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 389/1098] blk.21.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 390/1098] blk.21.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 391/1098] blk.21.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 392/1098] blk.21.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 393/1098] blk.21.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 394/1098] blk.21.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 395/1098] blk.21.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 396/1098] blk.21.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 397/1098] blk.21.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 398/1098] blk.21.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 399/1098] blk.21.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 400/1098] blk.21.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 401/1098] blk.21.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 402/1098] blk.21.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 403/1098] blk.21.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 404/1098] blk.21.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 405/1098] blk.21.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 406/1098] blk.21.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 407/1098] blk.22.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 408/1098] blk.22.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 409/1098] blk.22.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 410/1098] blk.22.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 411/1098] blk.22.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 412/1098] blk.22.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 413/1098] blk.22.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 414/1098] blk.22.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 415/1098] blk.22.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 416/1098] blk.22.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 417/1098] blk.22.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 418/1098] blk.22.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 419/1098] blk.22.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 420/1098] blk.22.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 421/1098] blk.22.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 422/1098] blk.22.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 423/1098] blk.22.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 424/1098] blk.22.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 425/1098] blk.22.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 426/1098] blk.23.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 427/1098] blk.23.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 428/1098] blk.23.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 429/1098] blk.23.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 430/1098] blk.23.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB
[ 431/1098] blk.23.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 432/1098] blk.23.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 433/1098] blk.23.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 434/1098] blk.23.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 435/1098] blk.23.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 436/1098] blk.23.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 437/1098] blk.23.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 438/1098] blk.23.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 439/1098] blk.23.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 440/1098] blk.23.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 441/1098] blk.23.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 442/1098] blk.24.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 443/1098] blk.24.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 444/1098] blk.24.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 445/1098] blk.24.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 446/1098] blk.24.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 447/1098] blk.24.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 448/1098] blk.24.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 449/1098] blk.24.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 450/1098] blk.24.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 451/1098] blk.24.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 452/1098] blk.24.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 453/1098] blk.24.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 454/1098] blk.24.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 455/1098] blk.24.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 456/1098] blk.24.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 457/1098] blk.24.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 458/1098] blk.24.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 459/1098] blk.24.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 460/1098] blk.24.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 461/1098] blk.25.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 462/1098] blk.25.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 463/1098] blk.25.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 464/1098] blk.25.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 465/1098] blk.25.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 466/1098] blk.25.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 467/1098] blk.25.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 468/1098] blk.25.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 469/1098] blk.25.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 470/1098] blk.25.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 471/1098] blk.25.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 472/1098] blk.25.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 473/1098] blk.25.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 474/1098] blk.25.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 475/1098] blk.25.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 476/1098] blk.25.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 477/1098] blk.25.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 478/1098] blk.25.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 479/1098] blk.25.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 480/1098] blk.26.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 481/1098] blk.26.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 482/1098] blk.26.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 483/1098] blk.26.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 484/1098] blk.26.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 485/1098] blk.26.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 486/1098] blk.26.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 487/1098] blk.26.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 488/1098] blk.26.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 489/1098] blk.26.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 490/1098] blk.26.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 491/1098] blk.26.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 492/1098] blk.26.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 493/1098] blk.26.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 494/1098] blk.26.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 495/1098] blk.26.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 496/1098] blk.26.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 497/1098] blk.26.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 498/1098] blk.26.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 499/1098] blk.27.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 500/1098] blk.27.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 501/1098] blk.27.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 502/1098] blk.27.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 503/1098] blk.27.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB
[ 504/1098] blk.27.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 505/1098] blk.27.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 506/1098] blk.27.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 507/1098] blk.27.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 508/1098] blk.27.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 509/1098] blk.27.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 510/1098] blk.27.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 511/1098] blk.27.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 512/1098] blk.27.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 513/1098] blk.27.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 514/1098] blk.27.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 515/1098] blk.28.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 516/1098] blk.28.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 517/1098] blk.28.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 518/1098] blk.28.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 519/1098] blk.28.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 520/1098] blk.28.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 521/1098] blk.28.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 522/1098] blk.28.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 523/1098] blk.28.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 524/1098] blk.28.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 525/1098] blk.28.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 526/1098] blk.28.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 527/1098] blk.28.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 528/1098] blk.28.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 529/1098] blk.28.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 530/1098] blk.28.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 531/1098] blk.28.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 532/1098] blk.28.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 533/1098] blk.28.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 534/1098] blk.29.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 535/1098] blk.29.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 536/1098] blk.29.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 537/1098] blk.29.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 538/1098] blk.29.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 539/1098] blk.29.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 540/1098] blk.29.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 541/1098] blk.29.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 542/1098] blk.29.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 543/1098] blk.29.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 544/1098] blk.29.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 545/1098] blk.29.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 546/1098] blk.29.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 547/1098] blk.29.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 548/1098] blk.29.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 549/1098] blk.29.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 550/1098] blk.29.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 551/1098] blk.29.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 552/1098] blk.29.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 553/1098] blk.30.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 554/1098] blk.30.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 555/1098] blk.30.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 556/1098] blk.30.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 557/1098] blk.30.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 558/1098] blk.30.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 559/1098] blk.30.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 560/1098] blk.30.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 561/1098] blk.30.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 562/1098] blk.30.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 563/1098] blk.30.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 564/1098] blk.30.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 565/1098] blk.30.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 566/1098] blk.30.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 567/1098] blk.30.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 568/1098] blk.30.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 569/1098] blk.30.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 570/1098] blk.30.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 571/1098] blk.30.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 572/1098] blk.31.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 573/1098] blk.31.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 574/1098] blk.31.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 575/1098] blk.31.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 576/1098] blk.31.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB
[ 577/1098] blk.31.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 578/1098] blk.31.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 579/1098] blk.31.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 580/1098] blk.31.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 581/1098] blk.31.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 582/1098] blk.31.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 583/1098] blk.31.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 584/1098] blk.31.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 585/1098] blk.31.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 586/1098] blk.31.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 587/1098] blk.31.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 588/1098] blk.32.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 589/1098] blk.32.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 590/1098] blk.32.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 591/1098] blk.32.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 592/1098] blk.32.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 593/1098] blk.32.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 594/1098] blk.32.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 595/1098] blk.32.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 596/1098] blk.32.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 597/1098] blk.32.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 598/1098] blk.32.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 599/1098] blk.32.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 600/1098] blk.32.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 601/1098] blk.32.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 602/1098] blk.32.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 603/1098] blk.32.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 604/1098] blk.32.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 605/1098] blk.32.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 606/1098] blk.32.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 607/1098] blk.33.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 608/1098] blk.33.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 609/1098] blk.33.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 610/1098] blk.33.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 611/1098] blk.33.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 612/1098] blk.33.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 613/1098] blk.33.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 614/1098] blk.33.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 615/1098] blk.33.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 616/1098] blk.33.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 617/1098] blk.33.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 618/1098] blk.33.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 619/1098] blk.33.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 620/1098] blk.33.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 621/1098] blk.33.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 622/1098] blk.33.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 623/1098] blk.33.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 624/1098] blk.33.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 625/1098] blk.33.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 626/1098] blk.34.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 627/1098] blk.34.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 628/1098] blk.34.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 629/1098] blk.34.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 630/1098] blk.34.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 631/1098] blk.34.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 632/1098] blk.34.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 633/1098] blk.34.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 634/1098] blk.34.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 635/1098] blk.34.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 636/1098] blk.34.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 637/1098] blk.34.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 638/1098] blk.34.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 639/1098] blk.34.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 640/1098] blk.34.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 641/1098] blk.34.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 642/1098] blk.34.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 643/1098] blk.34.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 644/1098] blk.34.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 645/1098] blk.35.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 646/1098] blk.35.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 647/1098] blk.35.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 648/1098] blk.35.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 649/1098] blk.35.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB
[ 650/1098] blk.35.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 651/1098] blk.35.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 652/1098] blk.35.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 653/1098] blk.35.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 654/1098] blk.35.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 655/1098] blk.35.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 656/1098] blk.35.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 657/1098] blk.35.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 658/1098] blk.35.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 659/1098] blk.35.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 660/1098] blk.35.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 661/1098] blk.36.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 662/1098] blk.36.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 663/1098] blk.36.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 664/1098] blk.36.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 665/1098] blk.36.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 666/1098] blk.36.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 667/1098] blk.36.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 668/1098] blk.36.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 669/1098] blk.36.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 670/1098] blk.36.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 671/1098] blk.36.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 672/1098] blk.36.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 673/1098] blk.36.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 674/1098] blk.36.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 675/1098] blk.36.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 676/1098] blk.36.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 677/1098] blk.36.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 678/1098] blk.36.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 679/1098] blk.36.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 680/1098] blk.37.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 681/1098] blk.37.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 682/1098] blk.37.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 683/1098] blk.37.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 684/1098] blk.37.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 685/1098] blk.37.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 686/1098] blk.37.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 687/1098] blk.37.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 688/1098] blk.37.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 689/1098] blk.37.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 690/1098] blk.37.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 691/1098] blk.37.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 692/1098] blk.37.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 693/1098] blk.37.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 694/1098] blk.37.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 695/1098] blk.37.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 696/1098] blk.37.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 697/1098] blk.37.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 698/1098] blk.37.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 699/1098] blk.38.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 700/1098] blk.38.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 701/1098] blk.38.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 702/1098] blk.38.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 703/1098] blk.38.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 704/1098] blk.38.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 705/1098] blk.38.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 706/1098] blk.38.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 707/1098] blk.38.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 708/1098] blk.38.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 709/1098] blk.38.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 710/1098] blk.38.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 711/1098] blk.38.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 712/1098] blk.38.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 713/1098] blk.38.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 714/1098] blk.38.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 715/1098] blk.38.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 716/1098] blk.38.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 717/1098] blk.38.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 718/1098] blk.39.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 719/1098] blk.39.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 720/1098] blk.39.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 721/1098] blk.39.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 722/1098] blk.39.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB
[ 723/1098] blk.39.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 724/1098] blk.39.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 725/1098] blk.39.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 726/1098] blk.39.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 727/1098] blk.39.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 728/1098] blk.39.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 729/1098] blk.39.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 730/1098] blk.39.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 731/1098] blk.39.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 732/1098] blk.39.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 733/1098] blk.39.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 734/1098] blk.40.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 735/1098] blk.40.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 736/1098] blk.40.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 737/1098] blk.40.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 738/1098] blk.40.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 739/1098] blk.40.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 740/1098] blk.40.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 741/1098] blk.40.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 742/1098] blk.40.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 743/1098] blk.40.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 744/1098] blk.40.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 745/1098] blk.40.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 746/1098] blk.40.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 747/1098] blk.40.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 748/1098] blk.40.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 749/1098] blk.40.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 750/1098] blk.40.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 751/1098] blk.40.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 752/1098] blk.40.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 753/1098] blk.41.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 754/1098] blk.41.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 755/1098] blk.41.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 756/1098] blk.41.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 757/1098] blk.41.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 758/1098] blk.41.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 759/1098] blk.41.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 760/1098] blk.41.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 761/1098] blk.41.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 762/1098] blk.41.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 763/1098] blk.41.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 764/1098] blk.41.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 765/1098] blk.41.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 766/1098] blk.41.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 767/1098] blk.41.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 768/1098] blk.41.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 769/1098] blk.41.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 770/1098] blk.41.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 771/1098] blk.41.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 772/1098] blk.42.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 773/1098] blk.42.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 774/1098] blk.42.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 775/1098] blk.42.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 776/1098] blk.42.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 777/1098] blk.42.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 778/1098] blk.42.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 779/1098] blk.42.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 780/1098] blk.42.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 781/1098] blk.42.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 782/1098] blk.42.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 783/1098] blk.42.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 784/1098] blk.42.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 785/1098] blk.42.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 786/1098] blk.42.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 787/1098] blk.42.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 788/1098] blk.42.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 789/1098] blk.42.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 790/1098] blk.42.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 791/1098] blk.43.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 792/1098] blk.43.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 793/1098] blk.43.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 794/1098] blk.43.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 795/1098] blk.43.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB
[ 796/1098] blk.43.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 797/1098] blk.43.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 798/1098] blk.43.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 799/1098] blk.43.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 800/1098] blk.43.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 801/1098] blk.43.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 802/1098] blk.43.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 803/1098] blk.43.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 804/1098] blk.43.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 805/1098] blk.43.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 806/1098] blk.43.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 807/1098] blk.44.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 808/1098] blk.44.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 809/1098] blk.44.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 810/1098] blk.44.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 811/1098] blk.44.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 812/1098] blk.44.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 813/1098] blk.44.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 814/1098] blk.44.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 815/1098] blk.44.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 816/1098] blk.44.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 817/1098] blk.44.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 818/1098] blk.44.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 819/1098] blk.44.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 820/1098] blk.44.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 821/1098] blk.44.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 822/1098] blk.44.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 823/1098] blk.44.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 824/1098] blk.44.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 825/1098] blk.44.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 826/1098] blk.45.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 827/1098] blk.45.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 828/1098] blk.45.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 829/1098] blk.45.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 830/1098] blk.45.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 831/1098] blk.45.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 832/1098] blk.45.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 833/1098] blk.45.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 834/1098] blk.45.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 835/1098] blk.45.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 836/1098] blk.45.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 837/1098] blk.45.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 838/1098] blk.45.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 839/1098] blk.45.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 840/1098] blk.45.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 841/1098] blk.45.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 842/1098] blk.45.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 843/1098] blk.45.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 844/1098] blk.45.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 845/1098] blk.46.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 846/1098] blk.46.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 847/1098] blk.46.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 848/1098] blk.46.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 849/1098] blk.46.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 850/1098] blk.46.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 851/1098] blk.46.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 852/1098] blk.46.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 853/1098] blk.46.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 854/1098] blk.46.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 855/1098] blk.46.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 856/1098] blk.46.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 857/1098] blk.46.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 858/1098] blk.46.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 859/1098] blk.46.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 860/1098] blk.46.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 861/1098] blk.46.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 862/1098] blk.46.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 863/1098] blk.46.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 864/1098] blk.47.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 865/1098] blk.47.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 866/1098] blk.47.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 867/1098] blk.47.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 868/1098] blk.47.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB
[ 869/1098] blk.47.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 870/1098] blk.47.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 871/1098] blk.47.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 872/1098] blk.47.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 873/1098] blk.47.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 874/1098] blk.47.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 875/1098] blk.47.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 876/1098] blk.47.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 877/1098] blk.47.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 878/1098] blk.47.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 879/1098] blk.47.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 880/1098] blk.48.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 881/1098] blk.48.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 882/1098] blk.48.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 883/1098] blk.48.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 884/1098] blk.48.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 885/1098] blk.48.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 886/1098] blk.48.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 887/1098] blk.48.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 888/1098] blk.48.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 889/1098] blk.48.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 890/1098] blk.48.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 891/1098] blk.48.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 892/1098] blk.48.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 893/1098] blk.48.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 894/1098] blk.48.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 895/1098] blk.48.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 896/1098] blk.48.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 897/1098] blk.48.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 898/1098] blk.48.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 899/1098] blk.49.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 900/1098] blk.49.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 901/1098] blk.49.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 902/1098] blk.49.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 903/1098] blk.49.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 904/1098] blk.49.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 905/1098] blk.49.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 906/1098] blk.49.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 907/1098] blk.49.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 908/1098] blk.49.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 909/1098] blk.49.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 910/1098] blk.49.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 911/1098] blk.49.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 912/1098] blk.49.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 913/1098] blk.49.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 914/1098] blk.49.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 915/1098] blk.49.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 916/1098] blk.49.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 917/1098] blk.49.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 918/1098] blk.50.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 919/1098] blk.50.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 920/1098] blk.50.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 921/1098] blk.50.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 922/1098] blk.50.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 923/1098] blk.50.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 924/1098] blk.50.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 925/1098] blk.50.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 926/1098] blk.50.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 927/1098] blk.50.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 928/1098] blk.50.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 929/1098] blk.50.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 930/1098] blk.50.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 931/1098] blk.50.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 932/1098] blk.50.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 933/1098] blk.50.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 934/1098] blk.50.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 935/1098] blk.50.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 936/1098] blk.50.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 937/1098] blk.51.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 938/1098] blk.51.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 939/1098] blk.51.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 940/1098] blk.51.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 941/1098] blk.51.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB
[ 942/1098] blk.51.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[ 943/1098] blk.51.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[ 944/1098] blk.51.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 945/1098] blk.51.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 946/1098] blk.51.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 947/1098] blk.51.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 948/1098] blk.51.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 949/1098] blk.51.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 950/1098] blk.51.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 951/1098] blk.51.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 952/1098] blk.51.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 953/1098] blk.52.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 954/1098] blk.52.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 955/1098] blk.52.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 956/1098] blk.52.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 957/1098] blk.52.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 958/1098] blk.52.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 959/1098] blk.52.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 960/1098] blk.52.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 961/1098] blk.52.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 962/1098] blk.52.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 963/1098] blk.52.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 964/1098] blk.52.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 965/1098] blk.52.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 966/1098] blk.52.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 967/1098] blk.52.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 968/1098] blk.52.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 969/1098] blk.52.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 970/1098] blk.52.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 971/1098] blk.52.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 972/1098] blk.53.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 973/1098] blk.53.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 974/1098] blk.53.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 975/1098] blk.53.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 976/1098] blk.53.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 977/1098] blk.53.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 978/1098] blk.53.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 979/1098] blk.53.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 980/1098] blk.53.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 981/1098] blk.53.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 982/1098] blk.53.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 983/1098] blk.53.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 984/1098] blk.53.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 985/1098] blk.53.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 986/1098] blk.53.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[ 987/1098] blk.53.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[ 988/1098] blk.53.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[ 989/1098] blk.53.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[ 990/1098] blk.53.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 991/1098] blk.54.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[ 992/1098] blk.54.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 993/1098] blk.54.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[ 994/1098] blk.54.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 995/1098] blk.54.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[ 996/1098] blk.54.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[ 997/1098] blk.54.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[ 998/1098] blk.54.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[ 999/1098] blk.54.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[1000/1098] blk.54.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[1001/1098] blk.54.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[1002/1098] blk.54.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[1003/1098] blk.54.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[1004/1098] blk.54.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[1005/1098] blk.54.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[1006/1098] blk.54.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[1007/1098] blk.54.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[1008/1098] blk.54.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[1009/1098] blk.54.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[1010/1098] blk.55.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[1011/1098] blk.55.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[1012/1098] blk.55.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[1013/1098] blk.55.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[1014/1098] blk.55.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB
[1015/1098] blk.55.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[1016/1098] blk.55.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[1017/1098] blk.55.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[1018/1098] blk.55.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[1019/1098] blk.55.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[1020/1098] blk.55.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[1021/1098] blk.55.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[1022/1098] blk.55.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[1023/1098] blk.55.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[1024/1098] blk.55.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[1025/1098] blk.55.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[1026/1098] blk.56.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[1027/1098] blk.56.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[1028/1098] blk.56.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[1029/1098] blk.56.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[1030/1098] blk.56.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[1031/1098] blk.56.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[1032/1098] blk.56.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[1033/1098] blk.56.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[1034/1098] blk.56.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[1035/1098] blk.56.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[1036/1098] blk.56.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[1037/1098] blk.56.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[1038/1098] blk.56.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[1039/1098] blk.56.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[1040/1098] blk.56.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[1041/1098] blk.56.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[1042/1098] blk.56.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[1043/1098] blk.56.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[1044/1098] blk.56.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[1045/1098] blk.57.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[1046/1098] blk.57.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[1047/1098] blk.57.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[1048/1098] blk.57.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[1049/1098] blk.57.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[1050/1098] blk.57.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[1051/1098] blk.57.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[1052/1098] blk.57.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[1053/1098] blk.57.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[1054/1098] blk.57.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[1055/1098] blk.57.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[1056/1098] blk.57.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[1057/1098] blk.57.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[1058/1098] blk.57.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[1059/1098] blk.57.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[1060/1098] blk.57.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[1061/1098] blk.57.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[1062/1098] blk.57.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[1063/1098] blk.57.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[1064/1098] blk.58.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[1065/1098] blk.58.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[1066/1098] blk.58.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB
[1067/1098] blk.58.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[1068/1098] blk.58.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[1069/1098] blk.58.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[1070/1098] blk.58.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[1071/1098] blk.58.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[1072/1098] blk.58.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[1073/1098] blk.58.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[1074/1098] blk.58.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[1075/1098] blk.58.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[1076/1098] blk.58.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[1077/1098] blk.58.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[1078/1098] blk.58.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB
[1079/1098] blk.58.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB
[1080/1098] blk.58.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB
[1081/1098] blk.58.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB
[1082/1098] blk.58.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[1083/1098] blk.59.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[1084/1098] blk.59.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[1085/1098] blk.59.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[1086/1098] blk.59.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB
[1087/1098] blk.59.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB
[1088/1098] blk.59.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB
[1089/1098] blk.59.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB
[1090/1098] blk.59.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[1091/1098] blk.59.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[1092/1098] blk.59.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[1093/1098] blk.59.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB
[1094/1098] blk.59.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
[1095/1098] blk.59.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[1096/1098] blk.59.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, converting to q8_0 .. size = 4096.00 MiB -> 2176.00 MiB
[1097/1098] blk.59.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB
[1098/1098] blk.59.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB
llama_model_quantize_impl: model size = 756216.34 MiB
llama_model_quantize_impl: quant size = 401970.25 MiB
main: quantize time = 663829.34 ms
main: total time = 663829.35 ms