# refer to secret recipe in README modelcard

main: build = 8083 (6fdfce757)
main: built with GNU 13.3.0 for Linux x86_64
main: quantizing '/mnt/data/models/ubergarm/Qwen3.5-397B-A17B-GGUF/Qwen3.5-397B-A17B-BF16-00001-of-00017.gguf' to '/mnt/data/models/ubergarm/Qwen3.5-397B-A17B-GGUF/Qwen3.5-397B-A17B-Q3_K.gguf' as Q8_0 using 128 threads
llama_model_loader: additional 16 GGUFs metadata loaded.
llama_model_loader: loaded meta data with 46 key-value pairs and 1098 tensors from /mnt/data/models/ubergarm/Qwen3.5-397B-A17B-GGUF/Qwen3.5-397B-A17B-BF16-00001-of-00017.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = qwen35moe
llama_model_loader: - kv   1:                               general.type str              = model
llama_model_loader: - kv   2:                     general.sampling.top_k i32              = 20
llama_model_loader: - kv   3:                     general.sampling.top_p f32              = 0.950000
llama_model_loader: - kv   4:                      general.sampling.temp f32              = 0.600000
llama_model_loader: - kv   5:                               general.name str              = Qwen3.5 397B A17B
llama_model_loader: - kv   6:                           general.basename str              = Qwen3.5
llama_model_loader: - kv   7:                         general.size_label str              = 397B-A17B
llama_model_loader: - kv   8:                            general.license str              = apache-2.0
llama_model_loader: - kv   9:                       general.license.link str              = https://huggingface.co/Qwen/Qwen3.5-3...
llama_model_loader: - kv  10:                               general.tags arr[str,1]       = ["image-text-to-text"]
llama_model_loader: - kv  11:                      qwen35moe.block_count u32              = 60
llama_model_loader: - kv  12:                   qwen35moe.context_length u32              = 262144
llama_model_loader: - kv  13:                 qwen35moe.embedding_length u32              = 4096
llama_model_loader: - kv  14:             qwen35moe.attention.head_count u32              = 32
llama_model_loader: - kv  15:          qwen35moe.attention.head_count_kv u32              = 2
llama_model_loader: - kv  16:          qwen35moe.rope.dimension_sections arr[i32,4]       = [11, 11, 10, 0]
llama_model_loader: - kv  17:                   qwen35moe.rope.freq_base f32              = 10000000.000000
llama_model_loader: - kv  18: qwen35moe.attention.layer_norm_rms_epsilon f32              = 0.000001
llama_model_loader: - kv  19:                     qwen35moe.expert_count u32              = 512
llama_model_loader: - kv  20:                qwen35moe.expert_used_count u32              = 10
llama_model_loader: - kv  21:             qwen35moe.attention.key_length u32              = 256
llama_model_loader: - kv  22:           qwen35moe.attention.value_length u32              = 256
llama_model_loader: - kv  23:                          general.file_type u32              = 32
llama_model_loader: - kv  24:       qwen35moe.expert_feed_forward_length u32              = 1024
llama_model_loader: - kv  25: qwen35moe.expert_shared_feed_forward_length u32              = 1024
llama_model_loader: - kv  26:                  qwen35moe.ssm.conv_kernel u32              = 4
llama_model_loader: - kv  27:                   qwen35moe.ssm.state_size u32              = 128
llama_model_loader: - kv  28:                  qwen35moe.ssm.group_count u32              = 16
llama_model_loader: - kv  29:               qwen35moe.ssm.time_step_rank u32              = 64
llama_model_loader: - kv  30:                   qwen35moe.ssm.inner_size u32              = 8192
llama_model_loader: - kv  31:          qwen35moe.full_attention_interval u32              = 4
llama_model_loader: - kv  32:             qwen35moe.rope.dimension_count u32              = 64
llama_model_loader: - kv  33:               general.quantization_version u32              = 2
llama_model_loader: - kv  34:                       tokenizer.ggml.model str              = gpt2
llama_model_loader: - kv  35:                         tokenizer.ggml.pre str              = qwen35
llama_model_loader: - kv  36:                      tokenizer.ggml.tokens arr[str,248320]  = ["!", "\"", "#", "$", "%", "&", "'", ...
llama_model_loader: - kv  37:                  tokenizer.ggml.token_type arr[i32,248320]  = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv  38:                      tokenizer.ggml.merges arr[str,247587]  = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
llama_model_loader: - kv  39:                tokenizer.ggml.eos_token_id u32              = 248046
llama_model_loader: - kv  40:            tokenizer.ggml.padding_token_id u32              = 248044
llama_model_loader: - kv  41:               tokenizer.ggml.add_bos_token bool             = false
llama_model_loader: - kv  42:                    tokenizer.chat_template str              = {%- set image_count = namespace(value...
llama_model_loader: - kv  43:                                   split.no u16              = 0
llama_model_loader: - kv  44:                                split.count u16              = 17
llama_model_loader: - kv  45:                        split.tensors.count i32              = 1098
llama_model_loader: - type  f32:  451 tensors
llama_model_loader: - type bf16:  647 tensors
================================ Have weights data with 765 entries
[   1/1098]                        output.weight - [ 4096, 248320,     1,     1], type =   bf16, 
====== llama_model_quantize_impl: did not find weights for output.weight
converting to q6_K .. load_imatrix: imatrix datasets=['ubergarm-imatrix-calibration-corpus-v02.txt']
load_imatrix: loaded 765 importance matrix entries from /mnt/data/models/ubergarm/Qwen3.5-397B-A17B-GGUF/imatrix-Qwen3.5-397B-A17B-BF16-mainline.gguf computed on 829 chunks
prepare_imatrix: have 765 importance matrix entries
size =  1940.00 MiB ->   795.70 MiB
[   2/1098]                   output_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[   3/1098]                    token_embd.weight - [ 4096, 248320,     1,     1], type =   bf16, 
====== llama_model_quantize_impl: did not find weights for token_embd.weight
converting to q4_K .. size =  1940.00 MiB ->   545.62 MiB
[   4/1098]               blk.0.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[   5/1098]               blk.0.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[   6/1098]                blk.0.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[   7/1098]           blk.0.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[   8/1098]          blk.0.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[   9/1098]           blk.0.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[  10/1098]            blk.0.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[  11/1098]      blk.0.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[  12/1098]          blk.0.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[  13/1098]             blk.0.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[  14/1098]            blk.0.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[  15/1098]     blk.0.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[  16/1098]                          blk.0.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[  17/1098]               blk.0.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[  18/1098]                blk.0.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[  19/1098]              blk.0.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[  20/1098]                    blk.0.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[  21/1098]                blk.0.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[  22/1098]                 blk.0.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[  23/1098]               blk.1.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[  24/1098]               blk.1.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[  25/1098]                blk.1.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[  26/1098]           blk.1.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[  27/1098]          blk.1.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[  28/1098]           blk.1.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[  29/1098]            blk.1.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[  30/1098]      blk.1.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[  31/1098]          blk.1.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[  32/1098]             blk.1.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[  33/1098]            blk.1.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[  34/1098]     blk.1.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[  35/1098]                          blk.1.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[  36/1098]               blk.1.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[  37/1098]                blk.1.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[  38/1098]              blk.1.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[  39/1098]                    blk.1.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[  40/1098]                blk.1.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[  41/1098]                 blk.1.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[  42/1098]               blk.2.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[  43/1098]               blk.2.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[  44/1098]                blk.2.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[  45/1098]           blk.2.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[  46/1098]          blk.2.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[  47/1098]           blk.2.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[  48/1098]            blk.2.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[  49/1098]      blk.2.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[  50/1098]          blk.2.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[  51/1098]             blk.2.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[  52/1098]            blk.2.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[  53/1098]     blk.2.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[  54/1098]                          blk.2.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[  55/1098]               blk.2.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[  56/1098]                blk.2.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[  57/1098]              blk.2.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[  58/1098]                    blk.2.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[  59/1098]                blk.2.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[  60/1098]                 blk.2.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[  61/1098]                  blk.3.attn_k.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[  62/1098]             blk.3.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[  63/1098]               blk.3.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[  64/1098]             blk.3.attn_output.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[  65/1098]                  blk.3.attn_q.weight - [ 4096, 16384,     1,     1], type =   bf16, converting to q8_0 .. size =   128.00 MiB ->    68.00 MiB
[  66/1098]             blk.3.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[  67/1098]                  blk.3.attn_v.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[  68/1098]           blk.3.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[  69/1098]          blk.3.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[  70/1098]           blk.3.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[  71/1098]            blk.3.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[  72/1098]      blk.3.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[  73/1098]          blk.3.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[  74/1098]             blk.3.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[  75/1098]            blk.3.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[  76/1098]     blk.3.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[  77/1098]               blk.4.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[  78/1098]               blk.4.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[  79/1098]                blk.4.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[  80/1098]           blk.4.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[  81/1098]          blk.4.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[  82/1098]           blk.4.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[  83/1098]            blk.4.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[  84/1098]      blk.4.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[  85/1098]          blk.4.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[  86/1098]             blk.4.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[  87/1098]            blk.4.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[  88/1098]     blk.4.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[  89/1098]                          blk.4.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[  90/1098]               blk.4.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[  91/1098]                blk.4.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[  92/1098]              blk.4.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[  93/1098]                    blk.4.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[  94/1098]                blk.4.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[  95/1098]                 blk.4.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[  96/1098]               blk.5.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[  97/1098]               blk.5.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[  98/1098]                blk.5.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[  99/1098]           blk.5.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 100/1098]          blk.5.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 101/1098]           blk.5.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 102/1098]            blk.5.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 103/1098]      blk.5.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 104/1098]          blk.5.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 105/1098]             blk.5.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 106/1098]            blk.5.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 107/1098]     blk.5.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 108/1098]                          blk.5.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 109/1098]               blk.5.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 110/1098]                blk.5.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 111/1098]              blk.5.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 112/1098]                    blk.5.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 113/1098]                blk.5.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 114/1098]                 blk.5.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 115/1098]               blk.6.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 116/1098]               blk.6.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 117/1098]                blk.6.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 118/1098]           blk.6.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 119/1098]          blk.6.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 120/1098]           blk.6.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 121/1098]            blk.6.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 122/1098]      blk.6.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 123/1098]          blk.6.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 124/1098]             blk.6.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 125/1098]            blk.6.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 126/1098]     blk.6.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 127/1098]                          blk.6.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 128/1098]               blk.6.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 129/1098]                blk.6.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 130/1098]              blk.6.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 131/1098]                    blk.6.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 132/1098]                blk.6.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 133/1098]                 blk.6.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 134/1098]                  blk.7.attn_k.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 135/1098]             blk.7.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 136/1098]               blk.7.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 137/1098]             blk.7.attn_output.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 138/1098]                  blk.7.attn_q.weight - [ 4096, 16384,     1,     1], type =   bf16, converting to q8_0 .. size =   128.00 MiB ->    68.00 MiB
[ 139/1098]             blk.7.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 140/1098]                  blk.7.attn_v.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 141/1098]           blk.7.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 142/1098]          blk.7.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 143/1098]           blk.7.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 144/1098]            blk.7.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 145/1098]      blk.7.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 146/1098]          blk.7.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 147/1098]             blk.7.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 148/1098]            blk.7.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 149/1098]     blk.7.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 150/1098]               blk.8.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 151/1098]               blk.8.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 152/1098]                blk.8.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 153/1098]           blk.8.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 154/1098]          blk.8.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 155/1098]           blk.8.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 156/1098]            blk.8.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 157/1098]      blk.8.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 158/1098]          blk.8.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 159/1098]             blk.8.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 160/1098]            blk.8.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 161/1098]     blk.8.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 162/1098]                          blk.8.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 163/1098]               blk.8.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 164/1098]                blk.8.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 165/1098]              blk.8.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 166/1098]                    blk.8.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 167/1098]                blk.8.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 168/1098]                 blk.8.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 169/1098]               blk.9.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 170/1098]               blk.9.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 171/1098]                blk.9.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 172/1098]           blk.9.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 173/1098]          blk.9.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 174/1098]           blk.9.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 175/1098]            blk.9.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 176/1098]      blk.9.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 177/1098]          blk.9.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 178/1098]             blk.9.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 179/1098]            blk.9.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 180/1098]     blk.9.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 181/1098]                          blk.9.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 182/1098]               blk.9.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 183/1098]                blk.9.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 184/1098]              blk.9.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 185/1098]                    blk.9.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 186/1098]                blk.9.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 187/1098]                 blk.9.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 188/1098]              blk.10.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 189/1098]              blk.10.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 190/1098]               blk.10.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 191/1098]          blk.10.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 192/1098]         blk.10.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 193/1098]          blk.10.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 194/1098]           blk.10.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 195/1098]     blk.10.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 196/1098]         blk.10.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 197/1098]            blk.10.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 198/1098]           blk.10.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 199/1098]    blk.10.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 200/1098]                         blk.10.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 201/1098]              blk.10.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 202/1098]               blk.10.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 203/1098]             blk.10.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 204/1098]                   blk.10.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 205/1098]               blk.10.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 206/1098]                blk.10.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 207/1098]                 blk.11.attn_k.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 208/1098]            blk.11.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 209/1098]              blk.11.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 210/1098]            blk.11.attn_output.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 211/1098]                 blk.11.attn_q.weight - [ 4096, 16384,     1,     1], type =   bf16, converting to q8_0 .. size =   128.00 MiB ->    68.00 MiB
[ 212/1098]            blk.11.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 213/1098]                 blk.11.attn_v.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 214/1098]          blk.11.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 215/1098]         blk.11.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 216/1098]          blk.11.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 217/1098]           blk.11.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 218/1098]     blk.11.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 219/1098]         blk.11.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 220/1098]            blk.11.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 221/1098]           blk.11.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 222/1098]    blk.11.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 223/1098]              blk.12.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 224/1098]              blk.12.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 225/1098]               blk.12.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 226/1098]          blk.12.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 227/1098]         blk.12.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 228/1098]          blk.12.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 229/1098]           blk.12.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 230/1098]     blk.12.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 231/1098]         blk.12.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 232/1098]            blk.12.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 233/1098]           blk.12.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 234/1098]    blk.12.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 235/1098]                         blk.12.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 236/1098]              blk.12.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 237/1098]               blk.12.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 238/1098]             blk.12.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 239/1098]                   blk.12.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 240/1098]               blk.12.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 241/1098]                blk.12.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 242/1098]              blk.13.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 243/1098]              blk.13.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 244/1098]               blk.13.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 245/1098]          blk.13.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 246/1098]         blk.13.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 247/1098]          blk.13.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 248/1098]           blk.13.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 249/1098]     blk.13.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 250/1098]         blk.13.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 251/1098]            blk.13.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 252/1098]           blk.13.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 253/1098]    blk.13.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 254/1098]                         blk.13.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 255/1098]              blk.13.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 256/1098]               blk.13.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 257/1098]             blk.13.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 258/1098]                   blk.13.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 259/1098]               blk.13.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 260/1098]                blk.13.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 261/1098]              blk.14.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 262/1098]              blk.14.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 263/1098]               blk.14.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 264/1098]          blk.14.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 265/1098]         blk.14.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 266/1098]          blk.14.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 267/1098]           blk.14.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 268/1098]     blk.14.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 269/1098]         blk.14.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 270/1098]            blk.14.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 271/1098]           blk.14.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 272/1098]    blk.14.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 273/1098]                         blk.14.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 274/1098]              blk.14.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 275/1098]               blk.14.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 276/1098]             blk.14.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 277/1098]                   blk.14.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 278/1098]               blk.14.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 279/1098]                blk.14.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 280/1098]                 blk.15.attn_k.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 281/1098]            blk.15.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 282/1098]              blk.15.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 283/1098]            blk.15.attn_output.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 284/1098]                 blk.15.attn_q.weight - [ 4096, 16384,     1,     1], type =   bf16, converting to q8_0 .. size =   128.00 MiB ->    68.00 MiB
[ 285/1098]            blk.15.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 286/1098]                 blk.15.attn_v.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 287/1098]          blk.15.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 288/1098]         blk.15.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 289/1098]          blk.15.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 290/1098]           blk.15.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 291/1098]     blk.15.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 292/1098]         blk.15.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 293/1098]            blk.15.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 294/1098]           blk.15.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 295/1098]    blk.15.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 296/1098]              blk.16.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 297/1098]              blk.16.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 298/1098]               blk.16.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 299/1098]          blk.16.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 300/1098]         blk.16.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 301/1098]          blk.16.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 302/1098]           blk.16.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 303/1098]     blk.16.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 304/1098]         blk.16.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 305/1098]            blk.16.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 306/1098]           blk.16.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 307/1098]    blk.16.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 308/1098]                         blk.16.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 309/1098]              blk.16.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 310/1098]               blk.16.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 311/1098]             blk.16.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 312/1098]                   blk.16.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 313/1098]               blk.16.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 314/1098]                blk.16.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 315/1098]              blk.17.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 316/1098]              blk.17.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 317/1098]               blk.17.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 318/1098]          blk.17.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 319/1098]         blk.17.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 320/1098]          blk.17.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 321/1098]           blk.17.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 322/1098]     blk.17.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 323/1098]         blk.17.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 324/1098]            blk.17.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 325/1098]           blk.17.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 326/1098]    blk.17.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 327/1098]                         blk.17.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 328/1098]              blk.17.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 329/1098]               blk.17.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 330/1098]             blk.17.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 331/1098]                   blk.17.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 332/1098]               blk.17.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 333/1098]                blk.17.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 334/1098]              blk.18.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 335/1098]              blk.18.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 336/1098]               blk.18.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 337/1098]          blk.18.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 338/1098]         blk.18.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 339/1098]          blk.18.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 340/1098]           blk.18.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 341/1098]     blk.18.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 342/1098]         blk.18.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 343/1098]            blk.18.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 344/1098]           blk.18.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 345/1098]    blk.18.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 346/1098]                         blk.18.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 347/1098]              blk.18.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 348/1098]               blk.18.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 349/1098]             blk.18.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 350/1098]                   blk.18.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 351/1098]               blk.18.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 352/1098]                blk.18.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 353/1098]                 blk.19.attn_k.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 354/1098]            blk.19.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 355/1098]              blk.19.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 356/1098]            blk.19.attn_output.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 357/1098]                 blk.19.attn_q.weight - [ 4096, 16384,     1,     1], type =   bf16, converting to q8_0 .. size =   128.00 MiB ->    68.00 MiB
[ 358/1098]            blk.19.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 359/1098]                 blk.19.attn_v.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 360/1098]          blk.19.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 361/1098]         blk.19.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 362/1098]          blk.19.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 363/1098]           blk.19.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 364/1098]     blk.19.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 365/1098]         blk.19.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 366/1098]            blk.19.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 367/1098]           blk.19.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 368/1098]    blk.19.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 369/1098]              blk.20.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 370/1098]              blk.20.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 371/1098]               blk.20.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 372/1098]          blk.20.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 373/1098]         blk.20.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 374/1098]          blk.20.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 375/1098]           blk.20.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 376/1098]     blk.20.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 377/1098]         blk.20.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 378/1098]            blk.20.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 379/1098]           blk.20.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 380/1098]    blk.20.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 381/1098]                         blk.20.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 382/1098]              blk.20.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 383/1098]               blk.20.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 384/1098]             blk.20.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 385/1098]                   blk.20.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 386/1098]               blk.20.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 387/1098]                blk.20.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 388/1098]              blk.21.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 389/1098]              blk.21.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 390/1098]               blk.21.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 391/1098]          blk.21.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 392/1098]         blk.21.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 393/1098]          blk.21.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 394/1098]           blk.21.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 395/1098]     blk.21.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 396/1098]         blk.21.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 397/1098]            blk.21.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 398/1098]           blk.21.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 399/1098]    blk.21.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 400/1098]                         blk.21.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 401/1098]              blk.21.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 402/1098]               blk.21.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 403/1098]             blk.21.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 404/1098]                   blk.21.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 405/1098]               blk.21.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 406/1098]                blk.21.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 407/1098]              blk.22.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 408/1098]              blk.22.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 409/1098]               blk.22.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 410/1098]          blk.22.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 411/1098]         blk.22.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 412/1098]          blk.22.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 413/1098]           blk.22.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 414/1098]     blk.22.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 415/1098]         blk.22.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 416/1098]            blk.22.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 417/1098]           blk.22.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 418/1098]    blk.22.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 419/1098]                         blk.22.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 420/1098]              blk.22.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 421/1098]               blk.22.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 422/1098]             blk.22.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 423/1098]                   blk.22.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 424/1098]               blk.22.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 425/1098]                blk.22.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 426/1098]                 blk.23.attn_k.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 427/1098]            blk.23.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 428/1098]              blk.23.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 429/1098]            blk.23.attn_output.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 430/1098]                 blk.23.attn_q.weight - [ 4096, 16384,     1,     1], type =   bf16, converting to q8_0 .. size =   128.00 MiB ->    68.00 MiB
[ 431/1098]            blk.23.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 432/1098]                 blk.23.attn_v.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 433/1098]          blk.23.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 434/1098]         blk.23.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 435/1098]          blk.23.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 436/1098]           blk.23.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 437/1098]     blk.23.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 438/1098]         blk.23.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 439/1098]            blk.23.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 440/1098]           blk.23.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 441/1098]    blk.23.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 442/1098]              blk.24.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 443/1098]              blk.24.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 444/1098]               blk.24.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 445/1098]          blk.24.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 446/1098]         blk.24.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 447/1098]          blk.24.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 448/1098]           blk.24.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 449/1098]     blk.24.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 450/1098]         blk.24.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 451/1098]            blk.24.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 452/1098]           blk.24.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 453/1098]    blk.24.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 454/1098]                         blk.24.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 455/1098]              blk.24.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 456/1098]               blk.24.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 457/1098]             blk.24.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 458/1098]                   blk.24.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 459/1098]               blk.24.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 460/1098]                blk.24.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 461/1098]              blk.25.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 462/1098]              blk.25.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 463/1098]               blk.25.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 464/1098]          blk.25.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 465/1098]         blk.25.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 466/1098]          blk.25.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 467/1098]           blk.25.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 468/1098]     blk.25.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 469/1098]         blk.25.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 470/1098]            blk.25.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 471/1098]           blk.25.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 472/1098]    blk.25.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 473/1098]                         blk.25.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 474/1098]              blk.25.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 475/1098]               blk.25.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 476/1098]             blk.25.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 477/1098]                   blk.25.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 478/1098]               blk.25.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 479/1098]                blk.25.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 480/1098]              blk.26.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 481/1098]              blk.26.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 482/1098]               blk.26.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 483/1098]          blk.26.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 484/1098]         blk.26.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 485/1098]          blk.26.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 486/1098]           blk.26.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 487/1098]     blk.26.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 488/1098]         blk.26.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 489/1098]            blk.26.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 490/1098]           blk.26.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 491/1098]    blk.26.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 492/1098]                         blk.26.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 493/1098]              blk.26.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 494/1098]               blk.26.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 495/1098]             blk.26.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 496/1098]                   blk.26.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 497/1098]               blk.26.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 498/1098]                blk.26.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 499/1098]                 blk.27.attn_k.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 500/1098]            blk.27.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 501/1098]              blk.27.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 502/1098]            blk.27.attn_output.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 503/1098]                 blk.27.attn_q.weight - [ 4096, 16384,     1,     1], type =   bf16, converting to q8_0 .. size =   128.00 MiB ->    68.00 MiB
[ 504/1098]            blk.27.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 505/1098]                 blk.27.attn_v.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 506/1098]          blk.27.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 507/1098]         blk.27.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 508/1098]          blk.27.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 509/1098]           blk.27.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 510/1098]     blk.27.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 511/1098]         blk.27.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 512/1098]            blk.27.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 513/1098]           blk.27.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 514/1098]    blk.27.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 515/1098]              blk.28.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 516/1098]              blk.28.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 517/1098]               blk.28.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 518/1098]          blk.28.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 519/1098]         blk.28.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 520/1098]          blk.28.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 521/1098]           blk.28.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 522/1098]     blk.28.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 523/1098]         blk.28.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 524/1098]            blk.28.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 525/1098]           blk.28.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 526/1098]    blk.28.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 527/1098]                         blk.28.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 528/1098]              blk.28.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 529/1098]               blk.28.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 530/1098]             blk.28.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 531/1098]                   blk.28.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 532/1098]               blk.28.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 533/1098]                blk.28.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 534/1098]              blk.29.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 535/1098]              blk.29.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 536/1098]               blk.29.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 537/1098]          blk.29.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 538/1098]         blk.29.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 539/1098]          blk.29.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 540/1098]           blk.29.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 541/1098]     blk.29.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 542/1098]         blk.29.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 543/1098]            blk.29.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 544/1098]           blk.29.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 545/1098]    blk.29.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 546/1098]                         blk.29.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 547/1098]              blk.29.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 548/1098]               blk.29.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 549/1098]             blk.29.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 550/1098]                   blk.29.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 551/1098]               blk.29.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 552/1098]                blk.29.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 553/1098]              blk.30.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 554/1098]              blk.30.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 555/1098]               blk.30.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 556/1098]          blk.30.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 557/1098]         blk.30.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 558/1098]          blk.30.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 559/1098]           blk.30.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 560/1098]     blk.30.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 561/1098]         blk.30.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 562/1098]            blk.30.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 563/1098]           blk.30.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 564/1098]    blk.30.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 565/1098]                         blk.30.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 566/1098]              blk.30.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 567/1098]               blk.30.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 568/1098]             blk.30.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 569/1098]                   blk.30.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 570/1098]               blk.30.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 571/1098]                blk.30.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 572/1098]                 blk.31.attn_k.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 573/1098]            blk.31.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 574/1098]              blk.31.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 575/1098]            blk.31.attn_output.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 576/1098]                 blk.31.attn_q.weight - [ 4096, 16384,     1,     1], type =   bf16, converting to q8_0 .. size =   128.00 MiB ->    68.00 MiB
[ 577/1098]            blk.31.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 578/1098]                 blk.31.attn_v.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 579/1098]          blk.31.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 580/1098]         blk.31.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 581/1098]          blk.31.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 582/1098]           blk.31.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 583/1098]     blk.31.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 584/1098]         blk.31.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 585/1098]            blk.31.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 586/1098]           blk.31.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 587/1098]    blk.31.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 588/1098]              blk.32.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 589/1098]              blk.32.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 590/1098]               blk.32.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 591/1098]          blk.32.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 592/1098]         blk.32.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 593/1098]          blk.32.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 594/1098]           blk.32.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 595/1098]     blk.32.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 596/1098]         blk.32.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 597/1098]            blk.32.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 598/1098]           blk.32.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 599/1098]    blk.32.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 600/1098]                         blk.32.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 601/1098]              blk.32.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 602/1098]               blk.32.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 603/1098]             blk.32.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 604/1098]                   blk.32.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 605/1098]               blk.32.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 606/1098]                blk.32.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 607/1098]              blk.33.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 608/1098]              blk.33.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 609/1098]               blk.33.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 610/1098]          blk.33.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 611/1098]         blk.33.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 612/1098]          blk.33.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 613/1098]           blk.33.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 614/1098]     blk.33.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 615/1098]         blk.33.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 616/1098]            blk.33.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 617/1098]           blk.33.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 618/1098]    blk.33.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 619/1098]                         blk.33.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 620/1098]              blk.33.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 621/1098]               blk.33.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 622/1098]             blk.33.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 623/1098]                   blk.33.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 624/1098]               blk.33.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 625/1098]                blk.33.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 626/1098]              blk.34.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 627/1098]              blk.34.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 628/1098]               blk.34.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 629/1098]          blk.34.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 630/1098]         blk.34.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 631/1098]          blk.34.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 632/1098]           blk.34.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 633/1098]     blk.34.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 634/1098]         blk.34.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 635/1098]            blk.34.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 636/1098]           blk.34.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 637/1098]    blk.34.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 638/1098]                         blk.34.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 639/1098]              blk.34.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 640/1098]               blk.34.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 641/1098]             blk.34.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 642/1098]                   blk.34.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 643/1098]               blk.34.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 644/1098]                blk.34.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 645/1098]                 blk.35.attn_k.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 646/1098]            blk.35.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 647/1098]              blk.35.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 648/1098]            blk.35.attn_output.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 649/1098]                 blk.35.attn_q.weight - [ 4096, 16384,     1,     1], type =   bf16, converting to q8_0 .. size =   128.00 MiB ->    68.00 MiB
[ 650/1098]            blk.35.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 651/1098]                 blk.35.attn_v.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 652/1098]          blk.35.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 653/1098]         blk.35.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 654/1098]          blk.35.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 655/1098]           blk.35.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 656/1098]     blk.35.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 657/1098]         blk.35.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 658/1098]            blk.35.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 659/1098]           blk.35.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 660/1098]    blk.35.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 661/1098]              blk.36.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 662/1098]              blk.36.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 663/1098]               blk.36.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 664/1098]          blk.36.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 665/1098]         blk.36.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 666/1098]          blk.36.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 667/1098]           blk.36.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 668/1098]     blk.36.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 669/1098]         blk.36.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 670/1098]            blk.36.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 671/1098]           blk.36.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 672/1098]    blk.36.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 673/1098]                         blk.36.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 674/1098]              blk.36.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 675/1098]               blk.36.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 676/1098]             blk.36.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 677/1098]                   blk.36.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 678/1098]               blk.36.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 679/1098]                blk.36.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 680/1098]              blk.37.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 681/1098]              blk.37.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 682/1098]               blk.37.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 683/1098]          blk.37.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 684/1098]         blk.37.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 685/1098]          blk.37.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 686/1098]           blk.37.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 687/1098]     blk.37.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 688/1098]         blk.37.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 689/1098]            blk.37.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 690/1098]           blk.37.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 691/1098]    blk.37.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 692/1098]                         blk.37.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 693/1098]              blk.37.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 694/1098]               blk.37.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 695/1098]             blk.37.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 696/1098]                   blk.37.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 697/1098]               blk.37.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 698/1098]                blk.37.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 699/1098]              blk.38.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 700/1098]              blk.38.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 701/1098]               blk.38.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 702/1098]          blk.38.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 703/1098]         blk.38.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 704/1098]          blk.38.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 705/1098]           blk.38.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 706/1098]     blk.38.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 707/1098]         blk.38.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 708/1098]            blk.38.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 709/1098]           blk.38.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 710/1098]    blk.38.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 711/1098]                         blk.38.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 712/1098]              blk.38.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 713/1098]               blk.38.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 714/1098]             blk.38.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 715/1098]                   blk.38.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 716/1098]               blk.38.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 717/1098]                blk.38.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 718/1098]                 blk.39.attn_k.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 719/1098]            blk.39.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 720/1098]              blk.39.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 721/1098]            blk.39.attn_output.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 722/1098]                 blk.39.attn_q.weight - [ 4096, 16384,     1,     1], type =   bf16, converting to q8_0 .. size =   128.00 MiB ->    68.00 MiB
[ 723/1098]            blk.39.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 724/1098]                 blk.39.attn_v.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 725/1098]          blk.39.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 726/1098]         blk.39.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 727/1098]          blk.39.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 728/1098]           blk.39.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 729/1098]     blk.39.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 730/1098]         blk.39.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 731/1098]            blk.39.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 732/1098]           blk.39.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 733/1098]    blk.39.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 734/1098]              blk.40.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 735/1098]              blk.40.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 736/1098]               blk.40.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 737/1098]          blk.40.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 738/1098]         blk.40.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 739/1098]          blk.40.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 740/1098]           blk.40.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 741/1098]     blk.40.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 742/1098]         blk.40.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 743/1098]            blk.40.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 744/1098]           blk.40.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 745/1098]    blk.40.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 746/1098]                         blk.40.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 747/1098]              blk.40.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 748/1098]               blk.40.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 749/1098]             blk.40.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 750/1098]                   blk.40.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 751/1098]               blk.40.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 752/1098]                blk.40.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 753/1098]              blk.41.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 754/1098]              blk.41.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 755/1098]               blk.41.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 756/1098]          blk.41.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 757/1098]         blk.41.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 758/1098]          blk.41.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 759/1098]           blk.41.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 760/1098]     blk.41.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 761/1098]         blk.41.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 762/1098]            blk.41.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 763/1098]           blk.41.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 764/1098]    blk.41.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 765/1098]                         blk.41.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 766/1098]              blk.41.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 767/1098]               blk.41.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 768/1098]             blk.41.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 769/1098]                   blk.41.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 770/1098]               blk.41.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 771/1098]                blk.41.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 772/1098]              blk.42.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 773/1098]              blk.42.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 774/1098]               blk.42.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 775/1098]          blk.42.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 776/1098]         blk.42.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 777/1098]          blk.42.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 778/1098]           blk.42.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 779/1098]     blk.42.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 780/1098]         blk.42.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 781/1098]            blk.42.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 782/1098]           blk.42.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 783/1098]    blk.42.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 784/1098]                         blk.42.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 785/1098]              blk.42.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 786/1098]               blk.42.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 787/1098]             blk.42.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 788/1098]                   blk.42.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 789/1098]               blk.42.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 790/1098]                blk.42.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 791/1098]                 blk.43.attn_k.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 792/1098]            blk.43.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 793/1098]              blk.43.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 794/1098]            blk.43.attn_output.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 795/1098]                 blk.43.attn_q.weight - [ 4096, 16384,     1,     1], type =   bf16, converting to q8_0 .. size =   128.00 MiB ->    68.00 MiB
[ 796/1098]            blk.43.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 797/1098]                 blk.43.attn_v.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 798/1098]          blk.43.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 799/1098]         blk.43.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 800/1098]          blk.43.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 801/1098]           blk.43.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 802/1098]     blk.43.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 803/1098]         blk.43.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 804/1098]            blk.43.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 805/1098]           blk.43.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 806/1098]    blk.43.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 807/1098]              blk.44.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 808/1098]              blk.44.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 809/1098]               blk.44.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 810/1098]          blk.44.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 811/1098]         blk.44.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 812/1098]          blk.44.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 813/1098]           blk.44.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 814/1098]     blk.44.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 815/1098]         blk.44.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 816/1098]            blk.44.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 817/1098]           blk.44.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 818/1098]    blk.44.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 819/1098]                         blk.44.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 820/1098]              blk.44.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 821/1098]               blk.44.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 822/1098]             blk.44.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 823/1098]                   blk.44.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 824/1098]               blk.44.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 825/1098]                blk.44.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 826/1098]              blk.45.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 827/1098]              blk.45.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 828/1098]               blk.45.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 829/1098]          blk.45.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 830/1098]         blk.45.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 831/1098]          blk.45.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 832/1098]           blk.45.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 833/1098]     blk.45.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 834/1098]         blk.45.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 835/1098]            blk.45.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 836/1098]           blk.45.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 837/1098]    blk.45.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 838/1098]                         blk.45.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 839/1098]              blk.45.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 840/1098]               blk.45.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 841/1098]             blk.45.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 842/1098]                   blk.45.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 843/1098]               blk.45.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 844/1098]                blk.45.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 845/1098]              blk.46.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 846/1098]              blk.46.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 847/1098]               blk.46.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 848/1098]          blk.46.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 849/1098]         blk.46.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 850/1098]          blk.46.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 851/1098]           blk.46.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 852/1098]     blk.46.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 853/1098]         blk.46.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 854/1098]            blk.46.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 855/1098]           blk.46.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 856/1098]    blk.46.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 857/1098]                         blk.46.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 858/1098]              blk.46.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 859/1098]               blk.46.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 860/1098]             blk.46.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 861/1098]                   blk.46.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 862/1098]               blk.46.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 863/1098]                blk.46.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 864/1098]                 blk.47.attn_k.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 865/1098]            blk.47.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 866/1098]              blk.47.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 867/1098]            blk.47.attn_output.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 868/1098]                 blk.47.attn_q.weight - [ 4096, 16384,     1,     1], type =   bf16, converting to q8_0 .. size =   128.00 MiB ->    68.00 MiB
[ 869/1098]            blk.47.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 870/1098]                 blk.47.attn_v.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 871/1098]          blk.47.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 872/1098]         blk.47.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 873/1098]          blk.47.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 874/1098]           blk.47.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 875/1098]     blk.47.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 876/1098]         blk.47.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 877/1098]            blk.47.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 878/1098]           blk.47.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 879/1098]    blk.47.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 880/1098]              blk.48.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 881/1098]              blk.48.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 882/1098]               blk.48.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 883/1098]          blk.48.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 884/1098]         blk.48.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 885/1098]          blk.48.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 886/1098]           blk.48.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 887/1098]     blk.48.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 888/1098]         blk.48.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 889/1098]            blk.48.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 890/1098]           blk.48.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 891/1098]    blk.48.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 892/1098]                         blk.48.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 893/1098]              blk.48.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 894/1098]               blk.48.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 895/1098]             blk.48.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 896/1098]                   blk.48.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 897/1098]               blk.48.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 898/1098]                blk.48.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 899/1098]              blk.49.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 900/1098]              blk.49.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 901/1098]               blk.49.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 902/1098]          blk.49.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 903/1098]         blk.49.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 904/1098]          blk.49.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 905/1098]           blk.49.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 906/1098]     blk.49.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 907/1098]         blk.49.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 908/1098]            blk.49.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 909/1098]           blk.49.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 910/1098]    blk.49.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 911/1098]                         blk.49.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 912/1098]              blk.49.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 913/1098]               blk.49.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 914/1098]             blk.49.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 915/1098]                   blk.49.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 916/1098]               blk.49.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 917/1098]                blk.49.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 918/1098]              blk.50.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 919/1098]              blk.50.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 920/1098]               blk.50.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 921/1098]          blk.50.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 922/1098]         blk.50.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 923/1098]          blk.50.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 924/1098]           blk.50.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 925/1098]     blk.50.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 926/1098]         blk.50.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 927/1098]            blk.50.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 928/1098]           blk.50.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 929/1098]    blk.50.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 930/1098]                         blk.50.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 931/1098]              blk.50.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 932/1098]               blk.50.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 933/1098]             blk.50.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 934/1098]                   blk.50.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 935/1098]               blk.50.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 936/1098]                blk.50.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 937/1098]                 blk.51.attn_k.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 938/1098]            blk.51.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 939/1098]              blk.51.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 940/1098]            blk.51.attn_output.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 941/1098]                 blk.51.attn_q.weight - [ 4096, 16384,     1,     1], type =   bf16, converting to q8_0 .. size =   128.00 MiB ->    68.00 MiB
[ 942/1098]            blk.51.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[ 943/1098]                 blk.51.attn_v.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[ 944/1098]          blk.51.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 945/1098]         blk.51.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 946/1098]          blk.51.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 947/1098]           blk.51.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 948/1098]     blk.51.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 949/1098]         blk.51.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 950/1098]            blk.51.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 951/1098]           blk.51.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 952/1098]    blk.51.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 953/1098]              blk.52.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 954/1098]              blk.52.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 955/1098]               blk.52.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 956/1098]          blk.52.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 957/1098]         blk.52.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 958/1098]          blk.52.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 959/1098]           blk.52.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 960/1098]     blk.52.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 961/1098]         blk.52.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 962/1098]            blk.52.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 963/1098]           blk.52.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 964/1098]    blk.52.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 965/1098]                         blk.52.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 966/1098]              blk.52.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 967/1098]               blk.52.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 968/1098]             blk.52.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 969/1098]                   blk.52.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 970/1098]               blk.52.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 971/1098]                blk.52.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 972/1098]              blk.53.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 973/1098]              blk.53.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 974/1098]               blk.53.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 975/1098]          blk.53.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 976/1098]         blk.53.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 977/1098]          blk.53.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 978/1098]           blk.53.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 979/1098]     blk.53.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 980/1098]         blk.53.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 981/1098]            blk.53.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 982/1098]           blk.53.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 983/1098]    blk.53.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 984/1098]                         blk.53.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 985/1098]              blk.53.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 986/1098]               blk.53.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[ 987/1098]             blk.53.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[ 988/1098]                   blk.53.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 989/1098]               blk.53.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[ 990/1098]                blk.53.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 991/1098]              blk.54.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[ 992/1098]              blk.54.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 993/1098]               blk.54.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[ 994/1098]          blk.54.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[ 995/1098]         blk.54.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[ 996/1098]          blk.54.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[ 997/1098]           blk.54.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[ 998/1098]     blk.54.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[ 999/1098]         blk.54.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[1000/1098]            blk.54.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[1001/1098]           blk.54.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[1002/1098]    blk.54.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[1003/1098]                         blk.54.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[1004/1098]              blk.54.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[1005/1098]               blk.54.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[1006/1098]             blk.54.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[1007/1098]                   blk.54.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[1008/1098]               blk.54.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[1009/1098]                blk.54.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[1010/1098]                 blk.55.attn_k.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[1011/1098]            blk.55.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[1012/1098]              blk.55.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[1013/1098]            blk.55.attn_output.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[1014/1098]                 blk.55.attn_q.weight - [ 4096, 16384,     1,     1], type =   bf16, converting to q8_0 .. size =   128.00 MiB ->    68.00 MiB
[1015/1098]            blk.55.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[1016/1098]                 blk.55.attn_v.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[1017/1098]          blk.55.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[1018/1098]         blk.55.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[1019/1098]          blk.55.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[1020/1098]           blk.55.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[1021/1098]     blk.55.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[1022/1098]         blk.55.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[1023/1098]            blk.55.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[1024/1098]           blk.55.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[1025/1098]    blk.55.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[1026/1098]              blk.56.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[1027/1098]              blk.56.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[1028/1098]               blk.56.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[1029/1098]          blk.56.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[1030/1098]         blk.56.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[1031/1098]          blk.56.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[1032/1098]           blk.56.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[1033/1098]     blk.56.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[1034/1098]         blk.56.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[1035/1098]            blk.56.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[1036/1098]           blk.56.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[1037/1098]    blk.56.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[1038/1098]                         blk.56.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[1039/1098]              blk.56.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[1040/1098]               blk.56.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[1041/1098]             blk.56.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[1042/1098]                   blk.56.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[1043/1098]               blk.56.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[1044/1098]                blk.56.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[1045/1098]              blk.57.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[1046/1098]              blk.57.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[1047/1098]               blk.57.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[1048/1098]          blk.57.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[1049/1098]         blk.57.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[1050/1098]          blk.57.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[1051/1098]           blk.57.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[1052/1098]     blk.57.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[1053/1098]         blk.57.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[1054/1098]            blk.57.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[1055/1098]           blk.57.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[1056/1098]    blk.57.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[1057/1098]                         blk.57.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[1058/1098]              blk.57.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[1059/1098]               blk.57.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[1060/1098]             blk.57.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[1061/1098]                   blk.57.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[1062/1098]               blk.57.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[1063/1098]                blk.57.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[1064/1098]              blk.58.attn_gate.weight - [ 4096,  8192,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[1065/1098]              blk.58.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[1066/1098]               blk.58.attn_qkv.weight - [ 4096, 12288,     1,     1], type =   bf16, converting to q8_0 .. size =    96.00 MiB ->    51.00 MiB
[1067/1098]          blk.58.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[1068/1098]         blk.58.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[1069/1098]          blk.58.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[1070/1098]           blk.58.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[1071/1098]     blk.58.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[1072/1098]         blk.58.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[1073/1098]            blk.58.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[1074/1098]           blk.58.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[1075/1098]    blk.58.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[1076/1098]                         blk.58.ssm_a - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[1077/1098]              blk.58.ssm_alpha.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[1078/1098]               blk.58.ssm_beta.weight - [ 4096,    64,     1,     1], type =   bf16, converting to q8_0 .. size =     0.50 MiB ->     0.27 MiB
[1079/1098]             blk.58.ssm_conv1d.weight - [    4, 12288,     1,     1], type =    f32, size =    0.188 MiB
[1080/1098]                   blk.58.ssm_dt.bias - [   64,     1,     1,     1], type =    f32, size =    0.000 MiB
[1081/1098]               blk.58.ssm_norm.weight - [  128,     1,     1,     1], type =    f32, size =    0.000 MiB
[1082/1098]                blk.58.ssm_out.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[1083/1098]                 blk.59.attn_k.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[1084/1098]            blk.59.attn_k_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[1085/1098]              blk.59.attn_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[1086/1098]            blk.59.attn_output.weight - [ 8192,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =    64.00 MiB ->    34.00 MiB
[1087/1098]                 blk.59.attn_q.weight - [ 4096, 16384,     1,     1], type =   bf16, converting to q8_0 .. size =   128.00 MiB ->    68.00 MiB
[1088/1098]            blk.59.attn_q_norm.weight - [  256,     1,     1,     1], type =    f32, size =    0.001 MiB
[1089/1098]                 blk.59.attn_v.weight - [ 4096,   512,     1,     1], type =   bf16, converting to q8_0 .. size =     4.00 MiB ->     2.12 MiB
[1090/1098]          blk.59.ffn_down_exps.weight - [ 1024,  4096,   512,     1], type =   bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size =  4096.00 MiB ->  1152.00 MiB
[1091/1098]         blk.59.ffn_down_shexp.weight - [ 1024,  4096,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[1092/1098]          blk.59.ffn_gate_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[1093/1098]           blk.59.ffn_gate_inp.weight - [ 4096,   512,     1,     1], type =    f32, size =    8.000 MiB
[1094/1098]     blk.59.ffn_gate_inp_shexp.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
[1095/1098]         blk.59.ffn_gate_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[1096/1098]            blk.59.ffn_up_exps.weight - [ 4096,  1024,   512,     1], type =   bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size =  4096.00 MiB ->   880.00 MiB
[1097/1098]           blk.59.ffn_up_shexp.weight - [ 4096,  1024,     1,     1], type =   bf16, converting to q8_0 .. size =     8.00 MiB ->     4.25 MiB
[1098/1098]    blk.59.post_attention_norm.weight - [ 4096,     1,     1,     1], type =    f32, size =    0.016 MiB
llama_model_quantize_impl: model size  = 756216.34 MiB
llama_model_quantize_impl: quant size  = 184290.32 MiB

main: quantize time = 693851.09 ms
main:    total time = 693851.09 ms