# refer to secret recipe in README modelcard main: build = 8083 (6fdfce757) main: built with GNU 13.3.0 for Linux x86_64 main: quantizing '/mnt/data/models/ubergarm/Qwen3.5-397B-A17B-GGUF/Qwen3.5-397B-A17B-BF16-00001-of-00017.gguf' to '/mnt/data/models/ubergarm/Qwen3.5-397B-A17B-GGUF/Qwen3.5-397B-A17B-Q3_K.gguf' as Q8_0 using 128 threads llama_model_loader: additional 16 GGUFs metadata loaded. llama_model_loader: loaded meta data with 46 key-value pairs and 1098 tensors from /mnt/data/models/ubergarm/Qwen3.5-397B-A17B-GGUF/Qwen3.5-397B-A17B-BF16-00001-of-00017.gguf (version GGUF V3 (latest)) llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output. llama_model_loader: - kv 0: general.architecture str = qwen35moe llama_model_loader: - kv 1: general.type str = model llama_model_loader: - kv 2: general.sampling.top_k i32 = 20 llama_model_loader: - kv 3: general.sampling.top_p f32 = 0.950000 llama_model_loader: - kv 4: general.sampling.temp f32 = 0.600000 llama_model_loader: - kv 5: general.name str = Qwen3.5 397B A17B llama_model_loader: - kv 6: general.basename str = Qwen3.5 llama_model_loader: - kv 7: general.size_label str = 397B-A17B llama_model_loader: - kv 8: general.license str = apache-2.0 llama_model_loader: - kv 9: general.license.link str = https://huggingface.co/Qwen/Qwen3.5-3... llama_model_loader: - kv 10: general.tags arr[str,1] = ["image-text-to-text"] llama_model_loader: - kv 11: qwen35moe.block_count u32 = 60 llama_model_loader: - kv 12: qwen35moe.context_length u32 = 262144 llama_model_loader: - kv 13: qwen35moe.embedding_length u32 = 4096 llama_model_loader: - kv 14: qwen35moe.attention.head_count u32 = 32 llama_model_loader: - kv 15: qwen35moe.attention.head_count_kv u32 = 2 llama_model_loader: - kv 16: qwen35moe.rope.dimension_sections arr[i32,4] = [11, 11, 10, 0] llama_model_loader: - kv 17: qwen35moe.rope.freq_base f32 = 10000000.000000 llama_model_loader: - kv 18: qwen35moe.attention.layer_norm_rms_epsilon f32 = 0.000001 llama_model_loader: - kv 19: qwen35moe.expert_count u32 = 512 llama_model_loader: - kv 20: qwen35moe.expert_used_count u32 = 10 llama_model_loader: - kv 21: qwen35moe.attention.key_length u32 = 256 llama_model_loader: - kv 22: qwen35moe.attention.value_length u32 = 256 llama_model_loader: - kv 23: general.file_type u32 = 32 llama_model_loader: - kv 24: qwen35moe.expert_feed_forward_length u32 = 1024 llama_model_loader: - kv 25: qwen35moe.expert_shared_feed_forward_length u32 = 1024 llama_model_loader: - kv 26: qwen35moe.ssm.conv_kernel u32 = 4 llama_model_loader: - kv 27: qwen35moe.ssm.state_size u32 = 128 llama_model_loader: - kv 28: qwen35moe.ssm.group_count u32 = 16 llama_model_loader: - kv 29: qwen35moe.ssm.time_step_rank u32 = 64 llama_model_loader: - kv 30: qwen35moe.ssm.inner_size u32 = 8192 llama_model_loader: - kv 31: qwen35moe.full_attention_interval u32 = 4 llama_model_loader: - kv 32: qwen35moe.rope.dimension_count u32 = 64 llama_model_loader: - kv 33: general.quantization_version u32 = 2 llama_model_loader: - kv 34: tokenizer.ggml.model str = gpt2 llama_model_loader: - kv 35: tokenizer.ggml.pre str = qwen35 llama_model_loader: - kv 36: tokenizer.ggml.tokens arr[str,248320] = ["!", "\"", "#", "$", "%", "&", "'", ... llama_model_loader: - kv 37: tokenizer.ggml.token_type arr[i32,248320] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... llama_model_loader: - kv 38: tokenizer.ggml.merges arr[str,247587] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",... llama_model_loader: - kv 39: tokenizer.ggml.eos_token_id u32 = 248046 llama_model_loader: - kv 40: tokenizer.ggml.padding_token_id u32 = 248044 llama_model_loader: - kv 41: tokenizer.ggml.add_bos_token bool = false llama_model_loader: - kv 42: tokenizer.chat_template str = {%- set image_count = namespace(value... llama_model_loader: - kv 43: split.no u16 = 0 llama_model_loader: - kv 44: split.count u16 = 17 llama_model_loader: - kv 45: split.tensors.count i32 = 1098 llama_model_loader: - type f32: 451 tensors llama_model_loader: - type bf16: 647 tensors ================================ Have weights data with 765 entries [ 1/1098] output.weight - [ 4096, 248320, 1, 1], type = bf16, ====== llama_model_quantize_impl: did not find weights for output.weight converting to q6_K .. load_imatrix: imatrix datasets=['ubergarm-imatrix-calibration-corpus-v02.txt'] load_imatrix: loaded 765 importance matrix entries from /mnt/data/models/ubergarm/Qwen3.5-397B-A17B-GGUF/imatrix-Qwen3.5-397B-A17B-BF16-mainline.gguf computed on 829 chunks prepare_imatrix: have 765 importance matrix entries size = 1940.00 MiB -> 795.70 MiB [ 2/1098] output_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 3/1098] token_embd.weight - [ 4096, 248320, 1, 1], type = bf16, ====== llama_model_quantize_impl: did not find weights for token_embd.weight converting to q4_K .. size = 1940.00 MiB -> 545.62 MiB [ 4/1098] blk.0.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 5/1098] blk.0.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 6/1098] blk.0.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 7/1098] blk.0.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 8/1098] blk.0.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 9/1098] blk.0.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 10/1098] blk.0.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 11/1098] blk.0.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 12/1098] blk.0.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 13/1098] blk.0.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 14/1098] blk.0.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 15/1098] blk.0.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 16/1098] blk.0.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 17/1098] blk.0.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 18/1098] blk.0.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 19/1098] blk.0.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 20/1098] blk.0.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 21/1098] blk.0.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 22/1098] blk.0.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 23/1098] blk.1.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 24/1098] blk.1.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 25/1098] blk.1.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 26/1098] blk.1.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 27/1098] blk.1.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 28/1098] blk.1.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 29/1098] blk.1.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 30/1098] blk.1.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 31/1098] blk.1.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 32/1098] blk.1.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 33/1098] blk.1.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 34/1098] blk.1.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 35/1098] blk.1.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 36/1098] blk.1.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 37/1098] blk.1.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 38/1098] blk.1.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 39/1098] blk.1.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 40/1098] blk.1.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 41/1098] blk.1.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 42/1098] blk.2.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 43/1098] blk.2.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 44/1098] blk.2.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 45/1098] blk.2.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 46/1098] blk.2.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 47/1098] blk.2.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 48/1098] blk.2.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 49/1098] blk.2.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 50/1098] blk.2.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 51/1098] blk.2.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 52/1098] blk.2.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 53/1098] blk.2.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 54/1098] blk.2.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 55/1098] blk.2.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 56/1098] blk.2.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 57/1098] blk.2.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 58/1098] blk.2.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 59/1098] blk.2.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 60/1098] blk.2.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 61/1098] blk.3.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 62/1098] blk.3.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 63/1098] blk.3.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 64/1098] blk.3.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 65/1098] blk.3.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB [ 66/1098] blk.3.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 67/1098] blk.3.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 68/1098] blk.3.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 69/1098] blk.3.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 70/1098] blk.3.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 71/1098] blk.3.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 72/1098] blk.3.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 73/1098] blk.3.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 74/1098] blk.3.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 75/1098] blk.3.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 76/1098] blk.3.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 77/1098] blk.4.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 78/1098] blk.4.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 79/1098] blk.4.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 80/1098] blk.4.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 81/1098] blk.4.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 82/1098] blk.4.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 83/1098] blk.4.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 84/1098] blk.4.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 85/1098] blk.4.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 86/1098] blk.4.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 87/1098] blk.4.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 88/1098] blk.4.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 89/1098] blk.4.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 90/1098] blk.4.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 91/1098] blk.4.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 92/1098] blk.4.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 93/1098] blk.4.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 94/1098] blk.4.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 95/1098] blk.4.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 96/1098] blk.5.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 97/1098] blk.5.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 98/1098] blk.5.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 99/1098] blk.5.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 100/1098] blk.5.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 101/1098] blk.5.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 102/1098] blk.5.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 103/1098] blk.5.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 104/1098] blk.5.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 105/1098] blk.5.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 106/1098] blk.5.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 107/1098] blk.5.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 108/1098] blk.5.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 109/1098] blk.5.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 110/1098] blk.5.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 111/1098] blk.5.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 112/1098] blk.5.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 113/1098] blk.5.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 114/1098] blk.5.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 115/1098] blk.6.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 116/1098] blk.6.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 117/1098] blk.6.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 118/1098] blk.6.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 119/1098] blk.6.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 120/1098] blk.6.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 121/1098] blk.6.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 122/1098] blk.6.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 123/1098] blk.6.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 124/1098] blk.6.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 125/1098] blk.6.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 126/1098] blk.6.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 127/1098] blk.6.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 128/1098] blk.6.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 129/1098] blk.6.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 130/1098] blk.6.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 131/1098] blk.6.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 132/1098] blk.6.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 133/1098] blk.6.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 134/1098] blk.7.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 135/1098] blk.7.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 136/1098] blk.7.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 137/1098] blk.7.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 138/1098] blk.7.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB [ 139/1098] blk.7.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 140/1098] blk.7.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 141/1098] blk.7.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 142/1098] blk.7.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 143/1098] blk.7.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 144/1098] blk.7.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 145/1098] blk.7.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 146/1098] blk.7.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 147/1098] blk.7.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 148/1098] blk.7.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 149/1098] blk.7.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 150/1098] blk.8.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 151/1098] blk.8.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 152/1098] blk.8.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 153/1098] blk.8.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 154/1098] blk.8.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 155/1098] blk.8.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 156/1098] blk.8.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 157/1098] blk.8.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 158/1098] blk.8.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 159/1098] blk.8.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 160/1098] blk.8.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 161/1098] blk.8.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 162/1098] blk.8.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 163/1098] blk.8.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 164/1098] blk.8.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 165/1098] blk.8.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 166/1098] blk.8.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 167/1098] blk.8.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 168/1098] blk.8.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 169/1098] blk.9.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 170/1098] blk.9.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 171/1098] blk.9.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 172/1098] blk.9.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 173/1098] blk.9.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 174/1098] blk.9.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 175/1098] blk.9.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 176/1098] blk.9.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 177/1098] blk.9.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 178/1098] blk.9.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 179/1098] blk.9.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 180/1098] blk.9.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 181/1098] blk.9.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 182/1098] blk.9.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 183/1098] blk.9.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 184/1098] blk.9.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 185/1098] blk.9.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 186/1098] blk.9.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 187/1098] blk.9.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 188/1098] blk.10.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 189/1098] blk.10.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 190/1098] blk.10.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 191/1098] blk.10.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 192/1098] blk.10.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 193/1098] blk.10.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 194/1098] blk.10.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 195/1098] blk.10.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 196/1098] blk.10.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 197/1098] blk.10.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 198/1098] blk.10.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 199/1098] blk.10.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 200/1098] blk.10.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 201/1098] blk.10.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 202/1098] blk.10.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 203/1098] blk.10.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 204/1098] blk.10.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 205/1098] blk.10.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 206/1098] blk.10.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 207/1098] blk.11.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 208/1098] blk.11.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 209/1098] blk.11.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 210/1098] blk.11.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 211/1098] blk.11.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB [ 212/1098] blk.11.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 213/1098] blk.11.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 214/1098] blk.11.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 215/1098] blk.11.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 216/1098] blk.11.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 217/1098] blk.11.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 218/1098] blk.11.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 219/1098] blk.11.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 220/1098] blk.11.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 221/1098] blk.11.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 222/1098] blk.11.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 223/1098] blk.12.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 224/1098] blk.12.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 225/1098] blk.12.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 226/1098] blk.12.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 227/1098] blk.12.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 228/1098] blk.12.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 229/1098] blk.12.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 230/1098] blk.12.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 231/1098] blk.12.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 232/1098] blk.12.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 233/1098] blk.12.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 234/1098] blk.12.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 235/1098] blk.12.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 236/1098] blk.12.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 237/1098] blk.12.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 238/1098] blk.12.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 239/1098] blk.12.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 240/1098] blk.12.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 241/1098] blk.12.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 242/1098] blk.13.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 243/1098] blk.13.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 244/1098] blk.13.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 245/1098] blk.13.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 246/1098] blk.13.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 247/1098] blk.13.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 248/1098] blk.13.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 249/1098] blk.13.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 250/1098] blk.13.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 251/1098] blk.13.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 252/1098] blk.13.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 253/1098] blk.13.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 254/1098] blk.13.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 255/1098] blk.13.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 256/1098] blk.13.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 257/1098] blk.13.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 258/1098] blk.13.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 259/1098] blk.13.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 260/1098] blk.13.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 261/1098] blk.14.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 262/1098] blk.14.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 263/1098] blk.14.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 264/1098] blk.14.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 265/1098] blk.14.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 266/1098] blk.14.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 267/1098] blk.14.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 268/1098] blk.14.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 269/1098] blk.14.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 270/1098] blk.14.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 271/1098] blk.14.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 272/1098] blk.14.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 273/1098] blk.14.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 274/1098] blk.14.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 275/1098] blk.14.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 276/1098] blk.14.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 277/1098] blk.14.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 278/1098] blk.14.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 279/1098] blk.14.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 280/1098] blk.15.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 281/1098] blk.15.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 282/1098] blk.15.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 283/1098] blk.15.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 284/1098] blk.15.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB [ 285/1098] blk.15.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 286/1098] blk.15.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 287/1098] blk.15.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 288/1098] blk.15.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 289/1098] blk.15.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 290/1098] blk.15.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 291/1098] blk.15.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 292/1098] blk.15.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 293/1098] blk.15.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 294/1098] blk.15.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 295/1098] blk.15.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 296/1098] blk.16.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 297/1098] blk.16.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 298/1098] blk.16.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 299/1098] blk.16.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 300/1098] blk.16.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 301/1098] blk.16.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 302/1098] blk.16.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 303/1098] blk.16.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 304/1098] blk.16.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 305/1098] blk.16.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 306/1098] blk.16.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 307/1098] blk.16.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 308/1098] blk.16.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 309/1098] blk.16.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 310/1098] blk.16.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 311/1098] blk.16.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 312/1098] blk.16.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 313/1098] blk.16.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 314/1098] blk.16.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 315/1098] blk.17.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 316/1098] blk.17.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 317/1098] blk.17.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 318/1098] blk.17.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 319/1098] blk.17.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 320/1098] blk.17.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 321/1098] blk.17.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 322/1098] blk.17.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 323/1098] blk.17.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 324/1098] blk.17.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 325/1098] blk.17.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 326/1098] blk.17.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 327/1098] blk.17.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 328/1098] blk.17.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 329/1098] blk.17.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 330/1098] blk.17.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 331/1098] blk.17.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 332/1098] blk.17.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 333/1098] blk.17.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 334/1098] blk.18.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 335/1098] blk.18.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 336/1098] blk.18.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 337/1098] blk.18.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 338/1098] blk.18.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 339/1098] blk.18.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 340/1098] blk.18.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 341/1098] blk.18.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 342/1098] blk.18.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 343/1098] blk.18.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 344/1098] blk.18.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 345/1098] blk.18.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 346/1098] blk.18.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 347/1098] blk.18.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 348/1098] blk.18.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 349/1098] blk.18.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 350/1098] blk.18.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 351/1098] blk.18.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 352/1098] blk.18.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 353/1098] blk.19.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 354/1098] blk.19.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 355/1098] blk.19.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 356/1098] blk.19.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 357/1098] blk.19.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB [ 358/1098] blk.19.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 359/1098] blk.19.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 360/1098] blk.19.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 361/1098] blk.19.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 362/1098] blk.19.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 363/1098] blk.19.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 364/1098] blk.19.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 365/1098] blk.19.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 366/1098] blk.19.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 367/1098] blk.19.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 368/1098] blk.19.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 369/1098] blk.20.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 370/1098] blk.20.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 371/1098] blk.20.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 372/1098] blk.20.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 373/1098] blk.20.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 374/1098] blk.20.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 375/1098] blk.20.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 376/1098] blk.20.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 377/1098] blk.20.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 378/1098] blk.20.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 379/1098] blk.20.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 380/1098] blk.20.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 381/1098] blk.20.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 382/1098] blk.20.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 383/1098] blk.20.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 384/1098] blk.20.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 385/1098] blk.20.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 386/1098] blk.20.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 387/1098] blk.20.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 388/1098] blk.21.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 389/1098] blk.21.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 390/1098] blk.21.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 391/1098] blk.21.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 392/1098] blk.21.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 393/1098] blk.21.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 394/1098] blk.21.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 395/1098] blk.21.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 396/1098] blk.21.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 397/1098] blk.21.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 398/1098] blk.21.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 399/1098] blk.21.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 400/1098] blk.21.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 401/1098] blk.21.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 402/1098] blk.21.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 403/1098] blk.21.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 404/1098] blk.21.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 405/1098] blk.21.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 406/1098] blk.21.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 407/1098] blk.22.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 408/1098] blk.22.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 409/1098] blk.22.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 410/1098] blk.22.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 411/1098] blk.22.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 412/1098] blk.22.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 413/1098] blk.22.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 414/1098] blk.22.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 415/1098] blk.22.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 416/1098] blk.22.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 417/1098] blk.22.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 418/1098] blk.22.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 419/1098] blk.22.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 420/1098] blk.22.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 421/1098] blk.22.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 422/1098] blk.22.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 423/1098] blk.22.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 424/1098] blk.22.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 425/1098] blk.22.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 426/1098] blk.23.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 427/1098] blk.23.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 428/1098] blk.23.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 429/1098] blk.23.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 430/1098] blk.23.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB [ 431/1098] blk.23.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 432/1098] blk.23.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 433/1098] blk.23.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 434/1098] blk.23.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 435/1098] blk.23.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 436/1098] blk.23.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 437/1098] blk.23.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 438/1098] blk.23.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 439/1098] blk.23.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 440/1098] blk.23.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 441/1098] blk.23.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 442/1098] blk.24.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 443/1098] blk.24.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 444/1098] blk.24.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 445/1098] blk.24.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 446/1098] blk.24.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 447/1098] blk.24.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 448/1098] blk.24.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 449/1098] blk.24.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 450/1098] blk.24.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 451/1098] blk.24.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 452/1098] blk.24.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 453/1098] blk.24.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 454/1098] blk.24.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 455/1098] blk.24.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 456/1098] blk.24.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 457/1098] blk.24.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 458/1098] blk.24.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 459/1098] blk.24.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 460/1098] blk.24.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 461/1098] blk.25.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 462/1098] blk.25.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 463/1098] blk.25.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 464/1098] blk.25.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 465/1098] blk.25.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 466/1098] blk.25.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 467/1098] blk.25.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 468/1098] blk.25.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 469/1098] blk.25.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 470/1098] blk.25.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 471/1098] blk.25.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 472/1098] blk.25.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 473/1098] blk.25.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 474/1098] blk.25.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 475/1098] blk.25.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 476/1098] blk.25.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 477/1098] blk.25.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 478/1098] blk.25.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 479/1098] blk.25.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 480/1098] blk.26.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 481/1098] blk.26.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 482/1098] blk.26.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 483/1098] blk.26.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 484/1098] blk.26.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 485/1098] blk.26.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 486/1098] blk.26.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 487/1098] blk.26.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 488/1098] blk.26.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 489/1098] blk.26.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 490/1098] blk.26.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 491/1098] blk.26.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 492/1098] blk.26.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 493/1098] blk.26.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 494/1098] blk.26.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 495/1098] blk.26.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 496/1098] blk.26.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 497/1098] blk.26.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 498/1098] blk.26.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 499/1098] blk.27.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 500/1098] blk.27.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 501/1098] blk.27.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 502/1098] blk.27.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 503/1098] blk.27.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB [ 504/1098] blk.27.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 505/1098] blk.27.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 506/1098] blk.27.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 507/1098] blk.27.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 508/1098] blk.27.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 509/1098] blk.27.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 510/1098] blk.27.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 511/1098] blk.27.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 512/1098] blk.27.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 513/1098] blk.27.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 514/1098] blk.27.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 515/1098] blk.28.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 516/1098] blk.28.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 517/1098] blk.28.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 518/1098] blk.28.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 519/1098] blk.28.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 520/1098] blk.28.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 521/1098] blk.28.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 522/1098] blk.28.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 523/1098] blk.28.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 524/1098] blk.28.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 525/1098] blk.28.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 526/1098] blk.28.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 527/1098] blk.28.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 528/1098] blk.28.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 529/1098] blk.28.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 530/1098] blk.28.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 531/1098] blk.28.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 532/1098] blk.28.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 533/1098] blk.28.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 534/1098] blk.29.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 535/1098] blk.29.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 536/1098] blk.29.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 537/1098] blk.29.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 538/1098] blk.29.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 539/1098] blk.29.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 540/1098] blk.29.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 541/1098] blk.29.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 542/1098] blk.29.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 543/1098] blk.29.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 544/1098] blk.29.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 545/1098] blk.29.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 546/1098] blk.29.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 547/1098] blk.29.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 548/1098] blk.29.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 549/1098] blk.29.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 550/1098] blk.29.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 551/1098] blk.29.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 552/1098] blk.29.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 553/1098] blk.30.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 554/1098] blk.30.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 555/1098] blk.30.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 556/1098] blk.30.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 557/1098] blk.30.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 558/1098] blk.30.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 559/1098] blk.30.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 560/1098] blk.30.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 561/1098] blk.30.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 562/1098] blk.30.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 563/1098] blk.30.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 564/1098] blk.30.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 565/1098] blk.30.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 566/1098] blk.30.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 567/1098] blk.30.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 568/1098] blk.30.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 569/1098] blk.30.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 570/1098] blk.30.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 571/1098] blk.30.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 572/1098] blk.31.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 573/1098] blk.31.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 574/1098] blk.31.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 575/1098] blk.31.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 576/1098] blk.31.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB [ 577/1098] blk.31.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 578/1098] blk.31.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 579/1098] blk.31.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 580/1098] blk.31.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 581/1098] blk.31.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 582/1098] blk.31.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 583/1098] blk.31.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 584/1098] blk.31.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 585/1098] blk.31.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 586/1098] blk.31.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 587/1098] blk.31.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 588/1098] blk.32.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 589/1098] blk.32.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 590/1098] blk.32.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 591/1098] blk.32.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 592/1098] blk.32.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 593/1098] blk.32.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 594/1098] blk.32.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 595/1098] blk.32.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 596/1098] blk.32.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 597/1098] blk.32.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 598/1098] blk.32.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 599/1098] blk.32.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 600/1098] blk.32.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 601/1098] blk.32.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 602/1098] blk.32.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 603/1098] blk.32.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 604/1098] blk.32.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 605/1098] blk.32.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 606/1098] blk.32.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 607/1098] blk.33.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 608/1098] blk.33.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 609/1098] blk.33.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 610/1098] blk.33.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 611/1098] blk.33.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 612/1098] blk.33.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 613/1098] blk.33.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 614/1098] blk.33.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 615/1098] blk.33.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 616/1098] blk.33.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 617/1098] blk.33.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 618/1098] blk.33.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 619/1098] blk.33.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 620/1098] blk.33.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 621/1098] blk.33.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 622/1098] blk.33.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 623/1098] blk.33.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 624/1098] blk.33.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 625/1098] blk.33.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 626/1098] blk.34.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 627/1098] blk.34.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 628/1098] blk.34.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 629/1098] blk.34.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 630/1098] blk.34.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 631/1098] blk.34.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 632/1098] blk.34.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 633/1098] blk.34.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 634/1098] blk.34.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 635/1098] blk.34.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 636/1098] blk.34.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 637/1098] blk.34.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 638/1098] blk.34.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 639/1098] blk.34.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 640/1098] blk.34.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 641/1098] blk.34.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 642/1098] blk.34.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 643/1098] blk.34.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 644/1098] blk.34.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 645/1098] blk.35.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 646/1098] blk.35.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 647/1098] blk.35.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 648/1098] blk.35.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 649/1098] blk.35.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB [ 650/1098] blk.35.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 651/1098] blk.35.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 652/1098] blk.35.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 653/1098] blk.35.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 654/1098] blk.35.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 655/1098] blk.35.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 656/1098] blk.35.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 657/1098] blk.35.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 658/1098] blk.35.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 659/1098] blk.35.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 660/1098] blk.35.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 661/1098] blk.36.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 662/1098] blk.36.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 663/1098] blk.36.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 664/1098] blk.36.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 665/1098] blk.36.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 666/1098] blk.36.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 667/1098] blk.36.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 668/1098] blk.36.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 669/1098] blk.36.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 670/1098] blk.36.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 671/1098] blk.36.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 672/1098] blk.36.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 673/1098] blk.36.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 674/1098] blk.36.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 675/1098] blk.36.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 676/1098] blk.36.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 677/1098] blk.36.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 678/1098] blk.36.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 679/1098] blk.36.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 680/1098] blk.37.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 681/1098] blk.37.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 682/1098] blk.37.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 683/1098] blk.37.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 684/1098] blk.37.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 685/1098] blk.37.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 686/1098] blk.37.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 687/1098] blk.37.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 688/1098] blk.37.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 689/1098] blk.37.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 690/1098] blk.37.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 691/1098] blk.37.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 692/1098] blk.37.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 693/1098] blk.37.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 694/1098] blk.37.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 695/1098] blk.37.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 696/1098] blk.37.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 697/1098] blk.37.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 698/1098] blk.37.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 699/1098] blk.38.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 700/1098] blk.38.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 701/1098] blk.38.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 702/1098] blk.38.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 703/1098] blk.38.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 704/1098] blk.38.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 705/1098] blk.38.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 706/1098] blk.38.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 707/1098] blk.38.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 708/1098] blk.38.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 709/1098] blk.38.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 710/1098] blk.38.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 711/1098] blk.38.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 712/1098] blk.38.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 713/1098] blk.38.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 714/1098] blk.38.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 715/1098] blk.38.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 716/1098] blk.38.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 717/1098] blk.38.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 718/1098] blk.39.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 719/1098] blk.39.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 720/1098] blk.39.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 721/1098] blk.39.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 722/1098] blk.39.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB [ 723/1098] blk.39.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 724/1098] blk.39.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 725/1098] blk.39.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 726/1098] blk.39.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 727/1098] blk.39.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 728/1098] blk.39.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 729/1098] blk.39.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 730/1098] blk.39.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 731/1098] blk.39.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 732/1098] blk.39.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 733/1098] blk.39.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 734/1098] blk.40.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 735/1098] blk.40.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 736/1098] blk.40.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 737/1098] blk.40.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 738/1098] blk.40.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 739/1098] blk.40.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 740/1098] blk.40.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 741/1098] blk.40.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 742/1098] blk.40.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 743/1098] blk.40.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 744/1098] blk.40.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 745/1098] blk.40.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 746/1098] blk.40.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 747/1098] blk.40.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 748/1098] blk.40.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 749/1098] blk.40.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 750/1098] blk.40.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 751/1098] blk.40.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 752/1098] blk.40.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 753/1098] blk.41.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 754/1098] blk.41.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 755/1098] blk.41.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 756/1098] blk.41.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 757/1098] blk.41.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 758/1098] blk.41.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 759/1098] blk.41.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 760/1098] blk.41.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 761/1098] blk.41.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 762/1098] blk.41.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 763/1098] blk.41.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 764/1098] blk.41.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 765/1098] blk.41.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 766/1098] blk.41.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 767/1098] blk.41.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 768/1098] blk.41.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 769/1098] blk.41.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 770/1098] blk.41.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 771/1098] blk.41.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 772/1098] blk.42.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 773/1098] blk.42.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 774/1098] blk.42.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 775/1098] blk.42.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 776/1098] blk.42.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 777/1098] blk.42.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 778/1098] blk.42.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 779/1098] blk.42.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 780/1098] blk.42.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 781/1098] blk.42.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 782/1098] blk.42.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 783/1098] blk.42.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 784/1098] blk.42.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 785/1098] blk.42.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 786/1098] blk.42.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 787/1098] blk.42.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 788/1098] blk.42.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 789/1098] blk.42.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 790/1098] blk.42.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 791/1098] blk.43.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 792/1098] blk.43.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 793/1098] blk.43.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 794/1098] blk.43.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 795/1098] blk.43.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB [ 796/1098] blk.43.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 797/1098] blk.43.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 798/1098] blk.43.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 799/1098] blk.43.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 800/1098] blk.43.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 801/1098] blk.43.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 802/1098] blk.43.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 803/1098] blk.43.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 804/1098] blk.43.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 805/1098] blk.43.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 806/1098] blk.43.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 807/1098] blk.44.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 808/1098] blk.44.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 809/1098] blk.44.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 810/1098] blk.44.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 811/1098] blk.44.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 812/1098] blk.44.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 813/1098] blk.44.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 814/1098] blk.44.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 815/1098] blk.44.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 816/1098] blk.44.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 817/1098] blk.44.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 818/1098] blk.44.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 819/1098] blk.44.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 820/1098] blk.44.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 821/1098] blk.44.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 822/1098] blk.44.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 823/1098] blk.44.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 824/1098] blk.44.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 825/1098] blk.44.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 826/1098] blk.45.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 827/1098] blk.45.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 828/1098] blk.45.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 829/1098] blk.45.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 830/1098] blk.45.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 831/1098] blk.45.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 832/1098] blk.45.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 833/1098] blk.45.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 834/1098] blk.45.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 835/1098] blk.45.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 836/1098] blk.45.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 837/1098] blk.45.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 838/1098] blk.45.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 839/1098] blk.45.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 840/1098] blk.45.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 841/1098] blk.45.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 842/1098] blk.45.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 843/1098] blk.45.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 844/1098] blk.45.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 845/1098] blk.46.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 846/1098] blk.46.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 847/1098] blk.46.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 848/1098] blk.46.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 849/1098] blk.46.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 850/1098] blk.46.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 851/1098] blk.46.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 852/1098] blk.46.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 853/1098] blk.46.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 854/1098] blk.46.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 855/1098] blk.46.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 856/1098] blk.46.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 857/1098] blk.46.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 858/1098] blk.46.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 859/1098] blk.46.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 860/1098] blk.46.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 861/1098] blk.46.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 862/1098] blk.46.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 863/1098] blk.46.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 864/1098] blk.47.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 865/1098] blk.47.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 866/1098] blk.47.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 867/1098] blk.47.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 868/1098] blk.47.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB [ 869/1098] blk.47.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 870/1098] blk.47.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 871/1098] blk.47.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 872/1098] blk.47.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 873/1098] blk.47.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 874/1098] blk.47.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 875/1098] blk.47.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 876/1098] blk.47.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 877/1098] blk.47.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 878/1098] blk.47.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 879/1098] blk.47.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 880/1098] blk.48.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 881/1098] blk.48.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 882/1098] blk.48.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 883/1098] blk.48.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 884/1098] blk.48.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 885/1098] blk.48.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 886/1098] blk.48.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 887/1098] blk.48.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 888/1098] blk.48.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 889/1098] blk.48.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 890/1098] blk.48.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 891/1098] blk.48.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 892/1098] blk.48.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 893/1098] blk.48.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 894/1098] blk.48.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 895/1098] blk.48.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 896/1098] blk.48.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 897/1098] blk.48.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 898/1098] blk.48.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 899/1098] blk.49.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 900/1098] blk.49.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 901/1098] blk.49.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 902/1098] blk.49.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 903/1098] blk.49.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 904/1098] blk.49.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 905/1098] blk.49.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 906/1098] blk.49.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 907/1098] blk.49.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 908/1098] blk.49.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 909/1098] blk.49.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 910/1098] blk.49.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 911/1098] blk.49.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 912/1098] blk.49.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 913/1098] blk.49.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 914/1098] blk.49.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 915/1098] blk.49.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 916/1098] blk.49.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 917/1098] blk.49.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 918/1098] blk.50.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 919/1098] blk.50.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 920/1098] blk.50.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 921/1098] blk.50.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 922/1098] blk.50.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 923/1098] blk.50.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 924/1098] blk.50.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 925/1098] blk.50.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 926/1098] blk.50.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 927/1098] blk.50.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 928/1098] blk.50.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 929/1098] blk.50.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 930/1098] blk.50.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 931/1098] blk.50.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 932/1098] blk.50.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 933/1098] blk.50.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 934/1098] blk.50.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 935/1098] blk.50.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 936/1098] blk.50.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 937/1098] blk.51.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 938/1098] blk.51.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 939/1098] blk.51.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 940/1098] blk.51.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 941/1098] blk.51.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB [ 942/1098] blk.51.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [ 943/1098] blk.51.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [ 944/1098] blk.51.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 945/1098] blk.51.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 946/1098] blk.51.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 947/1098] blk.51.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 948/1098] blk.51.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 949/1098] blk.51.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 950/1098] blk.51.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 951/1098] blk.51.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 952/1098] blk.51.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 953/1098] blk.52.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 954/1098] blk.52.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 955/1098] blk.52.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 956/1098] blk.52.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 957/1098] blk.52.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 958/1098] blk.52.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 959/1098] blk.52.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 960/1098] blk.52.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 961/1098] blk.52.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 962/1098] blk.52.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 963/1098] blk.52.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 964/1098] blk.52.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 965/1098] blk.52.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 966/1098] blk.52.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 967/1098] blk.52.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 968/1098] blk.52.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 969/1098] blk.52.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 970/1098] blk.52.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 971/1098] blk.52.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 972/1098] blk.53.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 973/1098] blk.53.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 974/1098] blk.53.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 975/1098] blk.53.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 976/1098] blk.53.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 977/1098] blk.53.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 978/1098] blk.53.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 979/1098] blk.53.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 980/1098] blk.53.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 981/1098] blk.53.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 982/1098] blk.53.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 983/1098] blk.53.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 984/1098] blk.53.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 985/1098] blk.53.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 986/1098] blk.53.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [ 987/1098] blk.53.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [ 988/1098] blk.53.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [ 989/1098] blk.53.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [ 990/1098] blk.53.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 991/1098] blk.54.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [ 992/1098] blk.54.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 993/1098] blk.54.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [ 994/1098] blk.54.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [ 995/1098] blk.54.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [ 996/1098] blk.54.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [ 997/1098] blk.54.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [ 998/1098] blk.54.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [ 999/1098] blk.54.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [1000/1098] blk.54.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [1001/1098] blk.54.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [1002/1098] blk.54.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [1003/1098] blk.54.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [1004/1098] blk.54.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [1005/1098] blk.54.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [1006/1098] blk.54.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [1007/1098] blk.54.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [1008/1098] blk.54.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [1009/1098] blk.54.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [1010/1098] blk.55.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [1011/1098] blk.55.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [1012/1098] blk.55.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [1013/1098] blk.55.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [1014/1098] blk.55.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB [1015/1098] blk.55.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [1016/1098] blk.55.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [1017/1098] blk.55.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [1018/1098] blk.55.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [1019/1098] blk.55.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [1020/1098] blk.55.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [1021/1098] blk.55.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [1022/1098] blk.55.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [1023/1098] blk.55.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [1024/1098] blk.55.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [1025/1098] blk.55.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [1026/1098] blk.56.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [1027/1098] blk.56.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [1028/1098] blk.56.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [1029/1098] blk.56.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [1030/1098] blk.56.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [1031/1098] blk.56.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [1032/1098] blk.56.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [1033/1098] blk.56.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [1034/1098] blk.56.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [1035/1098] blk.56.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [1036/1098] blk.56.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [1037/1098] blk.56.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [1038/1098] blk.56.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [1039/1098] blk.56.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [1040/1098] blk.56.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [1041/1098] blk.56.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [1042/1098] blk.56.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [1043/1098] blk.56.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [1044/1098] blk.56.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [1045/1098] blk.57.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [1046/1098] blk.57.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [1047/1098] blk.57.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [1048/1098] blk.57.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [1049/1098] blk.57.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [1050/1098] blk.57.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [1051/1098] blk.57.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [1052/1098] blk.57.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [1053/1098] blk.57.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [1054/1098] blk.57.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [1055/1098] blk.57.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [1056/1098] blk.57.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [1057/1098] blk.57.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [1058/1098] blk.57.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [1059/1098] blk.57.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [1060/1098] blk.57.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [1061/1098] blk.57.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [1062/1098] blk.57.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [1063/1098] blk.57.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [1064/1098] blk.58.attn_gate.weight - [ 4096, 8192, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [1065/1098] blk.58.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [1066/1098] blk.58.attn_qkv.weight - [ 4096, 12288, 1, 1], type = bf16, converting to q8_0 .. size = 96.00 MiB -> 51.00 MiB [1067/1098] blk.58.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [1068/1098] blk.58.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [1069/1098] blk.58.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [1070/1098] blk.58.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [1071/1098] blk.58.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [1072/1098] blk.58.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [1073/1098] blk.58.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [1074/1098] blk.58.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [1075/1098] blk.58.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [1076/1098] blk.58.ssm_a - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [1077/1098] blk.58.ssm_alpha.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [1078/1098] blk.58.ssm_beta.weight - [ 4096, 64, 1, 1], type = bf16, converting to q8_0 .. size = 0.50 MiB -> 0.27 MiB [1079/1098] blk.58.ssm_conv1d.weight - [ 4, 12288, 1, 1], type = f32, size = 0.188 MiB [1080/1098] blk.58.ssm_dt.bias - [ 64, 1, 1, 1], type = f32, size = 0.000 MiB [1081/1098] blk.58.ssm_norm.weight - [ 128, 1, 1, 1], type = f32, size = 0.000 MiB [1082/1098] blk.58.ssm_out.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [1083/1098] blk.59.attn_k.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [1084/1098] blk.59.attn_k_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [1085/1098] blk.59.attn_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [1086/1098] blk.59.attn_output.weight - [ 8192, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 64.00 MiB -> 34.00 MiB [1087/1098] blk.59.attn_q.weight - [ 4096, 16384, 1, 1], type = bf16, converting to q8_0 .. size = 128.00 MiB -> 68.00 MiB [1088/1098] blk.59.attn_q_norm.weight - [ 256, 1, 1, 1], type = f32, size = 0.001 MiB [1089/1098] blk.59.attn_v.weight - [ 4096, 512, 1, 1], type = bf16, converting to q8_0 .. size = 4.00 MiB -> 2.12 MiB [1090/1098] blk.59.ffn_down_exps.weight - [ 1024, 4096, 512, 1], type = bf16, (manual override: q8_0 -> q4_K) converting to q4_K .. size = 4096.00 MiB -> 1152.00 MiB [1091/1098] blk.59.ffn_down_shexp.weight - [ 1024, 4096, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [1092/1098] blk.59.ffn_gate_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [1093/1098] blk.59.ffn_gate_inp.weight - [ 4096, 512, 1, 1], type = f32, size = 8.000 MiB [1094/1098] blk.59.ffn_gate_inp_shexp.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB [1095/1098] blk.59.ffn_gate_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [1096/1098] blk.59.ffn_up_exps.weight - [ 4096, 1024, 512, 1], type = bf16, (manual override: q8_0 -> q3_K) converting to q3_K .. size = 4096.00 MiB -> 880.00 MiB [1097/1098] blk.59.ffn_up_shexp.weight - [ 4096, 1024, 1, 1], type = bf16, converting to q8_0 .. size = 8.00 MiB -> 4.25 MiB [1098/1098] blk.59.post_attention_norm.weight - [ 4096, 1, 1, 1], type = f32, size = 0.016 MiB llama_model_quantize_impl: model size = 756216.34 MiB llama_model_quantize_impl: quant size = 184290.32 MiB main: quantize time = 693851.09 ms main: total time = 693851.09 ms