{ "base_model": "Qwen/Qwen3.5-27B", "format": "dashq-packed-linear", "format_version": 1, "method": "dashq", "model_class": "image_text_to_text", "params": { "bits": 3, "group_size": 128, "low_memory_optimization": false, "moe_hessian_scope": "shared", "n_samples": 128, "scale_zero_dtype": "float16", "symmetric": false, "use_error_compensation": true, "use_optimal_shrinkage": true, "use_weighted_quantization": true }, "quantized_modules": { "model.language_model.layers.0.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.0.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.0.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.0.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.0.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.0.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.0.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.0.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.1.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.1.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.1.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.1.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.1.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.1.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.1.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.1.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.10.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.10.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.10.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.10.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.10.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.10.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.10.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.10.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.11.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.11.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.11.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.11.self_attn.k_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.11.self_attn.o_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.11.self_attn.q_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 491520, "out_features": 12288, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.11.self_attn.v_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.12.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.12.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.12.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.12.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.12.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.12.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.12.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.12.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.13.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.13.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.13.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.13.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.13.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.13.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.13.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.13.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.14.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.14.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.14.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.14.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.14.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.14.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.14.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.14.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.15.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.15.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.15.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.15.self_attn.k_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.15.self_attn.o_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.15.self_attn.q_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 491520, "out_features": 12288, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.15.self_attn.v_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.16.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.16.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.16.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.16.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.16.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.16.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.16.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.16.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.17.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.17.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.17.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.17.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.17.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.17.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.17.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.17.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.18.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.18.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.18.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.18.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.18.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.18.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.18.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.18.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.19.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.19.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.19.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.19.self_attn.k_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.19.self_attn.o_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.19.self_attn.q_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 491520, "out_features": 12288, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.19.self_attn.v_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.2.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.2.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.2.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.2.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.2.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.2.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.2.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.2.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.20.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.20.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.20.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.20.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.20.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.20.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.20.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.20.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.21.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.21.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.21.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.21.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.21.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.21.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.21.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.21.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.22.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.22.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.22.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.22.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.22.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.22.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.22.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.22.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.23.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.23.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.23.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.23.self_attn.k_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.23.self_attn.o_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.23.self_attn.q_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 491520, "out_features": 12288, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.23.self_attn.v_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.24.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.24.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.24.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.24.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.24.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.24.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.24.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.24.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.25.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.25.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.25.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.25.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.25.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.25.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.25.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.25.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.26.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.26.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.26.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.26.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.26.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.26.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.26.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.26.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.27.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.27.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.27.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.27.self_attn.k_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.27.self_attn.o_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.27.self_attn.q_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 491520, "out_features": 12288, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.27.self_attn.v_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.28.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.28.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.28.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.28.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.28.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.28.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.28.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.28.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.29.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.29.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.29.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.29.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.29.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.29.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.29.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.29.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.3.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.3.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.3.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.3.self_attn.k_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.3.self_attn.o_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.3.self_attn.q_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 491520, "out_features": 12288, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.3.self_attn.v_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.30.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.30.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.30.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.30.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.30.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.30.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.30.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.30.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.31.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.31.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.31.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.31.self_attn.k_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.31.self_attn.o_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.31.self_attn.q_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 491520, "out_features": 12288, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.31.self_attn.v_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.32.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.32.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.32.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.32.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.32.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.32.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.32.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.32.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.33.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.33.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.33.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.33.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.33.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.33.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.33.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.33.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.34.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.34.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.34.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.34.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.34.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.34.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.34.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.34.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.35.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.35.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.35.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.35.self_attn.k_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.35.self_attn.o_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.35.self_attn.q_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 491520, "out_features": 12288, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.35.self_attn.v_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.36.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.36.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.36.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.36.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.36.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.36.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.36.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.36.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.37.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.37.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.37.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.37.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.37.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.37.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.37.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.37.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.38.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.38.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.38.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.38.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.38.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.38.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.38.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.38.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.39.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.39.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.39.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.39.self_attn.k_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.39.self_attn.o_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.39.self_attn.q_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 491520, "out_features": 12288, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.39.self_attn.v_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.4.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.4.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.4.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.4.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.4.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.4.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.4.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.4.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.40.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.40.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.40.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.40.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.40.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.40.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.40.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.40.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.41.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.41.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.41.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.41.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.41.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.41.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.41.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.41.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.42.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.42.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.42.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.42.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.42.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.42.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.42.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.42.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.43.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.43.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.43.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.43.self_attn.k_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.43.self_attn.o_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.43.self_attn.q_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 491520, "out_features": 12288, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.43.self_attn.v_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.44.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.44.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.44.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.44.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.44.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.44.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.44.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.44.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.45.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.45.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.45.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.45.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.45.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.45.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.45.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.45.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.46.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.46.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.46.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.46.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.46.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.46.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.46.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.46.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.47.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.47.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.47.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.47.self_attn.k_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.47.self_attn.o_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.47.self_attn.q_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 491520, "out_features": 12288, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.47.self_attn.v_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.48.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.48.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.48.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.48.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.48.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.48.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.48.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.48.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.49.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.49.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.49.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.49.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.49.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.49.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.49.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.49.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.5.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.5.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.5.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.5.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.5.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.5.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.5.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.5.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.50.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.50.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.50.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.50.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.50.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.50.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.50.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.50.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.51.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.51.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.51.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.51.self_attn.k_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.51.self_attn.o_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.51.self_attn.q_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 491520, "out_features": 12288, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.51.self_attn.v_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.52.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.52.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.52.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.52.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.52.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.52.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.52.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.52.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.53.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.53.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.53.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.53.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.53.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.53.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.53.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.53.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.54.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.54.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.54.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.54.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.54.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.54.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.54.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.54.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.55.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.55.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.55.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.55.self_attn.k_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.55.self_attn.o_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.55.self_attn.q_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 491520, "out_features": 12288, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.55.self_attn.v_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.56.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.56.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.56.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.56.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.56.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.56.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.56.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.56.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.57.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.57.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.57.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.57.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.57.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.57.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.57.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.57.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.58.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.58.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.58.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.58.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.58.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.58.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.58.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.58.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.59.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.59.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.59.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.59.self_attn.k_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.59.self_attn.o_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.59.self_attn.q_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 491520, "out_features": 12288, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.59.self_attn.v_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.6.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.6.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.6.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.6.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.6.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.6.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.6.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.6.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.60.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.60.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.60.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.60.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.60.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.60.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.60.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.60.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.61.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.61.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.61.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.61.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.61.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.61.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.61.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.61.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.62.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.62.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.62.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.62.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.62.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.62.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.62.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.62.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.63.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.63.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.63.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.63.self_attn.k_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.63.self_attn.o_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.63.self_attn.q_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 491520, "out_features": 12288, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.63.self_attn.v_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.7.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.7.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.7.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.7.self_attn.k_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.7.self_attn.o_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.7.self_attn.q_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 491520, "out_features": 12288, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.7.self_attn.v_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 40960, "out_features": 1024, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.8.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.8.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.8.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.8.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.8.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.8.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.8.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.8.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.9.linear_attn.in_proj_a": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.9.linear_attn.in_proj_b": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 1920, "out_features": 48, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.9.linear_attn.in_proj_qkv": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 409600, "out_features": 10240, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.9.linear_attn.in_proj_z": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 6144, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.9.linear_attn.out_proj": { "group_size": 128, "in_features": 6144, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 245760, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 6144, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.9.mlp.down_proj": { "group_size": 128, "in_features": 17408, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 5120, "packing": "int3_packed_u32", "quant_in_features": 17408, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.9.mlp.gate_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" }, "model.language_model.layers.9.mlp.up_proj": { "group_size": 128, "in_features": 5120, "linear_dtype": "bfloat16", "nbits": 3, "num_groups": 696320, "out_features": 17408, "packing": "int3_packed_u32", "quant_in_features": 5120, "runtime_backend": "torch", "scale_zero_dtype": "float16" } }, "results": { "Method": "dashq", "Model": "Qwen/Qwen3.5-27B", "ModelSizeGB": 16.513800664, "OriginalSizeGB": 55.563022432, "PPL": 7.324132919311523, "Params": "{'bits': 3, 'group_size': 128, 'scale_zero_dtype': 'float16', 'n_samples': 128, 'moe_hessian_scope': 'shared', 'use_error_compensation': True, 'use_optimal_shrinkage': True, 'use_weighted_quantization': True, 'symmetric': False, 'low_memory_optimization': False}", "QuantTime": 820.9418406486511, "arc_challenge": 62.20136518771331, "arc_easy": 82.6178451178451, "commonsense_qa": 85.25798525798525, "gsm8k_cot": 89.23426838514025, "hellaswag": 82.01553475403306, "lambada_openai": 75.0630700562779, "mmlu": 85.37957555903718, "openbookqa": 45.800000000000004, "piqa": 81.39281828073993, "truthfulqa_mc2": 58.073103317159976, "winogrande": 77.82162588792423, "zeroshot_avg": 72.24926087329764 } }