{ "architectures": [ "Mamba2ForCausalLM" ], "bos_token_id": 0, "chunk_size": 256, "conv_kernel": 4, "eos_token_id": 0, "expand": 2, "head_dim": 64, "hidden_act": "silu", "hidden_size": 2048, "initializer_range": 0.1, "layer_norm_epsilon": 1e-05, "model_type": "mamba2", "n_groups": 1, "num_heads": 64, "num_hidden_layers": 48, "pad_token_id": 0, "rescale_prenorm_residual": false, "residual_in_fp32": true, "rms_norm": true, "state_size": 128, "tie_word_embeddings": true, "time_step_floor": 0.0001, "time_step_max": 0.1, "time_step_min": 0.001, "time_step_rank": 256, "torch_dtype": "float32", "transformers_version": "4.46.0", "use_bias": false, "use_cache": true, "use_conv_bias": true, "vocab_size": 50288, "quantization_config": { "quant_method": "hxq", "bits": 8, "n_clusters": 256, "compressed_modules": [ "backbone.layers.0.mixer.in_proj", "backbone.layers.0.mixer.out_proj", "backbone.layers.1.mixer.in_proj", "backbone.layers.1.mixer.out_proj", "backbone.layers.2.mixer.in_proj", "backbone.layers.2.mixer.out_proj", "backbone.layers.3.mixer.in_proj", "backbone.layers.3.mixer.out_proj", "backbone.layers.4.mixer.in_proj", "backbone.layers.4.mixer.out_proj", "backbone.layers.5.mixer.in_proj", "backbone.layers.5.mixer.out_proj", "backbone.layers.6.mixer.in_proj", "backbone.layers.6.mixer.out_proj", "backbone.layers.7.mixer.in_proj", "backbone.layers.7.mixer.out_proj", "backbone.layers.8.mixer.in_proj", "backbone.layers.8.mixer.out_proj", "backbone.layers.9.mixer.in_proj", "backbone.layers.9.mixer.out_proj", "backbone.layers.10.mixer.in_proj", "backbone.layers.10.mixer.out_proj", "backbone.layers.11.mixer.in_proj", "backbone.layers.11.mixer.out_proj", "backbone.layers.12.mixer.in_proj", "backbone.layers.12.mixer.out_proj", "backbone.layers.13.mixer.in_proj", "backbone.layers.13.mixer.out_proj", "backbone.layers.14.mixer.in_proj", "backbone.layers.14.mixer.out_proj", "backbone.layers.15.mixer.in_proj", "backbone.layers.15.mixer.out_proj", "backbone.layers.16.mixer.in_proj", "backbone.layers.16.mixer.out_proj", "backbone.layers.17.mixer.in_proj", "backbone.layers.17.mixer.out_proj", "backbone.layers.18.mixer.in_proj", "backbone.layers.18.mixer.out_proj", "backbone.layers.19.mixer.in_proj", "backbone.layers.19.mixer.out_proj", "backbone.layers.20.mixer.in_proj", "backbone.layers.20.mixer.out_proj", "backbone.layers.21.mixer.in_proj", "backbone.layers.21.mixer.out_proj", "backbone.layers.22.mixer.in_proj", "backbone.layers.22.mixer.out_proj", "backbone.layers.23.mixer.in_proj", "backbone.layers.23.mixer.out_proj", "backbone.layers.24.mixer.in_proj", "backbone.layers.24.mixer.out_proj", "backbone.layers.25.mixer.in_proj", "backbone.layers.25.mixer.out_proj", "backbone.layers.26.mixer.in_proj", "backbone.layers.26.mixer.out_proj", "backbone.layers.27.mixer.in_proj", "backbone.layers.27.mixer.out_proj", "backbone.layers.28.mixer.in_proj", "backbone.layers.28.mixer.out_proj", "backbone.layers.29.mixer.in_proj", "backbone.layers.29.mixer.out_proj", "backbone.layers.30.mixer.in_proj", "backbone.layers.30.mixer.out_proj", "backbone.layers.31.mixer.in_proj", "backbone.layers.31.mixer.out_proj", "backbone.layers.32.mixer.in_proj", "backbone.layers.32.mixer.out_proj", "backbone.layers.33.mixer.in_proj", "backbone.layers.33.mixer.out_proj", "backbone.layers.34.mixer.in_proj", "backbone.layers.34.mixer.out_proj", "backbone.layers.35.mixer.in_proj", "backbone.layers.35.mixer.out_proj", "backbone.layers.36.mixer.in_proj", "backbone.layers.36.mixer.out_proj", "backbone.layers.37.mixer.in_proj", "backbone.layers.37.mixer.out_proj", "backbone.layers.38.mixer.in_proj", "backbone.layers.38.mixer.out_proj", "backbone.layers.39.mixer.in_proj", "backbone.layers.39.mixer.out_proj", "backbone.layers.40.mixer.in_proj", "backbone.layers.40.mixer.out_proj", "backbone.layers.41.mixer.in_proj", "backbone.layers.41.mixer.out_proj", "backbone.layers.42.mixer.in_proj", "backbone.layers.42.mixer.out_proj", "backbone.layers.43.mixer.in_proj", "backbone.layers.43.mixer.out_proj", "backbone.layers.44.mixer.in_proj", "backbone.layers.44.mixer.out_proj", "backbone.layers.45.mixer.in_proj", "backbone.layers.45.mixer.out_proj", "backbone.layers.46.mixer.in_proj", "backbone.layers.46.mixer.out_proj", "backbone.layers.47.mixer.in_proj", "backbone.layers.47.mixer.out_proj", "lm_head" ], "compression_ratio": 4.0, "n_svd_routed": 0 } }