{ "producer": { "name": "modelopt", "version": "0.25.0" }, "architecture": "LlamaForCausalLM", "dtype": "float16", "logits_dtype": "float16", "num_hidden_layers": 28, "num_attention_heads": 24, "num_key_value_heads": 8, "hidden_size": 3072, "norm_epsilon": 1e-05, "vocab_size": 128256, "max_position_embeddings": 131072, "hidden_act": "silu", "use_parallel_embedding": true, "embedding_sharding_dim": 0, "head_size": 128, "intermediate_size": 8192, "position_embedding_type": "rope_gpt_neox", "share_embedding_table": false, "residual_mlp": false, "bias": false, "rotary_pct": 1.0, "rank": 0, "decoder": "llama", "rmsnorm": true, "lm_head_bias": false, "mlp_bias": false, "attn_bias": false, "rotary_base": 500000.0, "rotary_scaling": { "factor": 32.0, "high_freq_factor": 4.0, "low_freq_factor": 1.0, "original_max_position_embeddings": 8192, "rope_type": "llama3" }, "disable_weight_only_quant_plugin": false, "moe": { "num_experts": 0, "shared_expert_intermediate_size": 0, "top_k": 0, "normalization_mode": null, "sparse_mixer_epsilon": 0.01, "tp_mode": 0, "device_limited_n_group": 0, "device_limited_topk_group": 0, "device_limited_routed_scaling_factor": 1.0 }, "remove_duplicated_kv_heads": false, "fc_after_embed": false, "use_input_layernorm_in_first_layer": true, "use_last_layernorm": true, "layer_idx_offset": 0, "embedding_multiplier": 1.0, "attention_multiplier": 1.0, "residual_multiplier": 1.0, "output_multiplier_scale": 1.0, "runtime_defaults": null, "mapping": { "world_size": 1, "gpus_per_node": 8, "cp_size": 1, "tp_size": 1, "pp_size": 1, "moe_tp_size": 1, "moe_ep_size": 1, "auto_parallel": false }, "quantization": { "quant_algo": "W4A16_AWQ", "kv_cache_quant_algo": "INT8", "group_size": 128, "smoothquant_val": 0.5, "clamp_val": null, "use_meta_recipe": false, "has_zero_point": false, "pre_quant_scale": true, "exclude_modules": [ "transformer.layers.17.input_layernorm", "transformer.layers.1.input_layernorm", "transformer.layers.24.input_layernorm", "transformer.layers.1.post_layernorm", "transformer.layers.6.input_layernorm", "transformer.layers.14.post_layernorm", "lm_head", "transformer.layers.19.post_layernorm", "transformer.layers.22.input_layernorm", "transformer.layers.13.input_layernorm", "transformer.layers.4.post_layernorm", "transformer.layers.16.post_layernorm", "transformer.layers.8.input_layernorm", "transformer.layers.13.post_layernorm", "transformer.layers.20.post_layernorm", "transformer.layers.5.input_layernorm", "transformer.layers.12.post_layernorm", "transformer.layers.27.input_layernorm", "transformer.layers.14.input_layernorm", "transformer.layers.23.input_layernorm", "transformer.layers.10.input_layernorm", "transformer.layers.4.input_layernorm", "transformer.layers.7.post_layernorm", "transformer.layers.18.post_layernorm", "transformer.layers.15.post_layernorm", "transformer.layers.25.input_layernorm", "transformer.layers.21.input_layernorm", "transformer.layers.9.post_layernorm", "transformer.layers.11.post_layernorm", "transformer.layers.24.post_layernorm", "transformer.layers.25.post_layernorm", "transformer.layers.27.post_layernorm", "transformer.layers.26.post_layernorm", "transformer.layers.0.post_layernorm", "transformer.layers.9.input_layernorm", "transformer.layers.10.post_layernorm", "transformer.layers.20.input_layernorm", "transformer.layers.15.input_layernorm", "transformer.layers.7.input_layernorm", "transformer.layers.2.input_layernorm", "transformer.layers.12.input_layernorm", "transformer.layers.5.post_layernorm", "transformer.layers.3.post_layernorm", "transformer.layers.6.post_layernorm", "transformer.layers.17.post_layernorm", "transformer.layers.8.post_layernorm", "transformer.layers.18.input_layernorm", "transformer.layers.2.post_layernorm", "transformer.layers.11.input_layernorm", "transformer.layers.22.post_layernorm", "transformer.ln_f", "transformer.layers.21.post_layernorm", "transformer.layers.26.input_layernorm", "transformer.layers.19.input_layernorm", "transformer.layers.23.post_layernorm", "transformer.layers.3.input_layernorm", "transformer.layers.16.input_layernorm", "transformer.layers.0.input_layernorm", "transformer.vocab_embedding" ] }, "qk_layernorm": false, "rotary_embedding_dim": 128, "tie_word_embeddings": true, "model_type": "llama" }