{ "mlp_bias": false, "attn_bias": false, "rotary_base": 500000.0, "rotary_scaling": { "factor": 32.0, "high_freq_factor": 4.0, "low_freq_factor": 1.0, "original_max_position_embeddings": 8192, "rope_type": "llama3" }, "residual_mlp": false, "disable_weight_only_quant_plugin": false, "moe": { "num_experts": 0, "shared_expert_intermediate_size": 0, "top_k": 0, "normalization_mode": null, "sparse_mixer_epsilon": 0.01, "tp_mode": 0, "device_limited_n_group": 0, "device_limited_topk_group": 0, "device_limited_routed_scaling_factor": 1.0 }, "remove_duplicated_kv_heads": false, "fc_after_embed": false, "use_input_layernorm_in_first_layer": true, "use_last_layernorm": true, "layer_idx_offset": 0, "embedding_multiplier": 1.0, "attention_multiplier": 1.0, "residual_multiplier": 1.0, "output_multiplier_scale": 1.0, "has_partial_lora_mask": false, "architecture": "LlamaForCausalLM", "dtype": "float16", "vocab_size": 128256, "hidden_size": 3072, "num_hidden_layers": 28, "num_attention_heads": 24, "hidden_act": "silu", "logits_dtype": "float32", "norm_epsilon": 1e-05, "runtime_defaults": null, "position_embedding_type": "rope_gpt_neox", "num_key_value_heads": 8, "intermediate_size": 8192, "max_position_embeddings": 131072, "mapping": { "world_size": 1, "gpus_per_node": 8, "cp_size": 1, "tp_size": 1, "pp_size": 1, "moe_tp_size": 1, "moe_cluster_size": 1, "moe_ep_size": 1, "attn_tp_size": -1, "attn_cp_size": -1, "cp_config": {}, "auto_parallel": false, "enable_attention_dp": false, "enable_lm_head_tp_in_adp": false }, "quantization": { "quant_algo": "NVFP4", "kv_cache_quant_algo": "FP8", "group_size": 128, "smoothquant_val": 0.5, "clamp_val": null, "use_meta_recipe": false, "has_zero_point": false, "pre_quant_scale": false, "exclude_modules": null, "mamba_ssm_cache_dtype": null }, "use_parallel_embedding": false, "embedding_sharding_dim": 0, "head_size": 128, "qk_layernorm": false, "rotary_embedding_dim": 128, "tie_word_embeddings": true }