# Quantization recipe with a single stage (`default_stage`) that configures
# two modifiers: SmoothQuantModifier and GPTQModifier.
#
# All regex patterns are single-quoted so that backslashes stay literal
# (YAML performs no escape processing inside single quotes).
# NOTE(review): the `re:` prefix presumably tells the consuming library to
# treat the value as a regular expression - confirm against its docs.
default_stage:
  default_modifiers:
    SmoothQuantModifier:
      smoothing_strength: 0.8
      # Each mapping is a two-item list:
      #   1. a list of patterns naming projection layers, and
      #   2. one pattern naming a layernorm in the same decoder layer.
      # NOTE(review): presumably [balance_layers, smooth_layer] in SmoothQuant
      # terms - confirm against the consumer.
      #
      # Every layer 0-63 has an MLP mapping
      # (gate_proj / up_proj -> post_attention_layernorm). Only layers
      # 3, 7, 11, ..., 63 (every fourth layer) additionally have an attention
      # mapping (q_proj / k_proj / v_proj -> input_layernorm).
      # NOTE(review): presumably the other layers have no self_attn q/k/v
      # projections - verify against the model architecture.
      mappings:
        - - ['re:.*model\.language_model\.layers\.0\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.0\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.0\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.1\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.1\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.1\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.2\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.2\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.2\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.3\.self_attn\.q_proj$',
             're:.*model\.language_model\.layers\.3\.self_attn\.k_proj$',
             're:.*model\.language_model\.layers\.3\.self_attn\.v_proj$']
          - 're:.*model\.language_model\.layers\.3\.input_layernorm$'
        - - ['re:.*model\.language_model\.layers\.3\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.3\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.3\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.4\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.4\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.4\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.5\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.5\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.5\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.6\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.6\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.6\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.7\.self_attn\.q_proj$',
             're:.*model\.language_model\.layers\.7\.self_attn\.k_proj$',
             're:.*model\.language_model\.layers\.7\.self_attn\.v_proj$']
          - 're:.*model\.language_model\.layers\.7\.input_layernorm$'
        - - ['re:.*model\.language_model\.layers\.7\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.7\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.7\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.8\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.8\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.8\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.9\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.9\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.9\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.10\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.10\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.10\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.11\.self_attn\.q_proj$',
             're:.*model\.language_model\.layers\.11\.self_attn\.k_proj$',
             're:.*model\.language_model\.layers\.11\.self_attn\.v_proj$']
          - 're:.*model\.language_model\.layers\.11\.input_layernorm$'
        - - ['re:.*model\.language_model\.layers\.11\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.11\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.11\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.12\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.12\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.12\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.13\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.13\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.13\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.14\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.14\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.14\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.15\.self_attn\.q_proj$',
             're:.*model\.language_model\.layers\.15\.self_attn\.k_proj$',
             're:.*model\.language_model\.layers\.15\.self_attn\.v_proj$']
          - 're:.*model\.language_model\.layers\.15\.input_layernorm$'
        - - ['re:.*model\.language_model\.layers\.15\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.15\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.15\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.16\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.16\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.16\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.17\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.17\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.17\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.18\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.18\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.18\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.19\.self_attn\.q_proj$',
             're:.*model\.language_model\.layers\.19\.self_attn\.k_proj$',
             're:.*model\.language_model\.layers\.19\.self_attn\.v_proj$']
          - 're:.*model\.language_model\.layers\.19\.input_layernorm$'
        - - ['re:.*model\.language_model\.layers\.19\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.19\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.19\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.20\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.20\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.20\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.21\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.21\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.21\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.22\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.22\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.22\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.23\.self_attn\.q_proj$',
             're:.*model\.language_model\.layers\.23\.self_attn\.k_proj$',
             're:.*model\.language_model\.layers\.23\.self_attn\.v_proj$']
          - 're:.*model\.language_model\.layers\.23\.input_layernorm$'
        - - ['re:.*model\.language_model\.layers\.23\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.23\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.23\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.24\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.24\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.24\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.25\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.25\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.25\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.26\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.26\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.26\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.27\.self_attn\.q_proj$',
             're:.*model\.language_model\.layers\.27\.self_attn\.k_proj$',
             're:.*model\.language_model\.layers\.27\.self_attn\.v_proj$']
          - 're:.*model\.language_model\.layers\.27\.input_layernorm$'
        - - ['re:.*model\.language_model\.layers\.27\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.27\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.27\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.28\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.28\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.28\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.29\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.29\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.29\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.30\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.30\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.30\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.31\.self_attn\.q_proj$',
             're:.*model\.language_model\.layers\.31\.self_attn\.k_proj$',
             're:.*model\.language_model\.layers\.31\.self_attn\.v_proj$']
          - 're:.*model\.language_model\.layers\.31\.input_layernorm$'
        - - ['re:.*model\.language_model\.layers\.31\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.31\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.31\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.32\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.32\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.32\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.33\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.33\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.33\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.34\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.34\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.34\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.35\.self_attn\.q_proj$',
             're:.*model\.language_model\.layers\.35\.self_attn\.k_proj$',
             're:.*model\.language_model\.layers\.35\.self_attn\.v_proj$']
          - 're:.*model\.language_model\.layers\.35\.input_layernorm$'
        - - ['re:.*model\.language_model\.layers\.35\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.35\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.35\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.36\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.36\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.36\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.37\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.37\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.37\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.38\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.38\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.38\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.39\.self_attn\.q_proj$',
             're:.*model\.language_model\.layers\.39\.self_attn\.k_proj$',
             're:.*model\.language_model\.layers\.39\.self_attn\.v_proj$']
          - 're:.*model\.language_model\.layers\.39\.input_layernorm$'
        - - ['re:.*model\.language_model\.layers\.39\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.39\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.39\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.40\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.40\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.40\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.41\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.41\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.41\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.42\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.42\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.42\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.43\.self_attn\.q_proj$',
             're:.*model\.language_model\.layers\.43\.self_attn\.k_proj$',
             're:.*model\.language_model\.layers\.43\.self_attn\.v_proj$']
          - 're:.*model\.language_model\.layers\.43\.input_layernorm$'
        - - ['re:.*model\.language_model\.layers\.43\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.43\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.43\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.44\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.44\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.44\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.45\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.45\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.45\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.46\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.46\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.46\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.47\.self_attn\.q_proj$',
             're:.*model\.language_model\.layers\.47\.self_attn\.k_proj$',
             're:.*model\.language_model\.layers\.47\.self_attn\.v_proj$']
          - 're:.*model\.language_model\.layers\.47\.input_layernorm$'
        - - ['re:.*model\.language_model\.layers\.47\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.47\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.47\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.48\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.48\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.48\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.49\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.49\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.49\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.50\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.50\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.50\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.51\.self_attn\.q_proj$',
             're:.*model\.language_model\.layers\.51\.self_attn\.k_proj$',
             're:.*model\.language_model\.layers\.51\.self_attn\.v_proj$']
          - 're:.*model\.language_model\.layers\.51\.input_layernorm$'
        - - ['re:.*model\.language_model\.layers\.51\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.51\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.51\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.52\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.52\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.52\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.53\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.53\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.53\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.54\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.54\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.54\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.55\.self_attn\.q_proj$',
             're:.*model\.language_model\.layers\.55\.self_attn\.k_proj$',
             're:.*model\.language_model\.layers\.55\.self_attn\.v_proj$']
          - 're:.*model\.language_model\.layers\.55\.input_layernorm$'
        - - ['re:.*model\.language_model\.layers\.55\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.55\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.55\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.56\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.56\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.56\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.57\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.57\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.57\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.58\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.58\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.58\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.59\.self_attn\.q_proj$',
             're:.*model\.language_model\.layers\.59\.self_attn\.k_proj$',
             're:.*model\.language_model\.layers\.59\.self_attn\.v_proj$']
          - 're:.*model\.language_model\.layers\.59\.input_layernorm$'
        - - ['re:.*model\.language_model\.layers\.59\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.59\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.59\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.60\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.60\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.60\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.61\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.61\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.61\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.62\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.62\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.62\.post_attention_layernorm$'
        - - ['re:.*model\.language_model\.layers\.63\.self_attn\.q_proj$',
             're:.*model\.language_model\.layers\.63\.self_attn\.k_proj$',
             're:.*model\.language_model\.layers\.63\.self_attn\.v_proj$']
          - 're:.*model\.language_model\.layers\.63\.input_layernorm$'
        - - ['re:.*model\.language_model\.layers\.63\.mlp\.gate_proj$',
             're:.*model\.language_model\.layers\.63\.mlp\.up_proj$']
          - 're:.*model\.language_model\.layers\.63\.post_attention_layernorm$'
      # Explicit empty list (not a bare `ignore:`, which would parse as null).
      ignore: []
      algorithm: smoothquant

    GPTQModifier:
      targets: [Linear]
      # NOTE(review): the dots in 're:model.visual.*' and 're:mtp.*' are
      # unescaped; if the consumer treats these as regexes they match any
      # character - confirm that is intended.
      ignore: [lm_head, 're:model.visual.*', 're:.*embed_tokens$', 're:mtp.*']
      scheme: NVFP4
      bypass_divisibility_checks: false
      block_size: 128
      dampening_frac: 0.01
      actorder: static
      offload_hessians: false