diff --git "a/.quant_summary.txt" "b/.quant_summary.txt" new file mode 100644--- /dev/null +++ "b/.quant_summary.txt" @@ -0,0 +1,2257 @@ +model.visual.patch_embed.proj.input_quantizer TensorQuantizer(disabled) +model.visual.patch_embed.proj.output_quantizer TensorQuantizer(disabled) +model.visual.patch_embed.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.pos_embed.weight_quantizer TensorQuantizer(disabled) +model.visual.pos_embed.input_quantizer HardDisabledTensorQuantizer(disabled) +model.visual.pos_embed.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.0.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.0.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.0.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.0.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.0.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.0.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.0.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.0.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.0.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.0.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.0.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.0.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.0.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.0.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.0.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.0.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.0.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.0.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.0.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.0.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.1.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.1.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.1.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.1.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.1.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.1.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.1.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.1.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.1.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.1.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.1.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.1.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.1.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.1.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.1.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.1.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.1.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.1.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.1.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.1.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.2.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.2.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.2.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.2.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.2.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.2.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.2.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.2.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.2.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.2.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.2.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.2.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.2.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.2.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.2.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.2.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.2.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.2.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.2.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.2.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.3.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.3.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.3.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.3.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.3.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.3.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.3.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.3.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.3.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.3.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.3.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.3.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.3.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.3.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.3.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.3.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.3.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.3.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.3.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.3.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.4.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.4.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.4.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.4.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.4.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.4.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.4.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.4.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.4.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.4.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.4.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.4.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.4.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.4.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.4.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.4.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.4.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.4.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.4.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.4.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.5.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.5.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.5.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.5.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.5.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.5.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.5.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.5.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.5.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.5.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.5.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.5.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.5.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.5.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.5.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.5.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.5.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.5.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.5.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.5.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.6.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.6.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.6.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.6.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.6.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.6.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.6.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.6.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.6.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.6.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.6.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.6.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.6.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.6.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.6.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.6.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.6.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.6.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.6.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.6.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.7.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.7.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.7.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.7.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.7.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.7.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.7.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.7.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.7.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.7.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.7.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.7.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.7.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.7.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.7.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.7.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.7.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.7.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.7.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.7.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.8.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.8.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.8.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.8.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.8.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.8.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.8.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.8.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.8.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.8.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.8.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.8.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.8.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.8.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.8.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.8.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.8.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.8.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.8.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.8.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.9.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.9.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.9.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.9.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.9.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.9.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.9.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.9.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.9.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.9.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.9.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.9.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.9.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.9.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.9.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.9.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.9.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.9.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.9.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.9.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.10.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.10.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.10.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.10.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.10.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.10.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.10.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.10.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.10.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.10.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.10.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.10.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.10.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.10.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.10.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.10.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.10.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.10.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.10.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.10.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.11.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.11.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.11.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.11.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.11.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.11.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.11.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.11.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.11.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.11.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.11.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.11.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.11.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.11.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.11.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.11.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.11.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.11.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.11.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.11.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.12.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.12.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.12.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.12.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.12.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.12.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.12.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.12.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.12.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.12.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.12.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.12.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.12.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.12.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.12.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.12.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.12.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.12.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.12.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.12.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.13.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.13.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.13.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.13.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.13.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.13.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.13.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.13.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.13.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.13.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.13.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.13.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.13.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.13.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.13.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.13.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.13.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.13.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.13.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.13.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.14.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.14.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.14.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.14.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.14.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.14.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.14.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.14.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.14.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.14.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.14.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.14.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.14.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.14.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.14.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.14.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.14.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.14.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.14.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.14.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.15.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.15.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.15.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.15.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.15.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.15.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.15.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.15.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.15.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.15.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.15.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.15.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.15.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.15.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.15.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.15.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.15.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.15.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.15.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.15.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.16.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.16.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.16.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.16.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.16.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.16.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.16.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.16.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.16.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.16.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.16.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.16.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.16.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.16.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.16.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.16.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.16.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.16.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.16.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.16.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.17.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.17.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.17.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.17.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.17.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.17.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.17.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.17.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.17.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.17.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.17.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.17.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.17.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.17.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.17.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.17.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.17.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.17.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.17.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.17.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.18.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.18.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.18.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.18.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.18.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.18.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.18.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.18.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.18.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.18.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.18.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.18.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.18.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.18.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.18.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.18.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.18.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.18.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.18.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.18.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.19.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.19.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.19.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.19.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.19.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.19.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.19.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.19.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.19.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.19.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.19.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.19.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.19.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.19.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.19.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.19.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.19.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.19.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.19.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.19.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.20.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.20.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.20.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.20.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.20.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.20.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.20.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.20.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.20.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.20.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.20.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.20.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.20.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.20.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.20.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.20.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.20.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.20.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.20.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.20.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.21.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.21.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.21.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.21.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.21.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.21.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.21.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.21.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.21.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.21.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.21.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.21.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.21.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.21.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.21.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.21.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.21.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.21.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.21.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.21.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.22.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.22.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.22.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.22.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.22.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.22.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.22.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.22.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.22.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.22.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.22.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.22.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.22.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.22.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.22.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.22.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.22.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.22.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.22.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.22.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.23.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.23.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.23.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.23.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.23.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.23.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.23.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.23.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.23.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.23.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.23.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.23.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.23.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.23.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.23.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.23.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.23.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.23.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.23.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.23.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.24.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.24.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.24.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.24.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.24.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.24.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.24.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.24.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.24.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.24.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.24.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.24.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.24.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.24.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.24.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.24.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.24.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.24.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.24.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.24.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.25.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.25.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.25.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.25.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.25.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.25.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.25.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.25.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.25.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.25.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.25.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.25.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.25.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.25.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.25.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.25.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.25.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.25.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.25.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.25.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.26.norm1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.26.norm1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.26.norm2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.26.norm2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.26.attn.qkv.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.26.attn.qkv.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.26.attn.qkv.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.26.attn.proj.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.26.attn.proj.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.26.attn.proj.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.26.attn.q_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.26.attn.k_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.26.attn.v_bmm_quantizer TensorQuantizer(disabled) +model.visual.blocks.26.attn.softmax_quantizer TensorQuantizer(disabled) +model.visual.blocks.26.mlp.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.26.mlp.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.26.mlp.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.blocks.26.mlp.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.blocks.26.mlp.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.blocks.26.mlp.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.visual.merger.norm.input_quantizer TensorQuantizer(disabled) +model.visual.merger.norm.output_quantizer TensorQuantizer(disabled) +model.visual.merger.linear_fc1.input_quantizer TensorQuantizer(disabled) +model.visual.merger.linear_fc1.output_quantizer TensorQuantizer(disabled) +model.visual.merger.linear_fc1.weight_quantizer TensorQuantizer(disabled) +model.visual.merger.linear_fc2.input_quantizer TensorQuantizer(disabled) +model.visual.merger.linear_fc2.output_quantizer TensorQuantizer(disabled) +model.visual.merger.linear_fc2.weight_quantizer TensorQuantizer(disabled) +model.language_model.embed_tokens.weight_quantizer TensorQuantizer(disabled) +model.language_model.embed_tokens.input_quantizer HardDisabledTensorQuantizer(disabled) +model.language_model.embed_tokens.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.0.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.0.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.0.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.0.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.81e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.0.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.0.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.25e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.0.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.12e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.0.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.0.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.51e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.0.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.12e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.0.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.0.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.34e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.0.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.0.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.0.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.0.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.0.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.0.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.0.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.0.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.0.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.08e-03, 4.06e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.0.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.0.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.0.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.53e-03, 1.60e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.0.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.0.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.0.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.21e-03, 9.34e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.1.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.1.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.1.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.1.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.41e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.1.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.1.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.43e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.1.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.42e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.1.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.1.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.48e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.1.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.42e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.1.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.1.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.56e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.1.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.1.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.1.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.1.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.1.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.1.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.1.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.1.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.1.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.59e-03, 2.29e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.1.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.1.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.1.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.10e-03, 1.97e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.1.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.1.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.1.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.47e-03, 4.45e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.2.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.2.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.2.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.2.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.56e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.2.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.2.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.42e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.2.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.70e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.2.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.2.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.87e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.2.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.70e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.2.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.2.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.47e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.2.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.2.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.2.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.2.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.2.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.2.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.2.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.2.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.2.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.62e-03, 3.36e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.2.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.2.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.2.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.87e-03, 1.92e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.2.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.2.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.2.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.77e-03, 1.12e+00](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.3.self_attn.q_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.35e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.3.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.3.self_attn.q_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.16e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.3.self_attn.k_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.35e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.3.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.3.self_attn.k_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.00e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.3.self_attn.v_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.35e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.3.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.3.self_attn.v_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.13e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.3.self_attn.o_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.01e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.3.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.3.self_attn.o_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.36e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.3.self_attn.q_bmm_quantizer TensorQuantizer(disabled) +model.language_model.layers.3.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.3.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.3.self_attn.softmax_quantizer TensorQuantizer(disabled) +model.language_model.layers.3.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.3.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.3.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.13e-03, 1.64e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.3.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.3.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.3.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.13e-03, 1.77e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.3.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.3.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.3.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.31e-03, 6.80e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.4.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.4.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.4.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.4.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.60e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.4.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.4.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.32e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.4.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.25e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.4.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.4.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.88e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.4.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.25e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.4.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.4.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.42e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.4.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.4.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.4.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.4.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.4.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.4.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.4.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.4.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.4.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.19e-03, 1.79e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.4.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.4.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.4.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.39e-03, 1.53e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.4.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.4.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.4.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.62e-03, 3.18e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.5.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.5.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.5.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.5.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.62e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.5.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.5.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.33e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.5.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.86e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.5.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.5.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.19e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.5.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.86e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.5.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.5.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.49e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.5.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.5.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.5.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.5.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.5.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.5.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.5.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.5.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.5.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.87e-03, 1.46e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.5.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.5.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.5.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.90e-03, 1.25e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.5.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.5.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.5.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.19e-03, 3.75e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.6.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.6.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.6.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.6.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.24e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.6.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.6.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.29e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.6.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.30e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.6.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.6.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.07e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.6.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.30e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.6.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.6.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.45e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.6.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.6.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.6.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.6.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.6.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.6.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.6.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.6.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.6.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.56e-03, 2.28e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.6.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.6.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.6.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.79e-03, 2.28e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.6.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.6.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.6.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.71e-03, 5.62e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.7.self_attn.q_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.60e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.7.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.7.self_attn.q_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.54e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.7.self_attn.k_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.60e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.7.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.7.self_attn.k_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.75e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.7.self_attn.v_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.60e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.7.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.7.self_attn.v_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.85e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.7.self_attn.o_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.94e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.7.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.7.self_attn.o_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.30e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.7.self_attn.q_bmm_quantizer TensorQuantizer(disabled) +model.language_model.layers.7.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.7.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.7.self_attn.softmax_quantizer TensorQuantizer(disabled) +model.language_model.layers.7.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.7.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.7.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[6.02e-03, 2.25e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.7.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.7.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.7.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.01e-03, 1.28e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.7.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.7.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.7.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.92e-03, 3.52e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.8.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.8.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.8.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.8.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.90e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.8.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.8.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.31e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.8.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.20e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.8.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.8.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.27e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.8.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.20e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.8.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.8.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.09e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.8.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.8.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.8.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.8.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.8.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.8.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.8.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.8.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.8.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.18e-03, 2.11e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.8.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.8.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.8.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.18e-03, 1.36e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.8.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.8.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.8.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.75e-03, 4.26e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.9.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.9.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.9.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.9.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.81e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.9.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.9.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.20e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.9.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.28e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.9.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.9.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.88e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.9.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.28e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.9.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.9.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.33e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.9.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.9.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.9.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.9.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.9.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.9.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.9.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.9.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.9.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.34e-03, 1.71e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.9.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.9.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.9.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.96e-03, 1.59e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.9.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.9.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.9.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.32e-03, 4.30e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.10.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.10.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.10.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.10.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.60e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.10.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.10.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.88e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.10.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.30e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.10.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.10.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.55e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.10.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.30e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.10.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.10.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.75e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.10.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.10.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.10.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.10.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.10.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.10.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.10.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.10.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.10.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.97e-03, 2.37e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.10.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.10.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.10.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.77e-03, 1.36e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.10.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.10.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.10.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.27e-03, 2.73e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.11.self_attn.q_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.50e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.11.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.11.self_attn.q_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.73e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.11.self_attn.k_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.50e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.11.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.11.self_attn.k_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.77e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.11.self_attn.v_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.50e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.11.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.11.self_attn.v_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.56e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.11.self_attn.o_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.09e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.11.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.11.self_attn.o_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.02e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.11.self_attn.q_bmm_quantizer TensorQuantizer(disabled) +model.language_model.layers.11.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.11.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.11.self_attn.softmax_quantizer TensorQuantizer(disabled) +model.language_model.layers.11.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.11.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.11.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.36e-03, 2.00e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.11.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.11.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.11.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.47e-03, 1.29e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.11.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.11.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.11.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.90e-03, 5.82e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.12.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.12.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.12.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.12.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.40e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.12.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.12.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.30e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.12.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.90e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.12.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.12.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.20e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.12.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.90e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.12.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.12.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.16e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.12.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.12.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.12.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.12.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.12.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.12.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.12.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.12.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.12.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.89e-03, 1.83e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.12.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.12.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.12.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.48e-03, 1.04e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.12.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.12.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.12.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.66e-03, 2.03e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.13.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.13.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.13.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.13.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.70e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.13.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.13.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.87e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.13.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.00e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.13.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.13.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.12e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.13.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.00e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.13.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.13.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.99e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.13.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.13.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.13.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.13.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.13.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.13.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.13.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.13.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.13.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.56e-03, 1.57e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.13.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.13.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.13.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.91e-03, 1.46e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.13.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.13.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.13.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.80e-03, 2.43e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.14.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.14.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.14.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.14.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.81e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.14.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.14.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.01e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.14.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.28e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.14.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.14.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.83e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.14.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.28e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.14.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.14.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.75e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.14.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.14.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.14.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.14.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.14.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.14.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.14.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.14.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.14.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.32e-03, 1.20e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.14.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.14.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.14.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.60e-03, 1.29e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.14.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.14.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.14.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.05e-03, 2.27e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.15.self_attn.q_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.30e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.15.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.15.self_attn.q_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.76e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.15.self_attn.k_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.30e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.15.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.15.self_attn.k_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.32e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.15.self_attn.v_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.30e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.15.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.15.self_attn.v_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.43e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.15.self_attn.o_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.38e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.15.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.15.self_attn.o_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.59e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.15.self_attn.q_bmm_quantizer TensorQuantizer(disabled) +model.language_model.layers.15.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.15.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.15.self_attn.softmax_quantizer TensorQuantizer(disabled) +model.language_model.layers.15.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.15.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.15.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.04e-03, 1.50e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.15.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.15.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.15.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.04e-03, 1.18e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.15.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.15.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.15.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.55e-03, 2.29e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.16.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.16.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.16.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.16.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.50e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.16.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.16.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.18e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.16.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.05e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.16.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.16.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.31e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.16.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.05e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.16.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.16.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.19e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.16.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.16.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.16.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.16.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.16.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.16.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.16.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.16.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.16.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.97e-03, 1.48e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.16.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.16.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.16.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.30e-03, 1.17e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.16.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.16.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.16.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.71e-03, 3.32e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.17.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.17.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.17.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.17.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.47e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.17.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.17.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.38e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.17.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.22e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.17.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.17.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.45e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.17.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.22e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.17.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.17.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.10e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.17.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.17.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.17.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.17.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.17.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.17.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.17.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.17.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.17.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.32e-03, 1.93e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.17.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.17.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.17.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.75e-03, 1.93e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.17.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.17.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.17.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.53e-03, 2.11e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.18.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.18.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.18.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.18.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.80e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.18.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.18.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.16e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.18.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.22e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.18.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.18.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.34e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.18.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.22e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.18.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.18.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.81e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.18.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.18.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.18.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.18.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.18.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.18.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.18.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.18.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.18.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.00e-03, 2.56e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.18.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.18.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.18.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.86e-03, 2.56e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.18.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.18.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.18.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.79e-03, 5.66e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.19.self_attn.q_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.38e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.19.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.19.self_attn.q_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.71e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.19.self_attn.k_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.38e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.19.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.19.self_attn.k_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.85e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.19.self_attn.v_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.38e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.19.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.19.self_attn.v_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.32e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.19.self_attn.o_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.23e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.19.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.19.self_attn.o_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.91e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.19.self_attn.q_bmm_quantizer TensorQuantizer(disabled) +model.language_model.layers.19.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.19.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.19.self_attn.softmax_quantizer TensorQuantizer(disabled) +model.language_model.layers.19.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.19.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.19.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.04e-03, 2.05e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.19.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.19.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.19.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.04e-03, 1.32e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.19.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.19.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.19.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.56e-03, 5.31e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.20.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.20.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.20.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.20.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.45e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.20.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.20.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.20e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.20.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.12e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.20.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.20.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.94e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.20.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.12e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.20.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.20.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.71e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.20.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.20.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.20.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.20.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.20.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.20.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.20.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.20.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.20.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.34e-03, 2.39e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.20.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.20.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.20.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.34e-03, 1.28e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.20.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.20.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.20.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.59e-03, 3.57e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.21.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.21.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.21.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.21.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.80e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.21.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.21.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.09e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.21.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.22e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.21.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.21.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.77e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.21.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.22e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.21.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.21.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.35e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.21.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.21.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.21.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.21.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.21.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.21.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.21.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.21.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.21.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.67e-03, 2.54e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.21.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.21.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.21.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.25e-03, 1.36e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.21.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.21.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.21.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.27e-03, 2.44e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.22.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.22.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.22.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.22.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.45e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.22.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.22.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.05e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.22.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.98e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.22.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.22.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.47e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.22.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.98e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.22.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.22.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.74e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.22.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.22.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.22.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.22.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.22.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.22.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.22.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.22.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.22.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.61e-03, 2.29e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.22.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.22.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.22.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.12e-03, 2.13e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.22.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.22.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.22.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.17e-03, 4.86e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.23.self_attn.q_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.85e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.23.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.23.self_attn.q_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.81e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.23.self_attn.k_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.85e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.23.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.23.self_attn.k_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.89e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.23.self_attn.v_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.85e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.23.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.23.self_attn.v_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.77e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.23.self_attn.o_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.58e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.23.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.23.self_attn.o_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.04e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.23.self_attn.q_bmm_quantizer TensorQuantizer(disabled) +model.language_model.layers.23.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.23.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.23.self_attn.softmax_quantizer TensorQuantizer(disabled) +model.language_model.layers.23.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.23.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.23.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.26e-03, 1.31e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.23.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.23.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.23.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.26e-03, 1.12e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.23.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.23.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.23.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.02e-03, 6.02e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.24.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.24.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.24.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.24.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.70e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.24.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.24.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.49e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.24.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.45e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.24.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.24.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.09e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.24.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.45e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.24.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.24.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.25e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.24.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.24.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.24.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.24.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.24.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.24.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.24.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.24.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.24.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.52e-03, 1.52e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.24.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.24.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.24.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.52e-03, 1.77e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.24.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.24.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.24.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.81e-03, 7.19e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.25.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.25.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.25.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.25.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.35e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.25.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.25.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.93e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.25.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.55e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.25.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.25.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.71e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.25.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.55e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.25.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.25.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.71e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.25.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.25.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.25.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.25.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.25.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.25.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.25.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.25.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.25.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.25e-03, 1.46e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.25.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.25.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.25.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.55e-03, 1.46e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.25.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.25.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.25.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.76e-03, 4.94e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.26.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.26.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.26.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.26.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.76e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.26.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.26.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.77e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.26.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.90e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.26.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.26.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.85e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.26.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.90e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.26.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.26.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.42e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.26.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.26.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.26.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.26.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.26.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.26.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.26.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.26.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.26.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.14e-03, 1.86e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.26.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.26.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.26.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.56e-03, 1.86e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.26.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.26.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.26.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.16e-03, 6.29e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.27.self_attn.q_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.75e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.27.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.27.self_attn.q_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.22e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.27.self_attn.k_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.75e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.27.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.27.self_attn.k_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.71e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.27.self_attn.v_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.75e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.27.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.27.self_attn.v_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.64e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.27.self_attn.o_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.21e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.27.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.27.self_attn.o_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.70e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.27.self_attn.q_bmm_quantizer TensorQuantizer(disabled) +model.language_model.layers.27.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.27.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.27.self_attn.softmax_quantizer TensorQuantizer(disabled) +model.language_model.layers.27.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.27.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.27.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.53e-03, 1.71e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.27.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.27.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.27.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.36e-03, 1.85e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.27.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.27.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.27.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.41e-03, 5.39e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.28.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.28.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.28.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.28.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.29e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.28.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.28.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.09e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.28.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.45e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.28.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.28.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.28e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.28.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.45e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.28.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.28.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.11e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.28.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.28.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.28.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.28.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.28.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.28.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.28.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.28.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.28.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.04e-03, 2.05e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.28.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.28.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.28.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.58e-03, 1.46e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.28.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.28.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.28.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.78e-03, 3.83e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.29.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.29.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.29.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.29.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.66e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.29.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.29.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.48e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.29.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.20e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.29.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.29.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.56e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.29.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.20e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.29.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.29.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.39e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.29.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.29.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.29.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.29.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.29.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.29.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.29.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.29.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.29.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.71e-03, 1.51e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.29.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.29.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.29.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.71e-03, 2.64e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.29.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.29.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.29.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.05e-03, 3.67e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.30.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.30.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.30.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.30.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.26e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.30.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.30.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.08e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.30.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.05e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.30.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.30.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.48e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.30.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.05e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.30.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.30.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.69e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.30.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.30.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.30.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.30.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.30.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.30.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.30.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.30.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.30.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.65e-03, 2.06e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.30.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.30.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.30.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.00e-03, 3.20e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.30.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.30.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.30.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.47e-03, 6.33e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.31.self_attn.q_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.15e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.31.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.31.self_attn.q_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.93e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.31.self_attn.k_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.15e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.31.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.31.self_attn.k_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.12e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.31.self_attn.v_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.15e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.31.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.31.self_attn.v_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.69e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.31.self_attn.o_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.06e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.31.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.31.self_attn.o_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.74e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.31.self_attn.q_bmm_quantizer TensorQuantizer(disabled) +model.language_model.layers.31.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.31.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.31.self_attn.softmax_quantizer TensorQuantizer(disabled) +model.language_model.layers.31.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.31.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.31.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.55e-03, 1.70e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.31.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.31.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.31.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.31e-03, 1.58e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.31.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.31.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.31.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.53e-03, 6.48e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.32.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.32.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.32.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.32.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.08e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.32.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.32.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.58e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.32.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.95e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.32.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.32.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.26e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.32.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.95e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.32.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.32.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.07e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.32.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.32.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.32.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.32.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.32.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.32.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.32.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.32.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.32.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.13e-03, 3.09e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.32.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.32.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.32.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.51e-03, 1.65e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.32.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.32.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.32.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.75e-03, 5.47e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.33.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.33.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.33.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.33.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.99e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.33.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.33.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.64e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.33.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.90e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.33.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.33.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.16e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.33.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.90e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.33.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.33.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.79e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.33.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.33.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.33.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.33.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.33.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.33.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.33.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.33.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.33.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.46e-03, 2.44e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.33.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.33.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.33.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.09e-03, 2.85e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.33.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.33.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.33.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.84e-03, 3.18e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.34.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.34.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.34.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.34.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.11e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.34.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.34.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.14e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.34.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.35e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.34.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.34.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.29e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.34.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.35e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.34.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.34.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.14e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.34.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.34.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.34.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.34.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.34.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.34.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.34.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.34.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.34.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.20e-03, 3.42e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.34.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.34.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.34.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.20e-03, 2.20e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.34.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.34.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.34.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.74e-03, 5.59e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.35.self_attn.q_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.35e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.35.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.35.self_attn.q_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.12e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.35.self_attn.k_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.35e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.35.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.35.self_attn.k_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.85e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.35.self_attn.v_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.35e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.35.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.35.self_attn.v_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.54e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.35.self_attn.o_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.09e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.35.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.35.self_attn.o_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.30e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.35.self_attn.q_bmm_quantizer TensorQuantizer(disabled) +model.language_model.layers.35.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.35.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.35.self_attn.softmax_quantizer TensorQuantizer(disabled) +model.language_model.layers.35.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.35.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.35.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.51e-03, 3.11e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.35.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.35.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.35.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.85e-03, 2.22e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.35.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.35.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.35.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.84e-03, 6.37e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.36.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.36.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.36.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.36.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.82e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.36.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.36.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.96e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.36.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.30e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.36.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.36.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.54e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.36.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.30e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.36.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.36.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.66e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.36.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.36.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.36.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.36.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.36.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.36.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.36.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.36.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.36.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.10e-03, 3.26e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.36.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.36.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.36.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.10e-03, 2.33e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.36.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.36.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.36.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.14e-03, 2.56e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.37.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.37.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.37.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.37.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.75e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.37.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.37.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.30e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.37.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.40e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.37.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.37.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.39e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.37.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.40e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.37.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.37.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.70e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.37.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.37.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.37.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.37.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.37.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.37.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.37.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.37.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.37.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.54e-03, 2.48e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.37.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.37.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.37.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.54e-03, 2.13e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.37.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.37.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.37.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.35e-03, 3.01e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.38.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.38.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.38.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.38.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.68e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.38.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.38.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.84e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.38.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.00e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.38.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.38.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.83e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.38.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.00e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.38.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.38.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.41e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.38.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.38.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.38.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.38.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.38.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.38.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.38.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.38.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.38.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.28e-03, 3.26e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.38.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.38.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.38.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.46e-03, 1.63e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.38.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.38.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.38.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.41e-03, 6.80e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.39.self_attn.q_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.90e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.39.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.39.self_attn.q_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.22e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.39.self_attn.k_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.90e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.39.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.39.self_attn.k_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.07e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.39.self_attn.v_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.90e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.39.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.39.self_attn.v_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.81e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.39.self_attn.o_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.14e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.39.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.39.self_attn.o_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.27e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.39.self_attn.q_bmm_quantizer TensorQuantizer(disabled) +model.language_model.layers.39.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.39.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.39.self_attn.softmax_quantizer TensorQuantizer(disabled) +model.language_model.layers.39.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.39.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.39.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.77e-03, 2.14e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.39.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.39.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.39.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.77e-03, 1.99e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.39.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.39.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.39.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.25e-03, 5.82e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.40.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.40.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.40.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.40.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.65e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.40.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.40.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.24e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.40.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.55e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.40.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.40.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.66e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.40.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.55e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.40.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.40.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.63e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.40.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.40.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.40.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.40.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.40.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.40.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.40.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.40.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.40.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.74e-03, 2.66e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.40.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.40.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.40.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.34e-03, 1.71e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.40.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.40.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.40.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.30e-03, 6.56e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.41.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.41.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.41.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.41.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.65e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.41.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.41.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.84e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.41.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.25e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.41.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.41.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.40e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.41.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.25e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.41.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.41.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.12e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.41.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.41.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.41.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.41.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.41.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.41.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.41.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.41.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.41.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.49e-03, 2.46e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.41.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.41.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.41.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.94e-03, 1.76e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.41.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.41.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.41.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.71e-03, 5.01e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.42.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.42.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.42.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.42.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.00e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.42.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.42.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.03e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.42.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.85e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.42.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.42.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.40e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.42.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.85e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.42.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.42.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.16e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.42.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.42.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.42.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.42.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.42.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.42.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.42.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.42.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.42.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.34e-03, 3.28e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.42.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.42.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.42.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.13e-03, 3.54e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.42.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.42.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.42.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.14e-03, 5.62e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.43.self_attn.q_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.06e+02 calibrator=MaxCalibrator quant) +model.language_model.layers.43.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.43.self_attn.q_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.83e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.43.self_attn.k_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.06e+02 calibrator=MaxCalibrator quant) +model.language_model.layers.43.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.43.self_attn.k_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.81e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.43.self_attn.v_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.06e+02 calibrator=MaxCalibrator quant) +model.language_model.layers.43.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.43.self_attn.v_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.54e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.43.self_attn.o_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.88e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.43.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.43.self_attn.o_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.43e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.43.self_attn.q_bmm_quantizer TensorQuantizer(disabled) +model.language_model.layers.43.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.43.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.43.self_attn.softmax_quantizer TensorQuantizer(disabled) +model.language_model.layers.43.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.43.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.43.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.58e-03, 2.05e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.43.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.43.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.43.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.04e-03, 1.61e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.43.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.43.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.43.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.27e-03, 5.86e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.44.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.44.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.44.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.44.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.82e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.44.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.44.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.82e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.44.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.10e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.44.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.44.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.60e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.44.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.10e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.44.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.44.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.20e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.44.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.44.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.44.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.44.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.44.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.44.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.44.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.44.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.44.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.22e-03, 2.10e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.44.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.44.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.44.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.16e-03, 1.80e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.44.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.44.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.44.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.89e-03, 4.71e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.45.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.45.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.45.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.45.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.02e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.45.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.45.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.62e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.45.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.80e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.45.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.45.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.33e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.45.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.80e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.45.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.45.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.20e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.45.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.45.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.45.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.45.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.45.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.45.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.45.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.45.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.45.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.67e-03, 2.54e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.45.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.45.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.45.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.12e-03, 2.36e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.45.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.45.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.45.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.20e-03, 3.83e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.46.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.46.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.46.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.46.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.15e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.46.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.46.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.19e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.46.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.35e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.46.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.46.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.47e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.46.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.35e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.46.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.46.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.77e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.46.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.46.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.46.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.46.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.46.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.46.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.46.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.46.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.46.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.37e-03, 2.80e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.46.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.46.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.46.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.10e-03, 3.26e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.46.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.46.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.46.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.38e-03, 6.05e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.47.self_attn.q_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.75e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.47.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.47.self_attn.q_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.46e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.47.self_attn.k_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.75e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.47.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.47.self_attn.k_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.93e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.47.self_attn.v_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.75e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.47.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.47.self_attn.v_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.75e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.47.self_attn.o_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.20e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.47.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.47.self_attn.o_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.05e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.47.self_attn.q_bmm_quantizer TensorQuantizer(disabled) +model.language_model.layers.47.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.47.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.47.self_attn.softmax_quantizer TensorQuantizer(disabled) +model.language_model.layers.47.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.47.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.47.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.87e-03, 2.49e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.47.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.47.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.47.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.20e-03, 2.91e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.47.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.47.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.47.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.70e-03, 6.05e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.48.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.48.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.48.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.48.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.52e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.48.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.48.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.70e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.48.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.35e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.48.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.48.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.75e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.48.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.35e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.48.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.48.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.70e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.48.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.48.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.48.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.48.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.48.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.48.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.48.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.48.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.48.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.32e-03, 3.52e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.48.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.48.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.48.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.49e-03, 2.51e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.48.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.48.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.48.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.94e-03, 5.86e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.49.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.49.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.49.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.49.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.35e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.49.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.49.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.34e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.49.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.95e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.49.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.49.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.87e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.49.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.95e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.49.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.49.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.66e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.49.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.49.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.49.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.49.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.49.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.49.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.49.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.49.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.49.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.10e-03, 3.67e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.49.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.49.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.49.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.92e-03, 2.36e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.49.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.49.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.49.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.83e-03, 6.33e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.50.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.50.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.50.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.50.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.15e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.50.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.50.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.02e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.50.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.65e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.50.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.50.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.38e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.50.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.65e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.50.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.50.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.06e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.50.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.50.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.50.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.50.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.50.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.50.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.50.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.50.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.50.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.27e-03, 2.93e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.50.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.50.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.50.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.45e-03, 2.93e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.50.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.50.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.50.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.46e-03, 5.70e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.51.self_attn.q_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.70e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.51.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.51.self_attn.q_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.12e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.51.self_attn.k_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.70e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.51.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.51.self_attn.k_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.49e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.51.self_attn.v_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.70e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.51.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.51.self_attn.v_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.79e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.51.self_attn.o_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.58e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.51.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.51.self_attn.o_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.16e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.51.self_attn.q_bmm_quantizer TensorQuantizer(disabled) +model.language_model.layers.51.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.51.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.51.self_attn.softmax_quantizer TensorQuantizer(disabled) +model.language_model.layers.51.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.51.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.51.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.82e-03, 2.85e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.51.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.51.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.51.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.09e-03, 2.65e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.51.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.51.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.51.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.21e-03, 7.19e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.52.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.52.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.52.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.52.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.65e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.52.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.52.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.55e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.52.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.80e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.52.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.52.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.81e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.52.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.80e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.52.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.52.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.90e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.52.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.52.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.52.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.52.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.52.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.52.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.52.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.52.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.52.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.22e-03, 3.44e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.52.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.52.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.52.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.45e-03, 2.21e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.52.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.52.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.52.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.24e-03, 7.73e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.53.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.53.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.53.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.53.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.15e+02 calibrator=MaxCalibrator quant) +model.language_model.layers.53.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.53.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.59e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.53.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.05e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.53.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.53.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.01e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.53.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.05e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.53.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.53.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.50e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.53.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.53.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.53.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.53.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.53.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.53.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.53.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.53.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.53.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.41e-03, 3.59e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.53.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.53.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.53.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.81e-03, 3.59e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.53.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.53.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.53.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.23e-03, 5.78e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.54.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.54.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.54.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.54.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.08e+02 calibrator=MaxCalibrator quant) +model.language_model.layers.54.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.54.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.41e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.54.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.25e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.54.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.54.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.99e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.54.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.25e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.54.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.54.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.66e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.54.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.54.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.54.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.54.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.54.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.54.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.54.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.54.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.54.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.16e-03, 3.77e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.54.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.54.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.54.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.63e-03, 2.02e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.54.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.54.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.54.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.75e-03, 7.03e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.55.self_attn.q_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.80e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.55.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.55.self_attn.q_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.62e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.55.self_attn.k_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.80e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.55.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.55.self_attn.k_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.81e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.55.self_attn.v_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.80e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.55.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.55.self_attn.v_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.38e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.55.self_attn.o_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.09e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.55.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.55.self_attn.o_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.28e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.55.self_attn.q_bmm_quantizer TensorQuantizer(disabled) +model.language_model.layers.55.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.55.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.55.self_attn.softmax_quantizer TensorQuantizer(disabled) +model.language_model.layers.55.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.55.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.55.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.66e-03, 3.48e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.55.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.55.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.55.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.66e-03, 3.23e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.55.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.55.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.55.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.66e-03, 7.34e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.56.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.56.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.56.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.56.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.52e+02 calibrator=MaxCalibrator quant) +model.language_model.layers.56.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.56.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.01e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.56.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.02e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.56.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.56.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.34e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.56.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.02e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.56.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.56.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.22e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.56.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.56.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.56.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.56.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.56.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.56.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.56.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.56.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.56.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.38e-03, 3.71e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.56.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.56.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.56.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.97e-03, 1.99e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.56.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.56.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.56.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.42e-03, 7.23e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.57.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.57.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.57.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.57.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.14e+02 calibrator=MaxCalibrator quant) +model.language_model.layers.57.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.57.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.73e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.57.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.92e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.57.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.57.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.16e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.57.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.92e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.57.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.57.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.85e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.57.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.57.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.57.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.57.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.57.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.57.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.57.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.57.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.57.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.50e-03, 5.04e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.57.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.57.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.57.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.50e-03, 1.98e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.57.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.57.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.57.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.11e-03, 6.88e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.58.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.58.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.58.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.58.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.36e+02 calibrator=MaxCalibrator quant) +model.language_model.layers.58.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.58.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.12e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.58.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.70e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.58.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.58.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.93e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.58.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.70e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.58.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.58.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.96e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.58.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.58.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.58.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.58.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.58.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.58.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.58.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.58.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.58.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.01e-03, 4.49e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.58.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.58.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.58.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.01e-03, 2.25e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.58.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.58.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.58.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.43e-03, 7.27e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.59.self_attn.q_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.45e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.59.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.59.self_attn.q_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.93e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.59.self_attn.k_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.45e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.59.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.59.self_attn.k_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.48e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.59.self_attn.v_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=6.45e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.59.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.59.self_attn.v_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.18e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.59.self_attn.o_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=8.90e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.59.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.59.self_attn.o_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.08e+00 calibrator=MaxCalibrator quant) +model.language_model.layers.59.self_attn.q_bmm_quantizer TensorQuantizer(disabled) +model.language_model.layers.59.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.59.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.59.self_attn.softmax_quantizer TensorQuantizer(disabled) +model.language_model.layers.59.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.59.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.59.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.19e-03, 3.57e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.59.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.59.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.59.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.59e-03, 3.57e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.59.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.59.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.59.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.25e-03, 8.05e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.60.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.60.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.60.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.60.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.73e+02 calibrator=MaxCalibrator quant) +model.language_model.layers.60.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.60.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.81e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.60.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.32e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.60.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.60.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.39e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.60.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.32e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.60.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.60.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.03e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.60.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.60.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.60.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.60.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.60.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.60.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.60.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.60.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.60.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.60e-03, 4.18e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.60.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.60.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.60.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.13e-03, 3.88e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.60.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.60.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.60.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.84e-03, 1.13e+00](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.61.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.61.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.61.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.61.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.28e+02 calibrator=MaxCalibrator quant) +model.language_model.layers.61.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.61.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.97e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.61.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.62e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.61.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.61.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.89e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.61.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.62e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.61.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.61.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.79e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.61.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.61.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.61.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.61.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.61.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.61.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.61.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.61.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.61.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.69e-03, 4.67e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.61.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.61.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.61.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[5.73e-03, 4.67e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.61.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.61.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.61.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.14e-03, 4.80e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.62.linear_attn.conv1d.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.62.linear_attn.conv1d.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.62.linear_attn.conv1d.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.62.linear_attn.out_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.00e+02 calibrator=MaxCalibrator quant) +model.language_model.layers.62.linear_attn.out_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.62.linear_attn.out_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.53e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.62.linear_attn.in_proj_qkv.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.82e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.62.linear_attn.in_proj_qkv.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.62.linear_attn.in_proj_qkv.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=4.80e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.62.linear_attn.in_proj_z.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.82e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.62.linear_attn.in_proj_z.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.62.linear_attn.in_proj_z.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=3.55e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.62.linear_attn.in_proj_b.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.62.linear_attn.in_proj_b.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.62.linear_attn.in_proj_b.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.62.linear_attn.in_proj_a.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.62.linear_attn.in_proj_a.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.62.linear_attn.in_proj_a.weight_quantizer TensorQuantizer(disabled) +model.language_model.layers.62.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.62.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.62.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.66e-03, 4.18e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.62.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.62.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.62.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.20e-03, 2.99e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.62.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.62.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.62.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[3.30e-03, 6.56e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.63.self_attn.q_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.90e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.63.self_attn.q_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.63.self_attn.q_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.20e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.63.self_attn.k_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.90e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.63.self_attn.k_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.63.self_attn.k_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=2.52e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.63.self_attn.v_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=5.90e+01 calibrator=MaxCalibrator quant) +model.language_model.layers.63.self_attn.v_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.63.self_attn.v_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=9.96e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.63.self_attn.o_proj.input_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=1.45e+02 calibrator=MaxCalibrator quant) +model.language_model.layers.63.self_attn.o_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.63.self_attn.o_proj.weight_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=7.42e-01 calibrator=MaxCalibrator quant) +model.language_model.layers.63.self_attn.q_bmm_quantizer TensorQuantizer(disabled) +model.language_model.layers.63.self_attn.k_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.63.self_attn.v_bmm_quantizer TensorQuantizer((4, 3) bit fake per-tensor amax=dynamic calibrator=MaxCalibrator quant) +model.language_model.layers.63.self_attn.softmax_quantizer TensorQuantizer(disabled) +model.language_model.layers.63.mlp.gate_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.63.mlp.gate_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.63.mlp.gate_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.39e-03, 3.57e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.63.mlp.up_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.63.mlp.up_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.63.mlp.up_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.79e-03, 2.81e-01](5570560) calibrator=NVFP4MSECalibrator quant) +model.language_model.layers.63.mlp.down_proj.input_quantizer TensorQuantizer(disabled) +model.language_model.layers.63.mlp.down_proj.output_quantizer TensorQuantizer(disabled) +model.language_model.layers.63.mlp.down_proj.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[4.50e-03, 5.04e-01](5570560) calibrator=NVFP4MSECalibrator quant) +lm_head.input_quantizer TensorQuantizer(disabled) +lm_head.output_quantizer TensorQuantizer(disabled) +lm_head.weight_quantizer NVFP4StaticQuantizer((2, 1) bit fake block_sizes={-1: 16, 'type': 'static', 'scale_bits': (4, 3)}, amax=[2.64e-03, 3.63e-01](79462400) calibrator=NVFP4MSECalibrator quant) +2256 TensorQuantizers found in model