Sehyo committed on
Commit
4072608
·
verified ·
1 Parent(s): 76c9b52

fix: add MTP ignore patterns to quantization config for speculative decoding

Browse files
Files changed (1) hide show
  1. config.json +6 -2
config.json CHANGED
@@ -501,7 +501,11 @@
501
  "model.language_model.layers.56.mlp.gate",
502
  "model.language_model.layers.57.mlp.gate",
503
  "model.language_model.layers.58.mlp.gate",
504
- "model.language_model.layers.59.mlp.gate"
 
 
 
 
505
  ],
506
  "kv_cache_scheme": null,
507
  "quant_method": "compressed-tensors",
@@ -644,4 +648,4 @@
644
  },
645
  "vision_end_token_id": 248054,
646
  "vision_start_token_id": 248053
647
- }
 
501
  "model.language_model.layers.56.mlp.gate",
502
  "model.language_model.layers.57.mlp.gate",
503
  "model.language_model.layers.58.mlp.gate",
504
+ "model.language_model.layers.59.mlp.gate",
505
+ "re:model\\.layers\\.\\d+\\.",
506
+ "model.fc",
507
+ "re:mtp\\.layers\\.\\d+\\.",
508
+ "mtp.fc"
509
  ],
510
  "kv_cache_scheme": null,
511
  "quant_method": "compressed-tensors",
 
648
  },
649
  "vision_end_token_id": 248054,
650
  "vision_start_token_id": 248053
651
+ }