fix: add MTP ignore patterns to quantization config for speculative decoding
Browse files- config.json +6 -2
config.json
CHANGED
|
@@ -501,7 +501,11 @@
|
|
| 501 |
"model.language_model.layers.56.mlp.gate",
|
| 502 |
"model.language_model.layers.57.mlp.gate",
|
| 503 |
"model.language_model.layers.58.mlp.gate",
|
| 504 |
-
"model.language_model.layers.59.mlp.gate"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 505 |
],
|
| 506 |
"kv_cache_scheme": null,
|
| 507 |
"quant_method": "compressed-tensors",
|
|
@@ -644,4 +648,4 @@
|
|
| 644 |
},
|
| 645 |
"vision_end_token_id": 248054,
|
| 646 |
"vision_start_token_id": 248053
|
| 647 |
-
}
|
|
|
|
| 501 |
"model.language_model.layers.56.mlp.gate",
|
| 502 |
"model.language_model.layers.57.mlp.gate",
|
| 503 |
"model.language_model.layers.58.mlp.gate",
|
| 504 |
+
"model.language_model.layers.59.mlp.gate",
|
| 505 |
+
"re:model\\.layers\\.\\d+\\.",
|
| 506 |
+
"model.fc",
|
| 507 |
+
"re:mtp\\.layers\\.\\d+\\.",
|
| 508 |
+
"mtp.fc"
|
| 509 |
],
|
| 510 |
"kv_cache_scheme": null,
|
| 511 |
"quant_method": "compressed-tensors",
|
|
|
|
| 648 |
},
|
| 649 |
"vision_end_token_id": 248054,
|
| 650 |
"vision_start_token_id": 248053
|
| 651 |
+
}
|