PrismaQuant final: visual NVFP4 (108/110 DP-placed), lm_head BF16 (vLLM ParallelLMHead runtime limit), pos_embed excluded
Browse files
- config.json +2 -2
- mixed_native_manifest.json +1 -1
- model-00001-of-00016.safetensors +2 -2
- model-00002-of-00016.safetensors +2 -2
- model-00003-of-00016.safetensors +2 -2
- model-00004-of-00016.safetensors +2 -2
- model-00005-of-00016.safetensors +2 -2
- model-00006-of-00016.safetensors +2 -2
- model-00007-of-00016.safetensors +2 -2
- model-00008-of-00016.safetensors +2 -2
- model-00009-of-00016.safetensors +2 -2
- model-00010-of-00016.safetensors +2 -2
- model-00011-of-00016.safetensors +2 -2
- model-00012-of-00016.safetensors +2 -2
- model-00013-of-00016.safetensors +2 -2
- model-00014-of-00016.safetensors +2 -2
- model-00015-of-00016.safetensors +2 -2
- model.safetensors.index.json +2 -2
config.json
CHANGED
|
@@ -382,6 +382,8 @@
|
|
| 382 |
"re:^language_model[.]model[.]layers[.]9[.]mlp[.]experts[.]gate_up_proj$",
|
| 383 |
"re:^mtp[.]layers[.]0[.]mlp[.]experts[.]down_proj$",
|
| 384 |
"re:^mtp[.]layers[.]0[.]mlp[.]experts[.]gate_up_proj$",
|
|
|
|
|
|
|
| 385 |
"re:^language_model[.]model[.]layers[.][0-9]+[.]mlp[.]experts[.][0-9]+[.](gate|up|down)_proj$",
|
| 386 |
"re:^mtp[.]layers[.][0-9]+[.]mlp[.]experts[.][0-9]+[.](gate|up|down)_proj$"
|
| 387 |
]
|
|
@@ -1002,8 +1004,6 @@
|
|
| 1002 |
"visual.blocks.9.attn.qkv",
|
| 1003 |
"visual.blocks.9.mlp.linear_fc1",
|
| 1004 |
"visual.blocks.9.mlp.linear_fc2",
|
| 1005 |
-
"visual.merger.linear_fc1",
|
| 1006 |
-
"visual.merger.linear_fc2",
|
| 1007 |
"visual.pos_embed"
|
| 1008 |
],
|
| 1009 |
"quantization_status": "compressed"
|
|
|
|
| 382 |
"re:^language_model[.]model[.]layers[.]9[.]mlp[.]experts[.]gate_up_proj$",
|
| 383 |
"re:^mtp[.]layers[.]0[.]mlp[.]experts[.]down_proj$",
|
| 384 |
"re:^mtp[.]layers[.]0[.]mlp[.]experts[.]gate_up_proj$",
|
| 385 |
+
"re:^visual[.]merger[.]linear_fc1$",
|
| 386 |
+
"re:^visual[.]merger[.]linear_fc2$",
|
| 387 |
"re:^language_model[.]model[.]layers[.][0-9]+[.]mlp[.]experts[.][0-9]+[.](gate|up|down)_proj$",
|
| 388 |
"re:^mtp[.]layers[.][0-9]+[.]mlp[.]experts[.][0-9]+[.](gate|up|down)_proj$"
|
| 389 |
]
|
|
|
|
| 1004 |
"visual.blocks.9.attn.qkv",
|
| 1005 |
"visual.blocks.9.mlp.linear_fc1",
|
| 1006 |
"visual.blocks.9.mlp.linear_fc2",
|
|
|
|
|
|
|
| 1007 |
"visual.pos_embed"
|
| 1008 |
],
|
| 1009 |
"quantization_status": "compressed"
|
mixed_native_manifest.json
CHANGED
|
@@ -12,7 +12,7 @@
|
|
| 12 |
"mtp_packed_moe_per_expert/NVFP4": 2,
|
| 13 |
"mtp_passthrough/BF16": 9
|
| 14 |
},
|
| 15 |
-
"n_assignment_entries":
|
| 16 |
"ignore": [
|
| 17 |
"lm_head"
|
| 18 |
]
|
|
|
|
| 12 |
"mtp_packed_moe_per_expert/NVFP4": 2,
|
| 13 |
"mtp_passthrough/BF16": 9
|
| 14 |
},
|
| 15 |
+
"n_assignment_entries": 589,
|
| 16 |
"ignore": [
|
| 17 |
"lm_head"
|
| 18 |
]
|
model-00001-of-00016.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e992111654df517a274cb6d53e39bb938cbe0beec312c1e669132a732d4a25e3
|
| 3 |
+
size 5110883464
|
model-00002-of-00016.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7d55d0029eeb180fba4fc5abf799494c2e70a723143a475603d078845566fe1
|
| 3 |
+
size 5111844976
|
model-00003-of-00016.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:24e168889551c4652545d0d77b396d8a7a33ad98cf5500d7a685792d68f986af
|
| 3 |
+
size 5110569432
|
model-00004-of-00016.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:509c635ee89750f29640f934c259a68ae9b2c84d510a2263e879f7e4b6e32225
|
| 3 |
+
size 5094455024
|
model-00005-of-00016.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a4aad4d93fda81ef5eb452a194c39b147fc0f049ec17e7140ebb88436ce8bd7
|
| 3 |
+
size 5111654544
|
model-00006-of-00016.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a9d1f2004eb9ea451b73b6aef7634abb6e07eee69a2b32acfb1dcef84f4991d
|
| 3 |
+
size 5110615856
|
model-00007-of-00016.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98a050de1b4d84b5407b211160577d1818271f9c8fab68a205f3db5de93bc422
|
| 3 |
+
size 5100811840
|
model-00008-of-00016.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f620ee070d72ddab944cd6ae4a2813360a3507eb6c61ba183f84fe949a260099
|
| 3 |
+
size 5110565856
|
model-00009-of-00016.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:99f6bef674337eee03974f7b563e32482acb66b90396ac782f5662fae55d90b3
|
| 3 |
+
size 5110546200
|
model-00010-of-00016.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5eeade358a67b520102ce0d0fead204c5ccff73550abccdcf0e455f490201742
|
| 3 |
+
size 5111694072
|
model-00011-of-00016.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25046cb42aa61032d89ce46d5f1a07a87239f922e8f8fc062dd8474ec1b5b4ff
|
| 3 |
+
size 5111409456
|
model-00012-of-00016.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e9d468cf675eae7e60c67b71959956881a890747bf5ae2d002f18462d070fb6d
|
| 3 |
+
size 5111480504
|
model-00013-of-00016.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:368826a98ceb81b74740d3f00da850c568b867652f615b19c9d6da38c4f61840
|
| 3 |
+
size 5111407656
|
model-00014-of-00016.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ac37e33551c811ca7bad6e6c434c5be53cdfd48c1972f43f7b1837337fb3e9e
|
| 3 |
+
size 5111689640
|
model-00015-of-00016.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:275fd6d6912dbd11597151564526bb662f5b97419b343fc627d3edf43af18219
|
| 3 |
+
size 5042843944
|
model.safetensors.index.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:452ce7936da6de8aadd1546324cd33eba2eba17c367295291c7168304954f78d
|
| 3 |
+
size 17414985
|