rdtand committed on
Commit
2f54276
·
verified ·
1 Parent(s): d99baa1

PrismaQuant final: visual NVFP4 (108/110 DP-placed), lm_head BF16 (vLLM ParallelLMHead runtime limit), pos_embed excluded

Browse files
config.json CHANGED
@@ -382,6 +382,8 @@
382
  "re:^language_model[.]model[.]layers[.]9[.]mlp[.]experts[.]gate_up_proj$",
383
  "re:^mtp[.]layers[.]0[.]mlp[.]experts[.]down_proj$",
384
  "re:^mtp[.]layers[.]0[.]mlp[.]experts[.]gate_up_proj$",
 
 
385
  "re:^language_model[.]model[.]layers[.][0-9]+[.]mlp[.]experts[.][0-9]+[.](gate|up|down)_proj$",
386
  "re:^mtp[.]layers[.][0-9]+[.]mlp[.]experts[.][0-9]+[.](gate|up|down)_proj$"
387
  ]
@@ -1002,8 +1004,6 @@
1002
  "visual.blocks.9.attn.qkv",
1003
  "visual.blocks.9.mlp.linear_fc1",
1004
  "visual.blocks.9.mlp.linear_fc2",
1005
- "visual.merger.linear_fc1",
1006
- "visual.merger.linear_fc2",
1007
  "visual.pos_embed"
1008
  ],
1009
  "quantization_status": "compressed"
 
382
  "re:^language_model[.]model[.]layers[.]9[.]mlp[.]experts[.]gate_up_proj$",
383
  "re:^mtp[.]layers[.]0[.]mlp[.]experts[.]down_proj$",
384
  "re:^mtp[.]layers[.]0[.]mlp[.]experts[.]gate_up_proj$",
385
+ "re:^visual[.]merger[.]linear_fc1$",
386
+ "re:^visual[.]merger[.]linear_fc2$",
387
  "re:^language_model[.]model[.]layers[.][0-9]+[.]mlp[.]experts[.][0-9]+[.](gate|up|down)_proj$",
388
  "re:^mtp[.]layers[.][0-9]+[.]mlp[.]experts[.][0-9]+[.](gate|up|down)_proj$"
389
  ]
 
1004
  "visual.blocks.9.attn.qkv",
1005
  "visual.blocks.9.mlp.linear_fc1",
1006
  "visual.blocks.9.mlp.linear_fc2",
 
 
1007
  "visual.pos_embed"
1008
  ],
1009
  "quantization_status": "compressed"
mixed_native_manifest.json CHANGED
@@ -12,7 +12,7 @@
12
  "mtp_packed_moe_per_expert/NVFP4": 2,
13
  "mtp_passthrough/BF16": 9
14
  },
15
- "n_assignment_entries": 590,
16
  "ignore": [
17
  "lm_head"
18
  ]
 
12
  "mtp_packed_moe_per_expert/NVFP4": 2,
13
  "mtp_passthrough/BF16": 9
14
  },
15
+ "n_assignment_entries": 589,
16
  "ignore": [
17
  "lm_head"
18
  ]
model-00001-of-00016.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c4fcff082526d1da5b7679802b5d3bf4dd111264c60cb5aa2c6cd2fc86f9a0e
3
- size 5114226512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e992111654df517a274cb6d53e39bb938cbe0beec312c1e669132a732d4a25e3
3
+ size 5110883464
model-00002-of-00016.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a10e9c7164c651ab540d55d0762fd990a8edbf8858783af14438e5970135aaa
3
- size 5114009072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7d55d0029eeb180fba4fc5abf799494c2e70a723143a475603d078845566fe1
3
+ size 5111844976
model-00003-of-00016.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f6c8c7222397fd84adf82aa855ac9f52e3eab27d3379be62cb36889f28552a0
3
- size 5113912496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24e168889551c4652545d0d77b396d8a7a33ad98cf5500d7a685792d68f986af
3
+ size 5110569432
model-00004-of-00016.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f368238d4f6b103617baab434c8bf4497ee751d532b99c6f0cf24650511edf85
3
- size 5106839032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:509c635ee89750f29640f934c259a68ae9b2c84d510a2263e879f7e4b6e32225
3
+ size 5094455024
model-00005-of-00016.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a060f0705b7d59387fc2250307c8ec4d6d9217d29856961e65e514ebb53a6d1d
3
- size 5115003872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a4aad4d93fda81ef5eb452a194c39b147fc0f049ec17e7140ebb88436ce8bd7
3
+ size 5111654544
model-00006-of-00016.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3fdecc210de7448eb05a3d19b0ead3fb40d115b698c8c9ef1b6ab9953ab1477
3
- size 5114352912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a9d1f2004eb9ea451b73b6aef7634abb6e07eee69a2b32acfb1dcef84f4991d
3
+ size 5110615856
model-00007-of-00016.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8fecaad8e04d4eaaf54daa1e5735ed37de2e625f0dccad00846ab575380d7c1b
3
- size 5114968200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98a050de1b4d84b5407b211160577d1818271f9c8fab68a205f3db5de93bc422
3
+ size 5100811840
model-00008-of-00016.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:493e8e9a8a2ffdda2db8f54dbb266f7267949ceaee0f96f6a501eb06b5d47c86
3
- size 5114109968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f620ee070d72ddab944cd6ae4a2813360a3507eb6c61ba183f84fe949a260099
3
+ size 5110565856
model-00009-of-00016.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:933bdb77f37c90a9177daabbca0c6f0ee406af1fe92c774f0ffd6ddd4997d664
3
- size 5114086280
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99f6bef674337eee03974f7b563e32482acb66b90396ac782f5662fae55d90b3
3
+ size 5110546200
model-00010-of-00016.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:695dde7cda4d948f1cb73ce12fc1f9c11c66667200f77c046aefd2b7628553fb
3
- size 5113661128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eeade358a67b520102ce0d0fead204c5ccff73550abccdcf0e455f490201742
3
+ size 5111694072
model-00011-of-00016.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf95284b270316d34ade4c012c2a0cbdd3f5473a1cc54b4d911e03dbfd8b6085
3
- size 5114752408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25046cb42aa61032d89ce46d5f1a07a87239f922e8f8fc062dd8474ec1b5b4ff
3
+ size 5111409456
model-00012-of-00016.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27254df34465a5d06109103fad04f3e7554aed9ca85c8fa9280085ecfa968cec
3
- size 5115020720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9d468cf675eae7e60c67b71959956881a890747bf5ae2d002f18462d070fb6d
3
+ size 5111480504
model-00013-of-00016.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35f52f075258f944d906a640d5179e962b9d10414f9c57123e302ee6758ab609
3
- size 5114947608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:368826a98ceb81b74740d3f00da850c568b867652f615b19c9d6da38c4f61840
3
+ size 5111407656
model-00014-of-00016.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb7079e13fca9937723fcdf331a6d9c65c32fcf016e184fc82aa293720c88ace
3
- size 5113656704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ac37e33551c811ca7bad6e6c434c5be53cdfd48c1972f43f7b1837337fb3e9e
3
+ size 5111689640
model-00015-of-00016.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec8db6fd3477452bea9e784c16f9169b040aecbf84b8cd03fff7914008229872
3
- size 5029797224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:275fd6d6912dbd11597151564526bb662f5b97419b343fc627d3edf43af18219
3
+ size 5042843944
model.safetensors.index.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44e7d71867e482556c2c1f4bff9486585871d9666c26ab14fcee802802d282e4
3
- size 17414423
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:452ce7936da6de8aadd1546324cd33eba2eba17c367295291c7168304954f78d
3
+ size 17414985