Image-Text-to-Text
Transformers
Safetensors
English
Chinese
qwen3_5
prismaquant
compressed-tensors
nvfp4
mxfp8
quantized
multimodal
vision-language
mtp
speculative-decoding
vllm
qwen3.6
conversational
8-bit precision
Instructions to use rdtand/Qwen3.6-27B-PrismaQuant-5.5bit-vllm with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use rdtand/Qwen3.6-27B-PrismaQuant-5.5bit-vllm with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-text-to-text", model="rdtand/Qwen3.6-27B-PrismaQuant-5.5bit-vllm") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] pipe(text=messages)# Load model directly from transformers import AutoProcessor, AutoModelForMultimodalLM processor = AutoProcessor.from_pretrained("rdtand/Qwen3.6-27B-PrismaQuant-5.5bit-vllm") model = AutoModelForMultimodalLM.from_pretrained("rdtand/Qwen3.6-27B-PrismaQuant-5.5bit-vllm") messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use rdtand/Qwen3.6-27B-PrismaQuant-5.5bit-vllm with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "rdtand/Qwen3.6-27B-PrismaQuant-5.5bit-vllm" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "rdtand/Qwen3.6-27B-PrismaQuant-5.5bit-vllm", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker
docker model run hf.co/rdtand/Qwen3.6-27B-PrismaQuant-5.5bit-vllm
- SGLang
How to use rdtand/Qwen3.6-27B-PrismaQuant-5.5bit-vllm with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "rdtand/Qwen3.6-27B-PrismaQuant-5.5bit-vllm" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "rdtand/Qwen3.6-27B-PrismaQuant-5.5bit-vllm", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "rdtand/Qwen3.6-27B-PrismaQuant-5.5bit-vllm" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "rdtand/Qwen3.6-27B-PrismaQuant-5.5bit-vllm", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }' - Docker Model Runner
How to use rdtand/Qwen3.6-27B-PrismaQuant-5.5bit-vllm with Docker Model Runner:
docker model run hf.co/rdtand/Qwen3.6-27B-PrismaQuant-5.5bit-vllm
v5: max-not-sum sibling aggregation, kernel shape mask, joint input_global — validator: ppl=4.16, mean_NLL=1.43, MTP P0=89.5%
Browse files- config.json +76 -76
- mixed_native_manifest.json +3 -3
- model-00002-of-00006.safetensors +2 -2
- model-00003-of-00006.safetensors +2 -2
- model-00004-of-00006.safetensors +2 -2
- model-00005-of-00006.safetensors +2 -2
- model-00006-of-00006.safetensors +2 -2
- model.safetensors.index.json +148 -113
config.json
CHANGED
|
@@ -165,60 +165,44 @@
|
|
| 165 |
"zp_dtype": "torch.uint8"
|
| 166 |
},
|
| 167 |
"targets": [
|
| 168 |
-
"re:^language_model[.]model[.]layers[.]
|
| 169 |
-
"re:^language_model[.]model[.]layers[.]
|
| 170 |
-
"re:^language_model[.]model[.]layers[.]
|
| 171 |
-
"re:^language_model[.]model[.]layers[.]
|
| 172 |
-
"re:^language_model[.]model[.]layers[.]15[.]self_attn[.]k_proj$",
|
| 173 |
-
"re:^language_model[.]model[.]layers[.]15[.]self_attn[.]q_proj$",
|
| 174 |
-
"re:^language_model[.]model[.]layers[.]15[.]self_attn[.]qkv_proj$",
|
| 175 |
-
"re:^language_model[.]model[.]layers[.]15[.]self_attn[.]v_proj$",
|
| 176 |
-
"re:^language_model[.]model[.]layers[.]27[.]self_attn[.]o_proj$",
|
| 177 |
-
"re:^language_model[.]model[.]layers[.]28[.]linear_attn[.]in_proj_qkv$",
|
| 178 |
-
"re:^language_model[.]model[.]layers[.]28[.]linear_attn[.]in_proj_qkvz$",
|
| 179 |
-
"re:^language_model[.]model[.]layers[.]28[.]linear_attn[.]in_proj_z$",
|
| 180 |
"re:^language_model[.]model[.]layers[.]29[.]linear_attn[.]in_proj_qkv$",
|
| 181 |
"re:^language_model[.]model[.]layers[.]29[.]linear_attn[.]in_proj_qkvz$",
|
| 182 |
"re:^language_model[.]model[.]layers[.]29[.]linear_attn[.]in_proj_z$",
|
| 183 |
-
"re:^language_model[.]model[.]layers[.]
|
| 184 |
-
"re:^language_model[.]model[.]layers[.]
|
| 185 |
-
"re:^language_model[.]model[.]layers[.]
|
| 186 |
-
"re:^language_model[.]model[.]layers[.]
|
|
|
|
|
|
|
| 187 |
"re:^language_model[.]model[.]layers[.]44[.]linear_attn[.]in_proj_qkv$",
|
| 188 |
"re:^language_model[.]model[.]layers[.]44[.]linear_attn[.]in_proj_qkvz$",
|
| 189 |
"re:^language_model[.]model[.]layers[.]44[.]linear_attn[.]in_proj_z$",
|
| 190 |
-
"re:^language_model[.]model[.]layers[.]45[.]mlp[.]gate_proj$",
|
| 191 |
-
"re:^language_model[.]model[.]layers[.]45[.]mlp[.]gate_up_proj$",
|
| 192 |
-
"re:^language_model[.]model[.]layers[.]45[.]mlp[.]up_proj$",
|
| 193 |
-
"re:^language_model[.]model[.]layers[.]46[.]linear_attn[.]out_proj$",
|
| 194 |
"re:^language_model[.]model[.]layers[.]46[.]mlp[.]gate_proj$",
|
| 195 |
"re:^language_model[.]model[.]layers[.]46[.]mlp[.]gate_up_proj$",
|
| 196 |
"re:^language_model[.]model[.]layers[.]46[.]mlp[.]up_proj$",
|
| 197 |
"re:^language_model[.]model[.]layers[.]47[.]mlp[.]gate_proj$",
|
| 198 |
"re:^language_model[.]model[.]layers[.]47[.]mlp[.]gate_up_proj$",
|
| 199 |
"re:^language_model[.]model[.]layers[.]47[.]mlp[.]up_proj$",
|
| 200 |
-
"re:^language_model[.]model[.]layers[.]
|
|
|
|
|
|
|
|
|
|
| 201 |
"re:^language_model[.]model[.]layers[.]56[.]linear_attn[.]in_proj_qkv$",
|
| 202 |
"re:^language_model[.]model[.]layers[.]56[.]linear_attn[.]in_proj_qkvz$",
|
| 203 |
"re:^language_model[.]model[.]layers[.]56[.]linear_attn[.]in_proj_z$",
|
| 204 |
-
"re:^language_model[.]model[.]layers[.]
|
| 205 |
-
"re:^language_model[.]model[.]layers[.]
|
| 206 |
-
"re:^language_model[.]model[.]layers[.]
|
| 207 |
-
"re:^language_model[.]model[.]layers[.]
|
| 208 |
-
"re:^
|
| 209 |
-
"re:^language_model[.]model[.]layers[.]63[.]mlp[.]down_proj$",
|
| 210 |
-
"re:^language_model[.]model[.]layers[.]7[.]self_attn[.]k_proj$",
|
| 211 |
-
"re:^language_model[.]model[.]layers[.]7[.]self_attn[.]q_proj$",
|
| 212 |
-
"re:^language_model[.]model[.]layers[.]7[.]self_attn[.]qkv_proj$",
|
| 213 |
-
"re:^language_model[.]model[.]layers[.]7[.]self_attn[.]v_proj$",
|
| 214 |
-
"re:^visual[.]blocks[.]10[.]attn[.]qkv$",
|
| 215 |
-
"re:^visual[.]blocks[.]11[.]attn[.]proj$",
|
| 216 |
-
"re:^visual[.]blocks[.]11[.]attn[.]qkv$",
|
| 217 |
-
"re:^visual[.]blocks[.]12[.]attn[.]qkv$",
|
| 218 |
-
"re:^visual[.]blocks[.]13[.]attn[.]proj$",
|
| 219 |
-
"re:^visual[.]blocks[.]14[.]attn[.]qkv$",
|
| 220 |
"re:^visual[.]blocks[.]6[.]attn[.]qkv$",
|
| 221 |
-
"re:^visual[.]blocks[.]7[.]attn[.]
|
|
|
|
|
|
|
| 222 |
"re:^visual[.]blocks[.]9[.]attn[.]qkv$"
|
| 223 |
]
|
| 224 |
},
|
|
@@ -324,6 +308,9 @@
|
|
| 324 |
"re:^language_model[.]model[.]layers[.]17[.]mlp[.]gate_proj$",
|
| 325 |
"re:^language_model[.]model[.]layers[.]17[.]mlp[.]gate_up_proj$",
|
| 326 |
"re:^language_model[.]model[.]layers[.]17[.]mlp[.]up_proj$",
|
|
|
|
|
|
|
|
|
|
| 327 |
"re:^language_model[.]model[.]layers[.]18[.]linear_attn[.]in_proj_qkv$",
|
| 328 |
"re:^language_model[.]model[.]layers[.]18[.]linear_attn[.]in_proj_qkvz$",
|
| 329 |
"re:^language_model[.]model[.]layers[.]18[.]linear_attn[.]in_proj_z$",
|
|
@@ -336,11 +323,7 @@
|
|
| 336 |
"re:^language_model[.]model[.]layers[.]19[.]mlp[.]gate_proj$",
|
| 337 |
"re:^language_model[.]model[.]layers[.]19[.]mlp[.]gate_up_proj$",
|
| 338 |
"re:^language_model[.]model[.]layers[.]19[.]mlp[.]up_proj$",
|
| 339 |
-
"re:^language_model[.]model[.]layers[.]19[.]self_attn[.]k_proj$",
|
| 340 |
"re:^language_model[.]model[.]layers[.]19[.]self_attn[.]o_proj$",
|
| 341 |
-
"re:^language_model[.]model[.]layers[.]19[.]self_attn[.]q_proj$",
|
| 342 |
-
"re:^language_model[.]model[.]layers[.]19[.]self_attn[.]qkv_proj$",
|
| 343 |
-
"re:^language_model[.]model[.]layers[.]19[.]self_attn[.]v_proj$",
|
| 344 |
"re:^language_model[.]model[.]layers[.]2[.]linear_attn[.]in_proj_qkv$",
|
| 345 |
"re:^language_model[.]model[.]layers[.]2[.]linear_attn[.]in_proj_qkvz$",
|
| 346 |
"re:^language_model[.]model[.]layers[.]2[.]linear_attn[.]in_proj_z$",
|
|
@@ -349,6 +332,9 @@
|
|
| 349 |
"re:^language_model[.]model[.]layers[.]2[.]mlp[.]gate_proj$",
|
| 350 |
"re:^language_model[.]model[.]layers[.]2[.]mlp[.]gate_up_proj$",
|
| 351 |
"re:^language_model[.]model[.]layers[.]2[.]mlp[.]up_proj$",
|
|
|
|
|
|
|
|
|
|
| 352 |
"re:^language_model[.]model[.]layers[.]20[.]linear_attn[.]in_proj_qkv$",
|
| 353 |
"re:^language_model[.]model[.]layers[.]20[.]linear_attn[.]in_proj_qkvz$",
|
| 354 |
"re:^language_model[.]model[.]layers[.]20[.]linear_attn[.]in_proj_z$",
|
|
@@ -383,11 +369,7 @@
|
|
| 383 |
"re:^language_model[.]model[.]layers[.]23[.]mlp[.]gate_proj$",
|
| 384 |
"re:^language_model[.]model[.]layers[.]23[.]mlp[.]gate_up_proj$",
|
| 385 |
"re:^language_model[.]model[.]layers[.]23[.]mlp[.]up_proj$",
|
| 386 |
-
"re:^language_model[.]model[.]layers[.]23[.]self_attn[.]k_proj$",
|
| 387 |
"re:^language_model[.]model[.]layers[.]23[.]self_attn[.]o_proj$",
|
| 388 |
-
"re:^language_model[.]model[.]layers[.]23[.]self_attn[.]q_proj$",
|
| 389 |
-
"re:^language_model[.]model[.]layers[.]23[.]self_attn[.]qkv_proj$",
|
| 390 |
-
"re:^language_model[.]model[.]layers[.]23[.]self_attn[.]v_proj$",
|
| 391 |
"re:^language_model[.]model[.]layers[.]24[.]linear_attn[.]in_proj_qkv$",
|
| 392 |
"re:^language_model[.]model[.]layers[.]24[.]linear_attn[.]in_proj_qkvz$",
|
| 393 |
"re:^language_model[.]model[.]layers[.]24[.]linear_attn[.]in_proj_z$",
|
|
@@ -416,6 +398,10 @@
|
|
| 416 |
"re:^language_model[.]model[.]layers[.]27[.]mlp[.]gate_proj$",
|
| 417 |
"re:^language_model[.]model[.]layers[.]27[.]mlp[.]gate_up_proj$",
|
| 418 |
"re:^language_model[.]model[.]layers[.]27[.]mlp[.]up_proj$",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 419 |
"re:^language_model[.]model[.]layers[.]28[.]linear_attn[.]out_proj$",
|
| 420 |
"re:^language_model[.]model[.]layers[.]28[.]mlp[.]down_proj$",
|
| 421 |
"re:^language_model[.]model[.]layers[.]28[.]mlp[.]gate_proj$",
|
|
@@ -442,6 +428,9 @@
|
|
| 442 |
"re:^language_model[.]model[.]layers[.]31[.]mlp[.]up_proj$",
|
| 443 |
"re:^language_model[.]model[.]layers[.]32[.]linear_attn[.]out_proj$",
|
| 444 |
"re:^language_model[.]model[.]layers[.]32[.]mlp[.]down_proj$",
|
|
|
|
|
|
|
|
|
|
| 445 |
"re:^language_model[.]model[.]layers[.]33[.]linear_attn[.]out_proj$",
|
| 446 |
"re:^language_model[.]model[.]layers[.]33[.]mlp[.]down_proj$",
|
| 447 |
"re:^language_model[.]model[.]layers[.]33[.]mlp[.]gate_proj$",
|
|
@@ -456,6 +445,10 @@
|
|
| 456 |
"re:^language_model[.]model[.]layers[.]35[.]mlp[.]gate_proj$",
|
| 457 |
"re:^language_model[.]model[.]layers[.]35[.]mlp[.]gate_up_proj$",
|
| 458 |
"re:^language_model[.]model[.]layers[.]35[.]mlp[.]up_proj$",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 459 |
"re:^language_model[.]model[.]layers[.]36[.]linear_attn[.]in_proj_qkv$",
|
| 460 |
"re:^language_model[.]model[.]layers[.]36[.]linear_attn[.]in_proj_qkvz$",
|
| 461 |
"re:^language_model[.]model[.]layers[.]36[.]linear_attn[.]in_proj_z$",
|
|
@@ -487,11 +480,7 @@
|
|
| 487 |
"re:^language_model[.]model[.]layers[.]39[.]mlp[.]gate_proj$",
|
| 488 |
"re:^language_model[.]model[.]layers[.]39[.]mlp[.]gate_up_proj$",
|
| 489 |
"re:^language_model[.]model[.]layers[.]39[.]mlp[.]up_proj$",
|
| 490 |
-
"re:^language_model[.]model[.]layers[.]39[.]self_attn[.]k_proj$",
|
| 491 |
"re:^language_model[.]model[.]layers[.]39[.]self_attn[.]o_proj$",
|
| 492 |
-
"re:^language_model[.]model[.]layers[.]39[.]self_attn[.]q_proj$",
|
| 493 |
-
"re:^language_model[.]model[.]layers[.]39[.]self_attn[.]qkv_proj$",
|
| 494 |
-
"re:^language_model[.]model[.]layers[.]39[.]self_attn[.]v_proj$",
|
| 495 |
"re:^language_model[.]model[.]layers[.]4[.]linear_attn[.]in_proj_a$",
|
| 496 |
"re:^language_model[.]model[.]layers[.]4[.]linear_attn[.]in_proj_b$",
|
| 497 |
"re:^language_model[.]model[.]layers[.]4[.]linear_attn[.]in_proj_ba$",
|
|
@@ -538,8 +527,13 @@
|
|
| 538 |
"re:^language_model[.]model[.]layers[.]44[.]mlp[.]up_proj$",
|
| 539 |
"re:^language_model[.]model[.]layers[.]45[.]linear_attn[.]out_proj$",
|
| 540 |
"re:^language_model[.]model[.]layers[.]45[.]mlp[.]down_proj$",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 541 |
"re:^language_model[.]model[.]layers[.]46[.]mlp[.]down_proj$",
|
| 542 |
"re:^language_model[.]model[.]layers[.]47[.]mlp[.]down_proj$",
|
|
|
|
| 543 |
"re:^language_model[.]model[.]layers[.]48[.]linear_attn[.]out_proj$",
|
| 544 |
"re:^language_model[.]model[.]layers[.]48[.]mlp[.]down_proj$",
|
| 545 |
"re:^language_model[.]model[.]layers[.]48[.]mlp[.]gate_proj$",
|
|
@@ -570,11 +564,7 @@
|
|
| 570 |
"re:^language_model[.]model[.]layers[.]51[.]mlp[.]gate_proj$",
|
| 571 |
"re:^language_model[.]model[.]layers[.]51[.]mlp[.]gate_up_proj$",
|
| 572 |
"re:^language_model[.]model[.]layers[.]51[.]mlp[.]up_proj$",
|
| 573 |
-
"re:^language_model[.]model[.]layers[.]51[.]self_attn[.]k_proj$",
|
| 574 |
"re:^language_model[.]model[.]layers[.]51[.]self_attn[.]o_proj$",
|
| 575 |
-
"re:^language_model[.]model[.]layers[.]51[.]self_attn[.]q_proj$",
|
| 576 |
-
"re:^language_model[.]model[.]layers[.]51[.]self_attn[.]qkv_proj$",
|
| 577 |
-
"re:^language_model[.]model[.]layers[.]51[.]self_attn[.]v_proj$",
|
| 578 |
"re:^language_model[.]model[.]layers[.]52[.]linear_attn[.]in_proj_a$",
|
| 579 |
"re:^language_model[.]model[.]layers[.]52[.]linear_attn[.]in_proj_b$",
|
| 580 |
"re:^language_model[.]model[.]layers[.]52[.]linear_attn[.]in_proj_ba$",
|
|
@@ -642,6 +632,7 @@
|
|
| 642 |
"re:^language_model[.]model[.]layers[.]59[.]mlp[.]gate_proj$",
|
| 643 |
"re:^language_model[.]model[.]layers[.]59[.]mlp[.]gate_up_proj$",
|
| 644 |
"re:^language_model[.]model[.]layers[.]59[.]mlp[.]up_proj$",
|
|
|
|
| 645 |
"re:^language_model[.]model[.]layers[.]6[.]linear_attn[.]in_proj_qkv$",
|
| 646 |
"re:^language_model[.]model[.]layers[.]6[.]linear_attn[.]in_proj_qkvz$",
|
| 647 |
"re:^language_model[.]model[.]layers[.]6[.]linear_attn[.]in_proj_z$",
|
|
@@ -680,6 +671,7 @@
|
|
| 680 |
"re:^language_model[.]model[.]layers[.]62[.]mlp[.]gate_proj$",
|
| 681 |
"re:^language_model[.]model[.]layers[.]62[.]mlp[.]gate_up_proj$",
|
| 682 |
"re:^language_model[.]model[.]layers[.]62[.]mlp[.]up_proj$",
|
|
|
|
| 683 |
"re:^language_model[.]model[.]layers[.]63[.]self_attn[.]o_proj$",
|
| 684 |
"re:^language_model[.]model[.]layers[.]7[.]mlp[.]down_proj$",
|
| 685 |
"re:^language_model[.]model[.]layers[.]7[.]mlp[.]gate_proj$",
|
|
@@ -717,16 +709,23 @@
|
|
| 717 |
"re:^mtp[.]layers[.]0[.]self_attn[.]q_proj$",
|
| 718 |
"re:^mtp[.]layers[.]0[.]self_attn[.]qkv_proj$",
|
| 719 |
"re:^mtp[.]layers[.]0[.]self_attn[.]v_proj$",
|
|
|
|
|
|
|
| 720 |
"re:^visual[.]blocks[.]10[.]mlp[.]linear_fc1$",
|
| 721 |
"re:^visual[.]blocks[.]10[.]mlp[.]linear_fc2$",
|
|
|
|
|
|
|
| 722 |
"re:^visual[.]blocks[.]11[.]mlp[.]linear_fc1$",
|
| 723 |
"re:^visual[.]blocks[.]11[.]mlp[.]linear_fc2$",
|
| 724 |
"re:^visual[.]blocks[.]12[.]attn[.]proj$",
|
|
|
|
| 725 |
"re:^visual[.]blocks[.]12[.]mlp[.]linear_fc1$",
|
| 726 |
"re:^visual[.]blocks[.]12[.]mlp[.]linear_fc2$",
|
|
|
|
| 727 |
"re:^visual[.]blocks[.]13[.]mlp[.]linear_fc1$",
|
| 728 |
"re:^visual[.]blocks[.]13[.]mlp[.]linear_fc2$",
|
| 729 |
"re:^visual[.]blocks[.]14[.]attn[.]proj$",
|
|
|
|
| 730 |
"re:^visual[.]blocks[.]14[.]mlp[.]linear_fc1$",
|
| 731 |
"re:^visual[.]blocks[.]14[.]mlp[.]linear_fc2$",
|
| 732 |
"re:^visual[.]blocks[.]15[.]attn[.]proj$",
|
|
@@ -766,15 +765,18 @@
|
|
| 766 |
"re:^visual[.]blocks[.]24[.]attn[.]proj$",
|
| 767 |
"re:^visual[.]blocks[.]24[.]attn[.]qkv$",
|
| 768 |
"re:^visual[.]blocks[.]24[.]mlp[.]linear_fc1$",
|
|
|
|
| 769 |
"re:^visual[.]blocks[.]25[.]attn[.]proj$",
|
| 770 |
"re:^visual[.]blocks[.]25[.]attn[.]qkv$",
|
| 771 |
"re:^visual[.]blocks[.]25[.]mlp[.]linear_fc1$",
|
| 772 |
"re:^visual[.]blocks[.]26[.]attn[.]proj$",
|
| 773 |
"re:^visual[.]blocks[.]26[.]attn[.]qkv$",
|
| 774 |
"re:^visual[.]blocks[.]26[.]mlp[.]linear_fc1$",
|
|
|
|
| 775 |
"re:^visual[.]blocks[.]5[.]mlp[.]linear_fc2$",
|
| 776 |
"re:^visual[.]blocks[.]6[.]mlp[.]linear_fc1$",
|
| 777 |
"re:^visual[.]blocks[.]6[.]mlp[.]linear_fc2$",
|
|
|
|
| 778 |
"re:^visual[.]blocks[.]7[.]mlp[.]linear_fc1$",
|
| 779 |
"re:^visual[.]blocks[.]7[.]mlp[.]linear_fc2$",
|
| 780 |
"re:^visual[.]blocks[.]8[.]mlp[.]linear_fc1$",
|
|
@@ -796,6 +798,10 @@
|
|
| 796 |
"language_model.model.layers.10.linear_attn.in_proj_a",
|
| 797 |
"language_model.model.layers.10.linear_attn.in_proj_b",
|
| 798 |
"language_model.model.layers.10.linear_attn.in_proj_ba",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 799 |
"language_model.model.layers.12.linear_attn.in_proj_a",
|
| 800 |
"language_model.model.layers.12.linear_attn.in_proj_b",
|
| 801 |
"language_model.model.layers.12.linear_attn.in_proj_ba",
|
|
@@ -805,21 +811,23 @@
|
|
| 805 |
"language_model.model.layers.14.linear_attn.in_proj_a",
|
| 806 |
"language_model.model.layers.14.linear_attn.in_proj_b",
|
| 807 |
"language_model.model.layers.14.linear_attn.in_proj_ba",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 808 |
"language_model.model.layers.16.linear_attn.in_proj_a",
|
| 809 |
"language_model.model.layers.16.linear_attn.in_proj_b",
|
| 810 |
"language_model.model.layers.16.linear_attn.in_proj_ba",
|
| 811 |
"language_model.model.layers.17.linear_attn.in_proj_a",
|
| 812 |
"language_model.model.layers.17.linear_attn.in_proj_b",
|
| 813 |
"language_model.model.layers.17.linear_attn.in_proj_ba",
|
| 814 |
-
"language_model.model.layers.
|
| 815 |
-
"language_model.model.layers.
|
| 816 |
-
"language_model.model.layers.
|
|
|
|
| 817 |
"language_model.model.layers.2.linear_attn.in_proj_a",
|
| 818 |
"language_model.model.layers.2.linear_attn.in_proj_b",
|
| 819 |
"language_model.model.layers.2.linear_attn.in_proj_ba",
|
| 820 |
-
"language_model.model.layers.20.linear_attn.in_proj_a",
|
| 821 |
-
"language_model.model.layers.20.linear_attn.in_proj_b",
|
| 822 |
-
"language_model.model.layers.20.linear_attn.in_proj_ba",
|
| 823 |
"language_model.model.layers.24.linear_attn.in_proj_a",
|
| 824 |
"language_model.model.layers.24.linear_attn.in_proj_b",
|
| 825 |
"language_model.model.layers.24.linear_attn.in_proj_ba",
|
|
@@ -850,7 +858,6 @@
|
|
| 850 |
"language_model.model.layers.30.linear_attn.in_proj_qkvz",
|
| 851 |
"language_model.model.layers.30.linear_attn.in_proj_z",
|
| 852 |
"language_model.model.layers.31.self_attn.k_proj",
|
| 853 |
-
"language_model.model.layers.31.self_attn.o_proj",
|
| 854 |
"language_model.model.layers.31.self_attn.q_proj",
|
| 855 |
"language_model.model.layers.31.self_attn.qkv_proj",
|
| 856 |
"language_model.model.layers.31.self_attn.v_proj",
|
|
@@ -876,9 +883,6 @@
|
|
| 876 |
"language_model.model.layers.35.self_attn.q_proj",
|
| 877 |
"language_model.model.layers.35.self_attn.qkv_proj",
|
| 878 |
"language_model.model.layers.35.self_attn.v_proj",
|
| 879 |
-
"language_model.model.layers.36.linear_attn.in_proj_a",
|
| 880 |
-
"language_model.model.layers.36.linear_attn.in_proj_b",
|
| 881 |
-
"language_model.model.layers.36.linear_attn.in_proj_ba",
|
| 882 |
"language_model.model.layers.38.linear_attn.in_proj_a",
|
| 883 |
"language_model.model.layers.38.linear_attn.in_proj_b",
|
| 884 |
"language_model.model.layers.38.linear_attn.in_proj_ba",
|
|
@@ -892,7 +896,6 @@
|
|
| 892 |
"language_model.model.layers.42.linear_attn.in_proj_qkvz",
|
| 893 |
"language_model.model.layers.42.linear_attn.in_proj_z",
|
| 894 |
"language_model.model.layers.43.self_attn.k_proj",
|
| 895 |
-
"language_model.model.layers.43.self_attn.o_proj",
|
| 896 |
"language_model.model.layers.43.self_attn.q_proj",
|
| 897 |
"language_model.model.layers.43.self_attn.qkv_proj",
|
| 898 |
"language_model.model.layers.43.self_attn.v_proj",
|
|
@@ -942,6 +945,10 @@
|
|
| 942 |
"language_model.model.layers.56.linear_attn.in_proj_a",
|
| 943 |
"language_model.model.layers.56.linear_attn.in_proj_b",
|
| 944 |
"language_model.model.layers.56.linear_attn.in_proj_ba",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 945 |
"language_model.model.layers.6.linear_attn.in_proj_a",
|
| 946 |
"language_model.model.layers.6.linear_attn.in_proj_b",
|
| 947 |
"language_model.model.layers.6.linear_attn.in_proj_ba",
|
|
@@ -951,10 +958,10 @@
|
|
| 951 |
"language_model.model.layers.63.mlp.gate_proj",
|
| 952 |
"language_model.model.layers.63.mlp.gate_up_proj",
|
| 953 |
"language_model.model.layers.63.mlp.up_proj",
|
| 954 |
-
"language_model.model.layers.
|
| 955 |
-
"language_model.model.layers.
|
| 956 |
-
"language_model.model.layers.
|
| 957 |
-
"language_model.model.layers.
|
| 958 |
"mtp.fc",
|
| 959 |
"visual.blocks.0.attn.proj",
|
| 960 |
"visual.blocks.0.attn.qkv",
|
|
@@ -964,15 +971,12 @@
|
|
| 964 |
"visual.blocks.1.attn.qkv",
|
| 965 |
"visual.blocks.1.mlp.linear_fc1",
|
| 966 |
"visual.blocks.1.mlp.linear_fc2",
|
| 967 |
-
"visual.blocks.10.attn.proj",
|
| 968 |
-
"visual.blocks.13.attn.qkv",
|
| 969 |
"visual.blocks.2.attn.proj",
|
| 970 |
"visual.blocks.2.attn.qkv",
|
| 971 |
"visual.blocks.2.mlp.linear_fc1",
|
| 972 |
"visual.blocks.2.mlp.linear_fc2",
|
| 973 |
"visual.blocks.21.mlp.linear_fc2",
|
| 974 |
"visual.blocks.23.mlp.linear_fc2",
|
| 975 |
-
"visual.blocks.24.mlp.linear_fc2",
|
| 976 |
"visual.blocks.25.mlp.linear_fc2",
|
| 977 |
"visual.blocks.26.mlp.linear_fc2",
|
| 978 |
"visual.blocks.3.attn.proj",
|
|
@@ -985,12 +989,8 @@
|
|
| 985 |
"visual.blocks.4.mlp.linear_fc2",
|
| 986 |
"visual.blocks.5.attn.proj",
|
| 987 |
"visual.blocks.5.attn.qkv",
|
| 988 |
-
"visual.blocks.5.mlp.linear_fc1",
|
| 989 |
"visual.blocks.6.attn.proj",
|
| 990 |
-
"visual.blocks.7.attn.proj",
|
| 991 |
"visual.blocks.8.attn.proj",
|
| 992 |
-
"visual.blocks.8.attn.qkv",
|
| 993 |
-
"visual.blocks.9.attn.proj",
|
| 994 |
"visual.pos_embed"
|
| 995 |
],
|
| 996 |
"quantization_status": "compressed"
|
|
|
|
| 165 |
"zp_dtype": "torch.uint8"
|
| 166 |
},
|
| 167 |
"targets": [
|
| 168 |
+
"re:^language_model[.]model[.]layers[.]23[.]self_attn[.]k_proj$",
|
| 169 |
+
"re:^language_model[.]model[.]layers[.]23[.]self_attn[.]q_proj$",
|
| 170 |
+
"re:^language_model[.]model[.]layers[.]23[.]self_attn[.]qkv_proj$",
|
| 171 |
+
"re:^language_model[.]model[.]layers[.]23[.]self_attn[.]v_proj$",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
"re:^language_model[.]model[.]layers[.]29[.]linear_attn[.]in_proj_qkv$",
|
| 173 |
"re:^language_model[.]model[.]layers[.]29[.]linear_attn[.]in_proj_qkvz$",
|
| 174 |
"re:^language_model[.]model[.]layers[.]29[.]linear_attn[.]in_proj_z$",
|
| 175 |
+
"re:^language_model[.]model[.]layers[.]31[.]self_attn[.]o_proj$",
|
| 176 |
+
"re:^language_model[.]model[.]layers[.]39[.]self_attn[.]k_proj$",
|
| 177 |
+
"re:^language_model[.]model[.]layers[.]39[.]self_attn[.]q_proj$",
|
| 178 |
+
"re:^language_model[.]model[.]layers[.]39[.]self_attn[.]qkv_proj$",
|
| 179 |
+
"re:^language_model[.]model[.]layers[.]39[.]self_attn[.]v_proj$",
|
| 180 |
+
"re:^language_model[.]model[.]layers[.]43[.]self_attn[.]o_proj$",
|
| 181 |
"re:^language_model[.]model[.]layers[.]44[.]linear_attn[.]in_proj_qkv$",
|
| 182 |
"re:^language_model[.]model[.]layers[.]44[.]linear_attn[.]in_proj_qkvz$",
|
| 183 |
"re:^language_model[.]model[.]layers[.]44[.]linear_attn[.]in_proj_z$",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
"re:^language_model[.]model[.]layers[.]46[.]mlp[.]gate_proj$",
|
| 185 |
"re:^language_model[.]model[.]layers[.]46[.]mlp[.]gate_up_proj$",
|
| 186 |
"re:^language_model[.]model[.]layers[.]46[.]mlp[.]up_proj$",
|
| 187 |
"re:^language_model[.]model[.]layers[.]47[.]mlp[.]gate_proj$",
|
| 188 |
"re:^language_model[.]model[.]layers[.]47[.]mlp[.]gate_up_proj$",
|
| 189 |
"re:^language_model[.]model[.]layers[.]47[.]mlp[.]up_proj$",
|
| 190 |
+
"re:^language_model[.]model[.]layers[.]51[.]self_attn[.]k_proj$",
|
| 191 |
+
"re:^language_model[.]model[.]layers[.]51[.]self_attn[.]q_proj$",
|
| 192 |
+
"re:^language_model[.]model[.]layers[.]51[.]self_attn[.]qkv_proj$",
|
| 193 |
+
"re:^language_model[.]model[.]layers[.]51[.]self_attn[.]v_proj$",
|
| 194 |
"re:^language_model[.]model[.]layers[.]56[.]linear_attn[.]in_proj_qkv$",
|
| 195 |
"re:^language_model[.]model[.]layers[.]56[.]linear_attn[.]in_proj_qkvz$",
|
| 196 |
"re:^language_model[.]model[.]layers[.]56[.]linear_attn[.]in_proj_z$",
|
| 197 |
+
"re:^language_model[.]model[.]layers[.]63[.]self_attn[.]k_proj$",
|
| 198 |
+
"re:^language_model[.]model[.]layers[.]63[.]self_attn[.]q_proj$",
|
| 199 |
+
"re:^language_model[.]model[.]layers[.]63[.]self_attn[.]qkv_proj$",
|
| 200 |
+
"re:^language_model[.]model[.]layers[.]63[.]self_attn[.]v_proj$",
|
| 201 |
+
"re:^visual[.]blocks[.]13[.]attn[.]qkv$",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
"re:^visual[.]blocks[.]6[.]attn[.]qkv$",
|
| 203 |
+
"re:^visual[.]blocks[.]7[.]attn[.]proj$",
|
| 204 |
+
"re:^visual[.]blocks[.]8[.]attn[.]qkv$",
|
| 205 |
+
"re:^visual[.]blocks[.]9[.]attn[.]proj$",
|
| 206 |
"re:^visual[.]blocks[.]9[.]attn[.]qkv$"
|
| 207 |
]
|
| 208 |
},
|
|
|
|
| 308 |
"re:^language_model[.]model[.]layers[.]17[.]mlp[.]gate_proj$",
|
| 309 |
"re:^language_model[.]model[.]layers[.]17[.]mlp[.]gate_up_proj$",
|
| 310 |
"re:^language_model[.]model[.]layers[.]17[.]mlp[.]up_proj$",
|
| 311 |
+
"re:^language_model[.]model[.]layers[.]18[.]linear_attn[.]in_proj_a$",
|
| 312 |
+
"re:^language_model[.]model[.]layers[.]18[.]linear_attn[.]in_proj_b$",
|
| 313 |
+
"re:^language_model[.]model[.]layers[.]18[.]linear_attn[.]in_proj_ba$",
|
| 314 |
"re:^language_model[.]model[.]layers[.]18[.]linear_attn[.]in_proj_qkv$",
|
| 315 |
"re:^language_model[.]model[.]layers[.]18[.]linear_attn[.]in_proj_qkvz$",
|
| 316 |
"re:^language_model[.]model[.]layers[.]18[.]linear_attn[.]in_proj_z$",
|
|
|
|
| 323 |
"re:^language_model[.]model[.]layers[.]19[.]mlp[.]gate_proj$",
|
| 324 |
"re:^language_model[.]model[.]layers[.]19[.]mlp[.]gate_up_proj$",
|
| 325 |
"re:^language_model[.]model[.]layers[.]19[.]mlp[.]up_proj$",
|
|
|
|
| 326 |
"re:^language_model[.]model[.]layers[.]19[.]self_attn[.]o_proj$",
|
|
|
|
|
|
|
|
|
|
| 327 |
"re:^language_model[.]model[.]layers[.]2[.]linear_attn[.]in_proj_qkv$",
|
| 328 |
"re:^language_model[.]model[.]layers[.]2[.]linear_attn[.]in_proj_qkvz$",
|
| 329 |
"re:^language_model[.]model[.]layers[.]2[.]linear_attn[.]in_proj_z$",
|
|
|
|
| 332 |
"re:^language_model[.]model[.]layers[.]2[.]mlp[.]gate_proj$",
|
| 333 |
"re:^language_model[.]model[.]layers[.]2[.]mlp[.]gate_up_proj$",
|
| 334 |
"re:^language_model[.]model[.]layers[.]2[.]mlp[.]up_proj$",
|
| 335 |
+
"re:^language_model[.]model[.]layers[.]20[.]linear_attn[.]in_proj_a$",
|
| 336 |
+
"re:^language_model[.]model[.]layers[.]20[.]linear_attn[.]in_proj_b$",
|
| 337 |
+
"re:^language_model[.]model[.]layers[.]20[.]linear_attn[.]in_proj_ba$",
|
| 338 |
"re:^language_model[.]model[.]layers[.]20[.]linear_attn[.]in_proj_qkv$",
|
| 339 |
"re:^language_model[.]model[.]layers[.]20[.]linear_attn[.]in_proj_qkvz$",
|
| 340 |
"re:^language_model[.]model[.]layers[.]20[.]linear_attn[.]in_proj_z$",
|
|
|
|
| 369 |
"re:^language_model[.]model[.]layers[.]23[.]mlp[.]gate_proj$",
|
| 370 |
"re:^language_model[.]model[.]layers[.]23[.]mlp[.]gate_up_proj$",
|
| 371 |
"re:^language_model[.]model[.]layers[.]23[.]mlp[.]up_proj$",
|
|
|
|
| 372 |
"re:^language_model[.]model[.]layers[.]23[.]self_attn[.]o_proj$",
|
|
|
|
|
|
|
|
|
|
| 373 |
"re:^language_model[.]model[.]layers[.]24[.]linear_attn[.]in_proj_qkv$",
|
| 374 |
"re:^language_model[.]model[.]layers[.]24[.]linear_attn[.]in_proj_qkvz$",
|
| 375 |
"re:^language_model[.]model[.]layers[.]24[.]linear_attn[.]in_proj_z$",
|
|
|
|
| 398 |
"re:^language_model[.]model[.]layers[.]27[.]mlp[.]gate_proj$",
|
| 399 |
"re:^language_model[.]model[.]layers[.]27[.]mlp[.]gate_up_proj$",
|
| 400 |
"re:^language_model[.]model[.]layers[.]27[.]mlp[.]up_proj$",
|
| 401 |
+
"re:^language_model[.]model[.]layers[.]27[.]self_attn[.]o_proj$",
|
| 402 |
+
"re:^language_model[.]model[.]layers[.]28[.]linear_attn[.]in_proj_qkv$",
|
| 403 |
+
"re:^language_model[.]model[.]layers[.]28[.]linear_attn[.]in_proj_qkvz$",
|
| 404 |
+
"re:^language_model[.]model[.]layers[.]28[.]linear_attn[.]in_proj_z$",
|
| 405 |
"re:^language_model[.]model[.]layers[.]28[.]linear_attn[.]out_proj$",
|
| 406 |
"re:^language_model[.]model[.]layers[.]28[.]mlp[.]down_proj$",
|
| 407 |
"re:^language_model[.]model[.]layers[.]28[.]mlp[.]gate_proj$",
|
|
|
|
| 428 |
"re:^language_model[.]model[.]layers[.]31[.]mlp[.]up_proj$",
|
| 429 |
"re:^language_model[.]model[.]layers[.]32[.]linear_attn[.]out_proj$",
|
| 430 |
"re:^language_model[.]model[.]layers[.]32[.]mlp[.]down_proj$",
|
| 431 |
+
"re:^language_model[.]model[.]layers[.]32[.]mlp[.]gate_proj$",
|
| 432 |
+
"re:^language_model[.]model[.]layers[.]32[.]mlp[.]gate_up_proj$",
|
| 433 |
+
"re:^language_model[.]model[.]layers[.]32[.]mlp[.]up_proj$",
|
| 434 |
"re:^language_model[.]model[.]layers[.]33[.]linear_attn[.]out_proj$",
|
| 435 |
"re:^language_model[.]model[.]layers[.]33[.]mlp[.]down_proj$",
|
| 436 |
"re:^language_model[.]model[.]layers[.]33[.]mlp[.]gate_proj$",
|
|
|
|
| 445 |
"re:^language_model[.]model[.]layers[.]35[.]mlp[.]gate_proj$",
|
| 446 |
"re:^language_model[.]model[.]layers[.]35[.]mlp[.]gate_up_proj$",
|
| 447 |
"re:^language_model[.]model[.]layers[.]35[.]mlp[.]up_proj$",
|
| 448 |
+
"re:^language_model[.]model[.]layers[.]35[.]self_attn[.]o_proj$",
|
| 449 |
+
"re:^language_model[.]model[.]layers[.]36[.]linear_attn[.]in_proj_a$",
|
| 450 |
+
"re:^language_model[.]model[.]layers[.]36[.]linear_attn[.]in_proj_b$",
|
| 451 |
+
"re:^language_model[.]model[.]layers[.]36[.]linear_attn[.]in_proj_ba$",
|
| 452 |
"re:^language_model[.]model[.]layers[.]36[.]linear_attn[.]in_proj_qkv$",
|
| 453 |
"re:^language_model[.]model[.]layers[.]36[.]linear_attn[.]in_proj_qkvz$",
|
| 454 |
"re:^language_model[.]model[.]layers[.]36[.]linear_attn[.]in_proj_z$",
|
|
|
|
| 480 |
"re:^language_model[.]model[.]layers[.]39[.]mlp[.]gate_proj$",
|
| 481 |
"re:^language_model[.]model[.]layers[.]39[.]mlp[.]gate_up_proj$",
|
| 482 |
"re:^language_model[.]model[.]layers[.]39[.]mlp[.]up_proj$",
|
|
|
|
| 483 |
"re:^language_model[.]model[.]layers[.]39[.]self_attn[.]o_proj$",
|
|
|
|
|
|
|
|
|
|
| 484 |
"re:^language_model[.]model[.]layers[.]4[.]linear_attn[.]in_proj_a$",
|
| 485 |
"re:^language_model[.]model[.]layers[.]4[.]linear_attn[.]in_proj_b$",
|
| 486 |
"re:^language_model[.]model[.]layers[.]4[.]linear_attn[.]in_proj_ba$",
|
|
|
|
| 527 |
"re:^language_model[.]model[.]layers[.]44[.]mlp[.]up_proj$",
|
| 528 |
"re:^language_model[.]model[.]layers[.]45[.]linear_attn[.]out_proj$",
|
| 529 |
"re:^language_model[.]model[.]layers[.]45[.]mlp[.]down_proj$",
|
| 530 |
+
"re:^language_model[.]model[.]layers[.]45[.]mlp[.]gate_proj$",
|
| 531 |
+
"re:^language_model[.]model[.]layers[.]45[.]mlp[.]gate_up_proj$",
|
| 532 |
+
"re:^language_model[.]model[.]layers[.]45[.]mlp[.]up_proj$",
|
| 533 |
+
"re:^language_model[.]model[.]layers[.]46[.]linear_attn[.]out_proj$",
|
| 534 |
"re:^language_model[.]model[.]layers[.]46[.]mlp[.]down_proj$",
|
| 535 |
"re:^language_model[.]model[.]layers[.]47[.]mlp[.]down_proj$",
|
| 536 |
+
"re:^language_model[.]model[.]layers[.]47[.]self_attn[.]o_proj$",
|
| 537 |
"re:^language_model[.]model[.]layers[.]48[.]linear_attn[.]out_proj$",
|
| 538 |
"re:^language_model[.]model[.]layers[.]48[.]mlp[.]down_proj$",
|
| 539 |
"re:^language_model[.]model[.]layers[.]48[.]mlp[.]gate_proj$",
|
|
|
|
| 564 |
"re:^language_model[.]model[.]layers[.]51[.]mlp[.]gate_proj$",
|
| 565 |
"re:^language_model[.]model[.]layers[.]51[.]mlp[.]gate_up_proj$",
|
| 566 |
"re:^language_model[.]model[.]layers[.]51[.]mlp[.]up_proj$",
|
|
|
|
| 567 |
"re:^language_model[.]model[.]layers[.]51[.]self_attn[.]o_proj$",
|
|
|
|
|
|
|
|
|
|
| 568 |
"re:^language_model[.]model[.]layers[.]52[.]linear_attn[.]in_proj_a$",
|
| 569 |
"re:^language_model[.]model[.]layers[.]52[.]linear_attn[.]in_proj_b$",
|
| 570 |
"re:^language_model[.]model[.]layers[.]52[.]linear_attn[.]in_proj_ba$",
|
|
|
|
| 632 |
"re:^language_model[.]model[.]layers[.]59[.]mlp[.]gate_proj$",
|
| 633 |
"re:^language_model[.]model[.]layers[.]59[.]mlp[.]gate_up_proj$",
|
| 634 |
"re:^language_model[.]model[.]layers[.]59[.]mlp[.]up_proj$",
|
| 635 |
+
"re:^language_model[.]model[.]layers[.]59[.]self_attn[.]o_proj$",
|
| 636 |
"re:^language_model[.]model[.]layers[.]6[.]linear_attn[.]in_proj_qkv$",
|
| 637 |
"re:^language_model[.]model[.]layers[.]6[.]linear_attn[.]in_proj_qkvz$",
|
| 638 |
"re:^language_model[.]model[.]layers[.]6[.]linear_attn[.]in_proj_z$",
|
|
|
|
| 671 |
"re:^language_model[.]model[.]layers[.]62[.]mlp[.]gate_proj$",
|
| 672 |
"re:^language_model[.]model[.]layers[.]62[.]mlp[.]gate_up_proj$",
|
| 673 |
"re:^language_model[.]model[.]layers[.]62[.]mlp[.]up_proj$",
|
| 674 |
+
"re:^language_model[.]model[.]layers[.]63[.]mlp[.]down_proj$",
|
| 675 |
"re:^language_model[.]model[.]layers[.]63[.]self_attn[.]o_proj$",
|
| 676 |
"re:^language_model[.]model[.]layers[.]7[.]mlp[.]down_proj$",
|
| 677 |
"re:^language_model[.]model[.]layers[.]7[.]mlp[.]gate_proj$",
|
|
|
|
| 709 |
"re:^mtp[.]layers[.]0[.]self_attn[.]q_proj$",
|
| 710 |
"re:^mtp[.]layers[.]0[.]self_attn[.]qkv_proj$",
|
| 711 |
"re:^mtp[.]layers[.]0[.]self_attn[.]v_proj$",
|
| 712 |
+
"re:^visual[.]blocks[.]10[.]attn[.]proj$",
|
| 713 |
+
"re:^visual[.]blocks[.]10[.]attn[.]qkv$",
|
| 714 |
"re:^visual[.]blocks[.]10[.]mlp[.]linear_fc1$",
|
| 715 |
"re:^visual[.]blocks[.]10[.]mlp[.]linear_fc2$",
|
| 716 |
+
"re:^visual[.]blocks[.]11[.]attn[.]proj$",
|
| 717 |
+
"re:^visual[.]blocks[.]11[.]attn[.]qkv$",
|
| 718 |
"re:^visual[.]blocks[.]11[.]mlp[.]linear_fc1$",
|
| 719 |
"re:^visual[.]blocks[.]11[.]mlp[.]linear_fc2$",
|
| 720 |
"re:^visual[.]blocks[.]12[.]attn[.]proj$",
|
| 721 |
+
"re:^visual[.]blocks[.]12[.]attn[.]qkv$",
|
| 722 |
"re:^visual[.]blocks[.]12[.]mlp[.]linear_fc1$",
|
| 723 |
"re:^visual[.]blocks[.]12[.]mlp[.]linear_fc2$",
|
| 724 |
+
"re:^visual[.]blocks[.]13[.]attn[.]proj$",
|
| 725 |
"re:^visual[.]blocks[.]13[.]mlp[.]linear_fc1$",
|
| 726 |
"re:^visual[.]blocks[.]13[.]mlp[.]linear_fc2$",
|
| 727 |
"re:^visual[.]blocks[.]14[.]attn[.]proj$",
|
| 728 |
+
"re:^visual[.]blocks[.]14[.]attn[.]qkv$",
|
| 729 |
"re:^visual[.]blocks[.]14[.]mlp[.]linear_fc1$",
|
| 730 |
"re:^visual[.]blocks[.]14[.]mlp[.]linear_fc2$",
|
| 731 |
"re:^visual[.]blocks[.]15[.]attn[.]proj$",
|
|
|
|
| 765 |
"re:^visual[.]blocks[.]24[.]attn[.]proj$",
|
| 766 |
"re:^visual[.]blocks[.]24[.]attn[.]qkv$",
|
| 767 |
"re:^visual[.]blocks[.]24[.]mlp[.]linear_fc1$",
|
| 768 |
+
"re:^visual[.]blocks[.]24[.]mlp[.]linear_fc2$",
|
| 769 |
"re:^visual[.]blocks[.]25[.]attn[.]proj$",
|
| 770 |
"re:^visual[.]blocks[.]25[.]attn[.]qkv$",
|
| 771 |
"re:^visual[.]blocks[.]25[.]mlp[.]linear_fc1$",
|
| 772 |
"re:^visual[.]blocks[.]26[.]attn[.]proj$",
|
| 773 |
"re:^visual[.]blocks[.]26[.]attn[.]qkv$",
|
| 774 |
"re:^visual[.]blocks[.]26[.]mlp[.]linear_fc1$",
|
| 775 |
+
"re:^visual[.]blocks[.]5[.]mlp[.]linear_fc1$",
|
| 776 |
"re:^visual[.]blocks[.]5[.]mlp[.]linear_fc2$",
|
| 777 |
"re:^visual[.]blocks[.]6[.]mlp[.]linear_fc1$",
|
| 778 |
"re:^visual[.]blocks[.]6[.]mlp[.]linear_fc2$",
|
| 779 |
+
"re:^visual[.]blocks[.]7[.]attn[.]qkv$",
|
| 780 |
"re:^visual[.]blocks[.]7[.]mlp[.]linear_fc1$",
|
| 781 |
"re:^visual[.]blocks[.]7[.]mlp[.]linear_fc2$",
|
| 782 |
"re:^visual[.]blocks[.]8[.]mlp[.]linear_fc1$",
|
|
|
|
| 798 |
"language_model.model.layers.10.linear_attn.in_proj_a",
|
| 799 |
"language_model.model.layers.10.linear_attn.in_proj_b",
|
| 800 |
"language_model.model.layers.10.linear_attn.in_proj_ba",
|
| 801 |
+
"language_model.model.layers.11.self_attn.k_proj",
|
| 802 |
+
"language_model.model.layers.11.self_attn.q_proj",
|
| 803 |
+
"language_model.model.layers.11.self_attn.qkv_proj",
|
| 804 |
+
"language_model.model.layers.11.self_attn.v_proj",
|
| 805 |
"language_model.model.layers.12.linear_attn.in_proj_a",
|
| 806 |
"language_model.model.layers.12.linear_attn.in_proj_b",
|
| 807 |
"language_model.model.layers.12.linear_attn.in_proj_ba",
|
|
|
|
| 811 |
"language_model.model.layers.14.linear_attn.in_proj_a",
|
| 812 |
"language_model.model.layers.14.linear_attn.in_proj_b",
|
| 813 |
"language_model.model.layers.14.linear_attn.in_proj_ba",
|
| 814 |
+
"language_model.model.layers.15.self_attn.k_proj",
|
| 815 |
+
"language_model.model.layers.15.self_attn.q_proj",
|
| 816 |
+
"language_model.model.layers.15.self_attn.qkv_proj",
|
| 817 |
+
"language_model.model.layers.15.self_attn.v_proj",
|
| 818 |
"language_model.model.layers.16.linear_attn.in_proj_a",
|
| 819 |
"language_model.model.layers.16.linear_attn.in_proj_b",
|
| 820 |
"language_model.model.layers.16.linear_attn.in_proj_ba",
|
| 821 |
"language_model.model.layers.17.linear_attn.in_proj_a",
|
| 822 |
"language_model.model.layers.17.linear_attn.in_proj_b",
|
| 823 |
"language_model.model.layers.17.linear_attn.in_proj_ba",
|
| 824 |
+
"language_model.model.layers.19.self_attn.k_proj",
|
| 825 |
+
"language_model.model.layers.19.self_attn.q_proj",
|
| 826 |
+
"language_model.model.layers.19.self_attn.qkv_proj",
|
| 827 |
+
"language_model.model.layers.19.self_attn.v_proj",
|
| 828 |
"language_model.model.layers.2.linear_attn.in_proj_a",
|
| 829 |
"language_model.model.layers.2.linear_attn.in_proj_b",
|
| 830 |
"language_model.model.layers.2.linear_attn.in_proj_ba",
|
|
|
|
|
|
|
|
|
|
| 831 |
"language_model.model.layers.24.linear_attn.in_proj_a",
|
| 832 |
"language_model.model.layers.24.linear_attn.in_proj_b",
|
| 833 |
"language_model.model.layers.24.linear_attn.in_proj_ba",
|
|
|
|
| 858 |
"language_model.model.layers.30.linear_attn.in_proj_qkvz",
|
| 859 |
"language_model.model.layers.30.linear_attn.in_proj_z",
|
| 860 |
"language_model.model.layers.31.self_attn.k_proj",
|
|
|
|
| 861 |
"language_model.model.layers.31.self_attn.q_proj",
|
| 862 |
"language_model.model.layers.31.self_attn.qkv_proj",
|
| 863 |
"language_model.model.layers.31.self_attn.v_proj",
|
|
|
|
| 883 |
"language_model.model.layers.35.self_attn.q_proj",
|
| 884 |
"language_model.model.layers.35.self_attn.qkv_proj",
|
| 885 |
"language_model.model.layers.35.self_attn.v_proj",
|
|
|
|
|
|
|
|
|
|
| 886 |
"language_model.model.layers.38.linear_attn.in_proj_a",
|
| 887 |
"language_model.model.layers.38.linear_attn.in_proj_b",
|
| 888 |
"language_model.model.layers.38.linear_attn.in_proj_ba",
|
|
|
|
| 896 |
"language_model.model.layers.42.linear_attn.in_proj_qkvz",
|
| 897 |
"language_model.model.layers.42.linear_attn.in_proj_z",
|
| 898 |
"language_model.model.layers.43.self_attn.k_proj",
|
|
|
|
| 899 |
"language_model.model.layers.43.self_attn.q_proj",
|
| 900 |
"language_model.model.layers.43.self_attn.qkv_proj",
|
| 901 |
"language_model.model.layers.43.self_attn.v_proj",
|
|
|
|
| 945 |
"language_model.model.layers.56.linear_attn.in_proj_a",
|
| 946 |
"language_model.model.layers.56.linear_attn.in_proj_b",
|
| 947 |
"language_model.model.layers.56.linear_attn.in_proj_ba",
|
| 948 |
+
"language_model.model.layers.59.self_attn.k_proj",
|
| 949 |
+
"language_model.model.layers.59.self_attn.q_proj",
|
| 950 |
+
"language_model.model.layers.59.self_attn.qkv_proj",
|
| 951 |
+
"language_model.model.layers.59.self_attn.v_proj",
|
| 952 |
"language_model.model.layers.6.linear_attn.in_proj_a",
|
| 953 |
"language_model.model.layers.6.linear_attn.in_proj_b",
|
| 954 |
"language_model.model.layers.6.linear_attn.in_proj_ba",
|
|
|
|
| 958 |
"language_model.model.layers.63.mlp.gate_proj",
|
| 959 |
"language_model.model.layers.63.mlp.gate_up_proj",
|
| 960 |
"language_model.model.layers.63.mlp.up_proj",
|
| 961 |
+
"language_model.model.layers.7.self_attn.k_proj",
|
| 962 |
+
"language_model.model.layers.7.self_attn.q_proj",
|
| 963 |
+
"language_model.model.layers.7.self_attn.qkv_proj",
|
| 964 |
+
"language_model.model.layers.7.self_attn.v_proj",
|
| 965 |
"mtp.fc",
|
| 966 |
"visual.blocks.0.attn.proj",
|
| 967 |
"visual.blocks.0.attn.qkv",
|
|
|
|
| 971 |
"visual.blocks.1.attn.qkv",
|
| 972 |
"visual.blocks.1.mlp.linear_fc1",
|
| 973 |
"visual.blocks.1.mlp.linear_fc2",
|
|
|
|
|
|
|
| 974 |
"visual.blocks.2.attn.proj",
|
| 975 |
"visual.blocks.2.attn.qkv",
|
| 976 |
"visual.blocks.2.mlp.linear_fc1",
|
| 977 |
"visual.blocks.2.mlp.linear_fc2",
|
| 978 |
"visual.blocks.21.mlp.linear_fc2",
|
| 979 |
"visual.blocks.23.mlp.linear_fc2",
|
|
|
|
| 980 |
"visual.blocks.25.mlp.linear_fc2",
|
| 981 |
"visual.blocks.26.mlp.linear_fc2",
|
| 982 |
"visual.blocks.3.attn.proj",
|
|
|
|
| 989 |
"visual.blocks.4.mlp.linear_fc2",
|
| 990 |
"visual.blocks.5.attn.proj",
|
| 991 |
"visual.blocks.5.attn.qkv",
|
|
|
|
| 992 |
"visual.blocks.6.attn.proj",
|
|
|
|
| 993 |
"visual.blocks.8.attn.proj",
|
|
|
|
|
|
|
| 994 |
"visual.pos_embed"
|
| 995 |
],
|
| 996 |
"quantization_status": "compressed"
|
mixed_native_manifest.json
CHANGED
|
@@ -3,10 +3,10 @@
|
|
| 3 |
"source_recipe": "/work/artifacts/layer_config.json",
|
| 4 |
"format_histogram": {
|
| 5 |
"head_passthrough/BF16": 3,
|
| 6 |
-
"linear/BF16":
|
| 7 |
-
"linear/NVFP4":
|
| 8 |
"layer_passthrough/BF16": 352,
|
| 9 |
-
"linear/MXFP8":
|
| 10 |
"mtp_linear/NVFP4": 7,
|
| 11 |
"mtp_passthrough/BF16": 8
|
| 12 |
},
|
|
|
|
| 3 |
"source_recipe": "/work/artifacts/layer_config.json",
|
| 4 |
"format_histogram": {
|
| 5 |
"head_passthrough/BF16": 3,
|
| 6 |
+
"linear/BF16": 118,
|
| 7 |
+
"linear/NVFP4": 354,
|
| 8 |
"layer_passthrough/BF16": 352,
|
| 9 |
+
"linear/MXFP8": 24,
|
| 10 |
"mtp_linear/NVFP4": 7,
|
| 11 |
"mtp_passthrough/BF16": 8
|
| 12 |
},
|
model-00002-of-00006.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d5f3d4bca6418ab2d9fc03502124412c85c56b00000d51c2cc98a450b0d4bb13
|
| 3 |
+
size 4516492384
|
model-00003-of-00006.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:773d4a9f7bb6583c51a2124c732ad3cfb27430f92409516cf438ce83ec18a7c2
|
| 3 |
+
size 4488233624
|
model-00004-of-00006.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d918713c09bd215109256804998e5b7113ea5126a2822b393ba38f90e4b0d2a
|
| 3 |
+
size 4451868072
|
model-00005-of-00006.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ad777d8cef79c3c4c2f3eadcb8feb447a31e2cc839950336ce3fc520cdf1e658
|
| 3 |
+
size 4532286704
|
model-00006-of-00006.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:66dc99205ef90ee732d7e17ba02b4bb0c71116d4df855bb5c0bb066c7dee899e
|
| 3 |
+
size 2137290832
|
model.safetensors.index.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
-
"total_size":
|
| 4 |
},
|
| 5 |
"weight_map": {
|
| 6 |
"lm_head.weight": "model-00001-of-00006.safetensors",
|
|
@@ -120,16 +120,13 @@
|
|
| 120 |
"model.language_model.layers.11.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
|
| 121 |
"model.language_model.layers.11.self_attn.k_norm.weight": "model-00002-of-00006.safetensors",
|
| 122 |
"model.language_model.layers.11.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
|
| 123 |
-
"model.language_model.layers.11.self_attn.k_proj.weight_scale": "model-00002-of-00006.safetensors",
|
| 124 |
"model.language_model.layers.11.self_attn.o_proj.input_global_scale": "model-00002-of-00006.safetensors",
|
| 125 |
"model.language_model.layers.11.self_attn.o_proj.weight_global_scale": "model-00002-of-00006.safetensors",
|
| 126 |
"model.language_model.layers.11.self_attn.o_proj.weight_packed": "model-00002-of-00006.safetensors",
|
| 127 |
"model.language_model.layers.11.self_attn.o_proj.weight_scale": "model-00002-of-00006.safetensors",
|
| 128 |
"model.language_model.layers.11.self_attn.q_norm.weight": "model-00002-of-00006.safetensors",
|
| 129 |
"model.language_model.layers.11.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
|
| 130 |
-
"model.language_model.layers.11.self_attn.q_proj.weight_scale": "model-00002-of-00006.safetensors",
|
| 131 |
"model.language_model.layers.11.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
|
| 132 |
-
"model.language_model.layers.11.self_attn.v_proj.weight_scale": "model-00002-of-00006.safetensors",
|
| 133 |
"model.language_model.layers.12.input_layernorm.weight": "model-00002-of-00006.safetensors",
|
| 134 |
"model.language_model.layers.12.linear_attn.A_log": "model-00002-of-00006.safetensors",
|
| 135 |
"model.language_model.layers.12.linear_attn.conv1d.weight": "model-00002-of-00006.safetensors",
|
|
@@ -242,16 +239,13 @@
|
|
| 242 |
"model.language_model.layers.15.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
|
| 243 |
"model.language_model.layers.15.self_attn.k_norm.weight": "model-00002-of-00006.safetensors",
|
| 244 |
"model.language_model.layers.15.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
|
| 245 |
-
"model.language_model.layers.15.self_attn.k_proj.weight_scale": "model-00002-of-00006.safetensors",
|
| 246 |
"model.language_model.layers.15.self_attn.o_proj.input_global_scale": "model-00002-of-00006.safetensors",
|
| 247 |
"model.language_model.layers.15.self_attn.o_proj.weight_global_scale": "model-00002-of-00006.safetensors",
|
| 248 |
"model.language_model.layers.15.self_attn.o_proj.weight_packed": "model-00002-of-00006.safetensors",
|
| 249 |
"model.language_model.layers.15.self_attn.o_proj.weight_scale": "model-00002-of-00006.safetensors",
|
| 250 |
"model.language_model.layers.15.self_attn.q_norm.weight": "model-00002-of-00006.safetensors",
|
| 251 |
"model.language_model.layers.15.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
|
| 252 |
-
"model.language_model.layers.15.self_attn.q_proj.weight_scale": "model-00002-of-00006.safetensors",
|
| 253 |
"model.language_model.layers.15.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
|
| 254 |
-
"model.language_model.layers.15.self_attn.v_proj.weight_scale": "model-00002-of-00006.safetensors",
|
| 255 |
"model.language_model.layers.16.input_layernorm.weight": "model-00002-of-00006.safetensors",
|
| 256 |
"model.language_model.layers.16.linear_attn.A_log": "model-00002-of-00006.safetensors",
|
| 257 |
"model.language_model.layers.16.linear_attn.conv1d.weight": "model-00002-of-00006.safetensors",
|
|
@@ -260,23 +254,23 @@
|
|
| 260 |
"model.language_model.layers.16.linear_attn.in_proj_b.weight": "model-00002-of-00006.safetensors",
|
| 261 |
"model.language_model.layers.16.linear_attn.in_proj_qkv.input_global_scale": "model-00002-of-00006.safetensors",
|
| 262 |
"model.language_model.layers.16.linear_attn.in_proj_qkv.weight_global_scale": "model-00002-of-00006.safetensors",
|
| 263 |
-
"model.language_model.layers.16.linear_attn.in_proj_qkv.weight_packed": "model-
|
| 264 |
-
"model.language_model.layers.16.linear_attn.in_proj_qkv.weight_scale": "model-
|
| 265 |
-
"model.language_model.layers.16.linear_attn.in_proj_z.input_global_scale": "model-
|
| 266 |
-
"model.language_model.layers.16.linear_attn.in_proj_z.weight_global_scale": "model-
|
| 267 |
-
"model.language_model.layers.16.linear_attn.in_proj_z.weight_packed": "model-
|
| 268 |
-
"model.language_model.layers.16.linear_attn.in_proj_z.weight_scale": "model-
|
| 269 |
-
"model.language_model.layers.16.linear_attn.norm.weight": "model-
|
| 270 |
-
"model.language_model.layers.16.linear_attn.out_proj.input_global_scale": "model-
|
| 271 |
-
"model.language_model.layers.16.linear_attn.out_proj.weight_global_scale": "model-
|
| 272 |
-
"model.language_model.layers.16.linear_attn.out_proj.weight_packed": "model-
|
| 273 |
-
"model.language_model.layers.16.linear_attn.out_proj.weight_scale": "model-
|
| 274 |
-
"model.language_model.layers.16.mlp.down_proj.input_global_scale": "model-
|
| 275 |
-
"model.language_model.layers.16.mlp.down_proj.weight_global_scale": "model-
|
| 276 |
-
"model.language_model.layers.16.mlp.down_proj.weight_packed": "model-
|
| 277 |
-
"model.language_model.layers.16.mlp.down_proj.weight_scale": "model-
|
| 278 |
-
"model.language_model.layers.16.mlp.gate_proj.input_global_scale": "model-
|
| 279 |
-
"model.language_model.layers.16.mlp.gate_proj.weight_global_scale": "model-
|
| 280 |
"model.language_model.layers.16.mlp.gate_proj.weight_packed": "model-00003-of-00006.safetensors",
|
| 281 |
"model.language_model.layers.16.mlp.gate_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 282 |
"model.language_model.layers.16.mlp.up_proj.input_global_scale": "model-00003-of-00006.safetensors",
|
|
@@ -320,8 +314,14 @@
|
|
| 320 |
"model.language_model.layers.18.linear_attn.A_log": "model-00003-of-00006.safetensors",
|
| 321 |
"model.language_model.layers.18.linear_attn.conv1d.weight": "model-00003-of-00006.safetensors",
|
| 322 |
"model.language_model.layers.18.linear_attn.dt_bias": "model-00003-of-00006.safetensors",
|
| 323 |
-
"model.language_model.layers.18.linear_attn.in_proj_a.
|
| 324 |
-
"model.language_model.layers.18.linear_attn.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 325 |
"model.language_model.layers.18.linear_attn.in_proj_qkv.input_global_scale": "model-00003-of-00006.safetensors",
|
| 326 |
"model.language_model.layers.18.linear_attn.in_proj_qkv.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 327 |
"model.language_model.layers.18.linear_attn.in_proj_qkv.weight_packed": "model-00003-of-00006.safetensors",
|
|
@@ -363,23 +363,14 @@
|
|
| 363 |
"model.language_model.layers.19.mlp.up_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 364 |
"model.language_model.layers.19.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
|
| 365 |
"model.language_model.layers.19.self_attn.k_norm.weight": "model-00003-of-00006.safetensors",
|
| 366 |
-
"model.language_model.layers.19.self_attn.k_proj.
|
| 367 |
-
"model.language_model.layers.19.self_attn.k_proj.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 368 |
-
"model.language_model.layers.19.self_attn.k_proj.weight_packed": "model-00003-of-00006.safetensors",
|
| 369 |
-
"model.language_model.layers.19.self_attn.k_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 370 |
"model.language_model.layers.19.self_attn.o_proj.input_global_scale": "model-00003-of-00006.safetensors",
|
| 371 |
"model.language_model.layers.19.self_attn.o_proj.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 372 |
"model.language_model.layers.19.self_attn.o_proj.weight_packed": "model-00003-of-00006.safetensors",
|
| 373 |
"model.language_model.layers.19.self_attn.o_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 374 |
"model.language_model.layers.19.self_attn.q_norm.weight": "model-00003-of-00006.safetensors",
|
| 375 |
-
"model.language_model.layers.19.self_attn.q_proj.
|
| 376 |
-
"model.language_model.layers.19.self_attn.
|
| 377 |
-
"model.language_model.layers.19.self_attn.q_proj.weight_packed": "model-00003-of-00006.safetensors",
|
| 378 |
-
"model.language_model.layers.19.self_attn.q_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 379 |
-
"model.language_model.layers.19.self_attn.v_proj.input_global_scale": "model-00003-of-00006.safetensors",
|
| 380 |
-
"model.language_model.layers.19.self_attn.v_proj.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 381 |
-
"model.language_model.layers.19.self_attn.v_proj.weight_packed": "model-00003-of-00006.safetensors",
|
| 382 |
-
"model.language_model.layers.19.self_attn.v_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 383 |
"model.language_model.layers.2.input_layernorm.weight": "model-00003-of-00006.safetensors",
|
| 384 |
"model.language_model.layers.2.linear_attn.A_log": "model-00003-of-00006.safetensors",
|
| 385 |
"model.language_model.layers.2.linear_attn.conv1d.weight": "model-00003-of-00006.safetensors",
|
|
@@ -416,8 +407,14 @@
|
|
| 416 |
"model.language_model.layers.20.linear_attn.A_log": "model-00003-of-00006.safetensors",
|
| 417 |
"model.language_model.layers.20.linear_attn.conv1d.weight": "model-00003-of-00006.safetensors",
|
| 418 |
"model.language_model.layers.20.linear_attn.dt_bias": "model-00003-of-00006.safetensors",
|
| 419 |
-
"model.language_model.layers.20.linear_attn.in_proj_a.
|
| 420 |
-
"model.language_model.layers.20.linear_attn.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 421 |
"model.language_model.layers.20.linear_attn.in_proj_qkv.input_global_scale": "model-00003-of-00006.safetensors",
|
| 422 |
"model.language_model.layers.20.linear_attn.in_proj_qkv.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 423 |
"model.language_model.layers.20.linear_attn.in_proj_qkv.weight_packed": "model-00003-of-00006.safetensors",
|
|
@@ -535,22 +532,16 @@
|
|
| 535 |
"model.language_model.layers.23.mlp.up_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 536 |
"model.language_model.layers.23.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
|
| 537 |
"model.language_model.layers.23.self_attn.k_norm.weight": "model-00003-of-00006.safetensors",
|
| 538 |
-
"model.language_model.layers.23.self_attn.k_proj.
|
| 539 |
-
"model.language_model.layers.23.self_attn.k_proj.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 540 |
-
"model.language_model.layers.23.self_attn.k_proj.weight_packed": "model-00003-of-00006.safetensors",
|
| 541 |
"model.language_model.layers.23.self_attn.k_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 542 |
"model.language_model.layers.23.self_attn.o_proj.input_global_scale": "model-00003-of-00006.safetensors",
|
| 543 |
"model.language_model.layers.23.self_attn.o_proj.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 544 |
"model.language_model.layers.23.self_attn.o_proj.weight_packed": "model-00003-of-00006.safetensors",
|
| 545 |
"model.language_model.layers.23.self_attn.o_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 546 |
"model.language_model.layers.23.self_attn.q_norm.weight": "model-00003-of-00006.safetensors",
|
| 547 |
-
"model.language_model.layers.23.self_attn.q_proj.
|
| 548 |
-
"model.language_model.layers.23.self_attn.q_proj.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 549 |
-
"model.language_model.layers.23.self_attn.q_proj.weight_packed": "model-00003-of-00006.safetensors",
|
| 550 |
"model.language_model.layers.23.self_attn.q_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 551 |
-
"model.language_model.layers.23.self_attn.v_proj.
|
| 552 |
-
"model.language_model.layers.23.self_attn.v_proj.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 553 |
-
"model.language_model.layers.23.self_attn.v_proj.weight_packed": "model-00003-of-00006.safetensors",
|
| 554 |
"model.language_model.layers.23.self_attn.v_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 555 |
"model.language_model.layers.24.input_layernorm.weight": "model-00003-of-00006.safetensors",
|
| 556 |
"model.language_model.layers.24.linear_attn.A_log": "model-00003-of-00006.safetensors",
|
|
@@ -664,7 +655,9 @@
|
|
| 664 |
"model.language_model.layers.27.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
|
| 665 |
"model.language_model.layers.27.self_attn.k_norm.weight": "model-00003-of-00006.safetensors",
|
| 666 |
"model.language_model.layers.27.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
|
| 667 |
-
"model.language_model.layers.27.self_attn.o_proj.
|
|
|
|
|
|
|
| 668 |
"model.language_model.layers.27.self_attn.o_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 669 |
"model.language_model.layers.27.self_attn.q_norm.weight": "model-00003-of-00006.safetensors",
|
| 670 |
"model.language_model.layers.27.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
|
|
@@ -675,9 +668,13 @@
|
|
| 675 |
"model.language_model.layers.28.linear_attn.dt_bias": "model-00003-of-00006.safetensors",
|
| 676 |
"model.language_model.layers.28.linear_attn.in_proj_a.weight": "model-00003-of-00006.safetensors",
|
| 677 |
"model.language_model.layers.28.linear_attn.in_proj_b.weight": "model-00003-of-00006.safetensors",
|
| 678 |
-
"model.language_model.layers.28.linear_attn.in_proj_qkv.
|
|
|
|
|
|
|
| 679 |
"model.language_model.layers.28.linear_attn.in_proj_qkv.weight_scale": "model-00003-of-00006.safetensors",
|
| 680 |
-
"model.language_model.layers.28.linear_attn.in_proj_z.
|
|
|
|
|
|
|
| 681 |
"model.language_model.layers.28.linear_attn.in_proj_z.weight_scale": "model-00003-of-00006.safetensors",
|
| 682 |
"model.language_model.layers.28.linear_attn.norm.weight": "model-00003-of-00006.safetensors",
|
| 683 |
"model.language_model.layers.28.linear_attn.out_proj.input_global_scale": "model-00003-of-00006.safetensors",
|
|
@@ -791,6 +788,7 @@
|
|
| 791 |
"model.language_model.layers.31.self_attn.k_norm.weight": "model-00003-of-00006.safetensors",
|
| 792 |
"model.language_model.layers.31.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
|
| 793 |
"model.language_model.layers.31.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
|
|
|
|
| 794 |
"model.language_model.layers.31.self_attn.q_norm.weight": "model-00003-of-00006.safetensors",
|
| 795 |
"model.language_model.layers.31.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
|
| 796 |
"model.language_model.layers.31.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
|
|
@@ -800,20 +798,24 @@
|
|
| 800 |
"model.language_model.layers.32.linear_attn.dt_bias": "model-00003-of-00006.safetensors",
|
| 801 |
"model.language_model.layers.32.linear_attn.in_proj_a.weight": "model-00003-of-00006.safetensors",
|
| 802 |
"model.language_model.layers.32.linear_attn.in_proj_b.weight": "model-00003-of-00006.safetensors",
|
| 803 |
-
"model.language_model.layers.32.linear_attn.in_proj_qkv.weight": "model-
|
| 804 |
-
"model.language_model.layers.32.linear_attn.in_proj_z.weight": "model-
|
| 805 |
-
"model.language_model.layers.32.linear_attn.norm.weight": "model-
|
| 806 |
-
"model.language_model.layers.32.linear_attn.out_proj.input_global_scale": "model-
|
| 807 |
-
"model.language_model.layers.32.linear_attn.out_proj.weight_global_scale": "model-
|
| 808 |
-
"model.language_model.layers.32.linear_attn.out_proj.weight_packed": "model-
|
| 809 |
-
"model.language_model.layers.32.linear_attn.out_proj.weight_scale": "model-
|
| 810 |
-
"model.language_model.layers.32.mlp.down_proj.input_global_scale": "model-
|
| 811 |
-
"model.language_model.layers.32.mlp.down_proj.weight_global_scale": "model-
|
| 812 |
"model.language_model.layers.32.mlp.down_proj.weight_packed": "model-00004-of-00006.safetensors",
|
| 813 |
"model.language_model.layers.32.mlp.down_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 814 |
-
"model.language_model.layers.32.mlp.gate_proj.
|
|
|
|
|
|
|
| 815 |
"model.language_model.layers.32.mlp.gate_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 816 |
-
"model.language_model.layers.32.mlp.up_proj.
|
|
|
|
|
|
|
| 817 |
"model.language_model.layers.32.mlp.up_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 818 |
"model.language_model.layers.32.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
|
| 819 |
"model.language_model.layers.33.input_layernorm.weight": "model-00004-of-00006.safetensors",
|
|
@@ -884,7 +886,9 @@
|
|
| 884 |
"model.language_model.layers.35.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
|
| 885 |
"model.language_model.layers.35.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
|
| 886 |
"model.language_model.layers.35.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
|
| 887 |
-
"model.language_model.layers.35.self_attn.o_proj.
|
|
|
|
|
|
|
| 888 |
"model.language_model.layers.35.self_attn.o_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 889 |
"model.language_model.layers.35.self_attn.q_norm.weight": "model-00004-of-00006.safetensors",
|
| 890 |
"model.language_model.layers.35.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
|
|
@@ -893,8 +897,14 @@
|
|
| 893 |
"model.language_model.layers.36.linear_attn.A_log": "model-00004-of-00006.safetensors",
|
| 894 |
"model.language_model.layers.36.linear_attn.conv1d.weight": "model-00004-of-00006.safetensors",
|
| 895 |
"model.language_model.layers.36.linear_attn.dt_bias": "model-00004-of-00006.safetensors",
|
| 896 |
-
"model.language_model.layers.36.linear_attn.in_proj_a.
|
| 897 |
-
"model.language_model.layers.36.linear_attn.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 898 |
"model.language_model.layers.36.linear_attn.in_proj_qkv.input_global_scale": "model-00004-of-00006.safetensors",
|
| 899 |
"model.language_model.layers.36.linear_attn.in_proj_qkv.weight_global_scale": "model-00004-of-00006.safetensors",
|
| 900 |
"model.language_model.layers.36.linear_attn.in_proj_qkv.weight_packed": "model-00004-of-00006.safetensors",
|
|
@@ -1006,22 +1016,16 @@
|
|
| 1006 |
"model.language_model.layers.39.mlp.up_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 1007 |
"model.language_model.layers.39.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
|
| 1008 |
"model.language_model.layers.39.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
|
| 1009 |
-
"model.language_model.layers.39.self_attn.k_proj.
|
| 1010 |
-
"model.language_model.layers.39.self_attn.k_proj.weight_global_scale": "model-00004-of-00006.safetensors",
|
| 1011 |
-
"model.language_model.layers.39.self_attn.k_proj.weight_packed": "model-00004-of-00006.safetensors",
|
| 1012 |
"model.language_model.layers.39.self_attn.k_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 1013 |
"model.language_model.layers.39.self_attn.o_proj.input_global_scale": "model-00004-of-00006.safetensors",
|
| 1014 |
"model.language_model.layers.39.self_attn.o_proj.weight_global_scale": "model-00004-of-00006.safetensors",
|
| 1015 |
"model.language_model.layers.39.self_attn.o_proj.weight_packed": "model-00004-of-00006.safetensors",
|
| 1016 |
"model.language_model.layers.39.self_attn.o_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 1017 |
"model.language_model.layers.39.self_attn.q_norm.weight": "model-00004-of-00006.safetensors",
|
| 1018 |
-
"model.language_model.layers.39.self_attn.q_proj.
|
| 1019 |
-
"model.language_model.layers.39.self_attn.q_proj.weight_global_scale": "model-00004-of-00006.safetensors",
|
| 1020 |
-
"model.language_model.layers.39.self_attn.q_proj.weight_packed": "model-00004-of-00006.safetensors",
|
| 1021 |
"model.language_model.layers.39.self_attn.q_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 1022 |
-
"model.language_model.layers.39.self_attn.v_proj.
|
| 1023 |
-
"model.language_model.layers.39.self_attn.v_proj.weight_global_scale": "model-00004-of-00006.safetensors",
|
| 1024 |
-
"model.language_model.layers.39.self_attn.v_proj.weight_packed": "model-00004-of-00006.safetensors",
|
| 1025 |
"model.language_model.layers.39.self_attn.v_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 1026 |
"model.language_model.layers.4.input_layernorm.weight": "model-00004-of-00006.safetensors",
|
| 1027 |
"model.language_model.layers.4.linear_attn.A_log": "model-00004-of-00006.safetensors",
|
|
@@ -1174,6 +1178,7 @@
|
|
| 1174 |
"model.language_model.layers.43.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
|
| 1175 |
"model.language_model.layers.43.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
|
| 1176 |
"model.language_model.layers.43.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
|
|
|
|
| 1177 |
"model.language_model.layers.43.self_attn.q_norm.weight": "model-00004-of-00006.safetensors",
|
| 1178 |
"model.language_model.layers.43.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
|
| 1179 |
"model.language_model.layers.43.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
|
|
@@ -1222,9 +1227,13 @@
|
|
| 1222 |
"model.language_model.layers.45.mlp.down_proj.weight_global_scale": "model-00004-of-00006.safetensors",
|
| 1223 |
"model.language_model.layers.45.mlp.down_proj.weight_packed": "model-00004-of-00006.safetensors",
|
| 1224 |
"model.language_model.layers.45.mlp.down_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 1225 |
-
"model.language_model.layers.45.mlp.gate_proj.
|
|
|
|
|
|
|
| 1226 |
"model.language_model.layers.45.mlp.gate_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 1227 |
-
"model.language_model.layers.45.mlp.up_proj.
|
|
|
|
|
|
|
| 1228 |
"model.language_model.layers.45.mlp.up_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 1229 |
"model.language_model.layers.45.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
|
| 1230 |
"model.language_model.layers.46.input_layernorm.weight": "model-00004-of-00006.safetensors",
|
|
@@ -1236,7 +1245,9 @@
|
|
| 1236 |
"model.language_model.layers.46.linear_attn.in_proj_qkv.weight": "model-00004-of-00006.safetensors",
|
| 1237 |
"model.language_model.layers.46.linear_attn.in_proj_z.weight": "model-00004-of-00006.safetensors",
|
| 1238 |
"model.language_model.layers.46.linear_attn.norm.weight": "model-00004-of-00006.safetensors",
|
| 1239 |
-
"model.language_model.layers.46.linear_attn.out_proj.
|
|
|
|
|
|
|
| 1240 |
"model.language_model.layers.46.linear_attn.out_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 1241 |
"model.language_model.layers.46.mlp.down_proj.input_global_scale": "model-00004-of-00006.safetensors",
|
| 1242 |
"model.language_model.layers.46.mlp.down_proj.weight_global_scale": "model-00004-of-00006.safetensors",
|
|
@@ -1259,7 +1270,9 @@
|
|
| 1259 |
"model.language_model.layers.47.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
|
| 1260 |
"model.language_model.layers.47.self_attn.k_norm.weight": "model-00005-of-00006.safetensors",
|
| 1261 |
"model.language_model.layers.47.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
|
| 1262 |
-
"model.language_model.layers.47.self_attn.o_proj.
|
|
|
|
|
|
|
| 1263 |
"model.language_model.layers.47.self_attn.o_proj.weight_scale": "model-00005-of-00006.safetensors",
|
| 1264 |
"model.language_model.layers.47.self_attn.q_norm.weight": "model-00005-of-00006.safetensors",
|
| 1265 |
"model.language_model.layers.47.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
|
|
@@ -1395,22 +1408,16 @@
|
|
| 1395 |
"model.language_model.layers.51.mlp.up_proj.weight_scale": "model-00005-of-00006.safetensors",
|
| 1396 |
"model.language_model.layers.51.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
|
| 1397 |
"model.language_model.layers.51.self_attn.k_norm.weight": "model-00005-of-00006.safetensors",
|
| 1398 |
-
"model.language_model.layers.51.self_attn.k_proj.
|
| 1399 |
-
"model.language_model.layers.51.self_attn.k_proj.weight_global_scale": "model-00005-of-00006.safetensors",
|
| 1400 |
-
"model.language_model.layers.51.self_attn.k_proj.weight_packed": "model-00005-of-00006.safetensors",
|
| 1401 |
"model.language_model.layers.51.self_attn.k_proj.weight_scale": "model-00005-of-00006.safetensors",
|
| 1402 |
"model.language_model.layers.51.self_attn.o_proj.input_global_scale": "model-00005-of-00006.safetensors",
|
| 1403 |
"model.language_model.layers.51.self_attn.o_proj.weight_global_scale": "model-00005-of-00006.safetensors",
|
| 1404 |
"model.language_model.layers.51.self_attn.o_proj.weight_packed": "model-00005-of-00006.safetensors",
|
| 1405 |
"model.language_model.layers.51.self_attn.o_proj.weight_scale": "model-00005-of-00006.safetensors",
|
| 1406 |
"model.language_model.layers.51.self_attn.q_norm.weight": "model-00005-of-00006.safetensors",
|
| 1407 |
-
"model.language_model.layers.51.self_attn.q_proj.
|
| 1408 |
-
"model.language_model.layers.51.self_attn.q_proj.weight_global_scale": "model-00005-of-00006.safetensors",
|
| 1409 |
-
"model.language_model.layers.51.self_attn.q_proj.weight_packed": "model-00005-of-00006.safetensors",
|
| 1410 |
"model.language_model.layers.51.self_attn.q_proj.weight_scale": "model-00005-of-00006.safetensors",
|
| 1411 |
-
"model.language_model.layers.51.self_attn.v_proj.
|
| 1412 |
-
"model.language_model.layers.51.self_attn.v_proj.weight_global_scale": "model-00005-of-00006.safetensors",
|
| 1413 |
-
"model.language_model.layers.51.self_attn.v_proj.weight_packed": "model-00005-of-00006.safetensors",
|
| 1414 |
"model.language_model.layers.51.self_attn.v_proj.weight_scale": "model-00005-of-00006.safetensors",
|
| 1415 |
"model.language_model.layers.52.input_layernorm.weight": "model-00005-of-00006.safetensors",
|
| 1416 |
"model.language_model.layers.52.linear_attn.A_log": "model-00005-of-00006.safetensors",
|
|
@@ -1666,14 +1673,13 @@
|
|
| 1666 |
"model.language_model.layers.59.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
|
| 1667 |
"model.language_model.layers.59.self_attn.k_norm.weight": "model-00005-of-00006.safetensors",
|
| 1668 |
"model.language_model.layers.59.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
|
| 1669 |
-
"model.language_model.layers.59.self_attn.
|
| 1670 |
-
"model.language_model.layers.59.self_attn.o_proj.
|
|
|
|
| 1671 |
"model.language_model.layers.59.self_attn.o_proj.weight_scale": "model-00005-of-00006.safetensors",
|
| 1672 |
"model.language_model.layers.59.self_attn.q_norm.weight": "model-00005-of-00006.safetensors",
|
| 1673 |
"model.language_model.layers.59.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
|
| 1674 |
-
"model.language_model.layers.59.self_attn.q_proj.weight_scale": "model-00005-of-00006.safetensors",
|
| 1675 |
"model.language_model.layers.59.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
|
| 1676 |
-
"model.language_model.layers.59.self_attn.v_proj.weight_scale": "model-00005-of-00006.safetensors",
|
| 1677 |
"model.language_model.layers.6.input_layernorm.weight": "model-00005-of-00006.safetensors",
|
| 1678 |
"model.language_model.layers.6.linear_attn.A_log": "model-00005-of-00006.safetensors",
|
| 1679 |
"model.language_model.layers.6.linear_attn.conv1d.weight": "model-00005-of-00006.safetensors",
|
|
@@ -1800,13 +1806,13 @@
|
|
| 1800 |
"model.language_model.layers.62.linear_attn.out_proj.input_global_scale": "model-00005-of-00006.safetensors",
|
| 1801 |
"model.language_model.layers.62.linear_attn.out_proj.weight_global_scale": "model-00005-of-00006.safetensors",
|
| 1802 |
"model.language_model.layers.62.linear_attn.out_proj.weight_packed": "model-00005-of-00006.safetensors",
|
| 1803 |
-
"model.language_model.layers.62.linear_attn.out_proj.weight_scale": "model-
|
| 1804 |
-
"model.language_model.layers.62.mlp.down_proj.input_global_scale": "model-
|
| 1805 |
-
"model.language_model.layers.62.mlp.down_proj.weight_global_scale": "model-
|
| 1806 |
-
"model.language_model.layers.62.mlp.down_proj.weight_packed": "model-
|
| 1807 |
-
"model.language_model.layers.62.mlp.down_proj.weight_scale": "model-
|
| 1808 |
-
"model.language_model.layers.62.mlp.gate_proj.input_global_scale": "model-
|
| 1809 |
-
"model.language_model.layers.62.mlp.gate_proj.weight_global_scale": "model-
|
| 1810 |
"model.language_model.layers.62.mlp.gate_proj.weight_packed": "model-00006-of-00006.safetensors",
|
| 1811 |
"model.language_model.layers.62.mlp.gate_proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1812 |
"model.language_model.layers.62.mlp.up_proj.input_global_scale": "model-00006-of-00006.safetensors",
|
|
@@ -1815,20 +1821,25 @@
|
|
| 1815 |
"model.language_model.layers.62.mlp.up_proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1816 |
"model.language_model.layers.62.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
|
| 1817 |
"model.language_model.layers.63.input_layernorm.weight": "model-00006-of-00006.safetensors",
|
| 1818 |
-
"model.language_model.layers.63.mlp.down_proj.
|
|
|
|
|
|
|
| 1819 |
"model.language_model.layers.63.mlp.down_proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1820 |
"model.language_model.layers.63.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
|
| 1821 |
"model.language_model.layers.63.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
|
| 1822 |
"model.language_model.layers.63.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
|
| 1823 |
"model.language_model.layers.63.self_attn.k_norm.weight": "model-00006-of-00006.safetensors",
|
| 1824 |
"model.language_model.layers.63.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
|
|
|
|
| 1825 |
"model.language_model.layers.63.self_attn.o_proj.input_global_scale": "model-00006-of-00006.safetensors",
|
| 1826 |
"model.language_model.layers.63.self_attn.o_proj.weight_global_scale": "model-00006-of-00006.safetensors",
|
| 1827 |
"model.language_model.layers.63.self_attn.o_proj.weight_packed": "model-00006-of-00006.safetensors",
|
| 1828 |
"model.language_model.layers.63.self_attn.o_proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1829 |
"model.language_model.layers.63.self_attn.q_norm.weight": "model-00006-of-00006.safetensors",
|
| 1830 |
"model.language_model.layers.63.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
|
|
|
|
| 1831 |
"model.language_model.layers.63.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
|
|
|
|
| 1832 |
"model.language_model.layers.7.input_layernorm.weight": "model-00006-of-00006.safetensors",
|
| 1833 |
"model.language_model.layers.7.mlp.down_proj.input_global_scale": "model-00006-of-00006.safetensors",
|
| 1834 |
"model.language_model.layers.7.mlp.down_proj.weight_global_scale": "model-00006-of-00006.safetensors",
|
|
@@ -1845,16 +1856,13 @@
|
|
| 1845 |
"model.language_model.layers.7.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
|
| 1846 |
"model.language_model.layers.7.self_attn.k_norm.weight": "model-00006-of-00006.safetensors",
|
| 1847 |
"model.language_model.layers.7.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
|
| 1848 |
-
"model.language_model.layers.7.self_attn.k_proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1849 |
"model.language_model.layers.7.self_attn.o_proj.input_global_scale": "model-00006-of-00006.safetensors",
|
| 1850 |
"model.language_model.layers.7.self_attn.o_proj.weight_global_scale": "model-00006-of-00006.safetensors",
|
| 1851 |
"model.language_model.layers.7.self_attn.o_proj.weight_packed": "model-00006-of-00006.safetensors",
|
| 1852 |
"model.language_model.layers.7.self_attn.o_proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1853 |
"model.language_model.layers.7.self_attn.q_norm.weight": "model-00006-of-00006.safetensors",
|
| 1854 |
"model.language_model.layers.7.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
|
| 1855 |
-
"model.language_model.layers.7.self_attn.q_proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1856 |
"model.language_model.layers.7.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
|
| 1857 |
-
"model.language_model.layers.7.self_attn.v_proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1858 |
"model.language_model.layers.8.input_layernorm.weight": "model-00006-of-00006.safetensors",
|
| 1859 |
"model.language_model.layers.8.linear_attn.A_log": "model-00006-of-00006.safetensors",
|
| 1860 |
"model.language_model.layers.8.linear_attn.conv1d.weight": "model-00006-of-00006.safetensors",
|
|
@@ -1957,9 +1965,14 @@
|
|
| 1957 |
"model.visual.blocks.1.norm2.bias": "model-00006-of-00006.safetensors",
|
| 1958 |
"model.visual.blocks.1.norm2.weight": "model-00006-of-00006.safetensors",
|
| 1959 |
"model.visual.blocks.10.attn.proj.bias": "model-00006-of-00006.safetensors",
|
| 1960 |
-
"model.visual.blocks.10.attn.proj.
|
|
|
|
|
|
|
|
|
|
| 1961 |
"model.visual.blocks.10.attn.qkv.bias": "model-00006-of-00006.safetensors",
|
| 1962 |
-
"model.visual.blocks.10.attn.qkv.
|
|
|
|
|
|
|
| 1963 |
"model.visual.blocks.10.attn.qkv.weight_scale": "model-00006-of-00006.safetensors",
|
| 1964 |
"model.visual.blocks.10.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
|
| 1965 |
"model.visual.blocks.10.mlp.linear_fc1.input_global_scale": "model-00006-of-00006.safetensors",
|
|
@@ -1976,10 +1989,14 @@
|
|
| 1976 |
"model.visual.blocks.10.norm2.bias": "model-00006-of-00006.safetensors",
|
| 1977 |
"model.visual.blocks.10.norm2.weight": "model-00006-of-00006.safetensors",
|
| 1978 |
"model.visual.blocks.11.attn.proj.bias": "model-00006-of-00006.safetensors",
|
| 1979 |
-
"model.visual.blocks.11.attn.proj.
|
|
|
|
|
|
|
| 1980 |
"model.visual.blocks.11.attn.proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1981 |
"model.visual.blocks.11.attn.qkv.bias": "model-00006-of-00006.safetensors",
|
| 1982 |
-
"model.visual.blocks.11.attn.qkv.
|
|
|
|
|
|
|
| 1983 |
"model.visual.blocks.11.attn.qkv.weight_scale": "model-00006-of-00006.safetensors",
|
| 1984 |
"model.visual.blocks.11.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
|
| 1985 |
"model.visual.blocks.11.mlp.linear_fc1.input_global_scale": "model-00006-of-00006.safetensors",
|
|
@@ -2001,7 +2018,9 @@
|
|
| 2001 |
"model.visual.blocks.12.attn.proj.weight_packed": "model-00006-of-00006.safetensors",
|
| 2002 |
"model.visual.blocks.12.attn.proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 2003 |
"model.visual.blocks.12.attn.qkv.bias": "model-00006-of-00006.safetensors",
|
| 2004 |
-
"model.visual.blocks.12.attn.qkv.
|
|
|
|
|
|
|
| 2005 |
"model.visual.blocks.12.attn.qkv.weight_scale": "model-00006-of-00006.safetensors",
|
| 2006 |
"model.visual.blocks.12.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
|
| 2007 |
"model.visual.blocks.12.mlp.linear_fc1.input_global_scale": "model-00006-of-00006.safetensors",
|
|
@@ -2018,10 +2037,13 @@
|
|
| 2018 |
"model.visual.blocks.12.norm2.bias": "model-00006-of-00006.safetensors",
|
| 2019 |
"model.visual.blocks.12.norm2.weight": "model-00006-of-00006.safetensors",
|
| 2020 |
"model.visual.blocks.13.attn.proj.bias": "model-00006-of-00006.safetensors",
|
| 2021 |
-
"model.visual.blocks.13.attn.proj.
|
|
|
|
|
|
|
| 2022 |
"model.visual.blocks.13.attn.proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 2023 |
"model.visual.blocks.13.attn.qkv.bias": "model-00006-of-00006.safetensors",
|
| 2024 |
"model.visual.blocks.13.attn.qkv.weight": "model-00006-of-00006.safetensors",
|
|
|
|
| 2025 |
"model.visual.blocks.13.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
|
| 2026 |
"model.visual.blocks.13.mlp.linear_fc1.input_global_scale": "model-00006-of-00006.safetensors",
|
| 2027 |
"model.visual.blocks.13.mlp.linear_fc1.weight_global_scale": "model-00006-of-00006.safetensors",
|
|
@@ -2042,7 +2064,9 @@
|
|
| 2042 |
"model.visual.blocks.14.attn.proj.weight_packed": "model-00006-of-00006.safetensors",
|
| 2043 |
"model.visual.blocks.14.attn.proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 2044 |
"model.visual.blocks.14.attn.qkv.bias": "model-00006-of-00006.safetensors",
|
| 2045 |
-
"model.visual.blocks.14.attn.qkv.
|
|
|
|
|
|
|
| 2046 |
"model.visual.blocks.14.attn.qkv.weight_scale": "model-00006-of-00006.safetensors",
|
| 2047 |
"model.visual.blocks.14.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
|
| 2048 |
"model.visual.blocks.14.mlp.linear_fc1.input_global_scale": "model-00006-of-00006.safetensors",
|
|
@@ -2296,7 +2320,10 @@
|
|
| 2296 |
"model.visual.blocks.24.mlp.linear_fc1.weight_packed": "model-00006-of-00006.safetensors",
|
| 2297 |
"model.visual.blocks.24.mlp.linear_fc1.weight_scale": "model-00006-of-00006.safetensors",
|
| 2298 |
"model.visual.blocks.24.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
|
| 2299 |
-
"model.visual.blocks.24.mlp.linear_fc2.
|
|
|
|
|
|
|
|
|
|
| 2300 |
"model.visual.blocks.24.norm1.bias": "model-00006-of-00006.safetensors",
|
| 2301 |
"model.visual.blocks.24.norm1.weight": "model-00006-of-00006.safetensors",
|
| 2302 |
"model.visual.blocks.24.norm2.bias": "model-00006-of-00006.safetensors",
|
|
@@ -2372,7 +2399,10 @@
|
|
| 2372 |
"model.visual.blocks.5.attn.qkv.bias": "model-00006-of-00006.safetensors",
|
| 2373 |
"model.visual.blocks.5.attn.qkv.weight": "model-00006-of-00006.safetensors",
|
| 2374 |
"model.visual.blocks.5.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
|
| 2375 |
-
"model.visual.blocks.5.mlp.linear_fc1.
|
|
|
|
|
|
|
|
|
|
| 2376 |
"model.visual.blocks.5.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
|
| 2377 |
"model.visual.blocks.5.mlp.linear_fc2.input_global_scale": "model-00006-of-00006.safetensors",
|
| 2378 |
"model.visual.blocks.5.mlp.linear_fc2.weight_global_scale": "model-00006-of-00006.safetensors",
|
|
@@ -2403,8 +2433,11 @@
|
|
| 2403 |
"model.visual.blocks.6.norm2.weight": "model-00006-of-00006.safetensors",
|
| 2404 |
"model.visual.blocks.7.attn.proj.bias": "model-00006-of-00006.safetensors",
|
| 2405 |
"model.visual.blocks.7.attn.proj.weight": "model-00006-of-00006.safetensors",
|
|
|
|
| 2406 |
"model.visual.blocks.7.attn.qkv.bias": "model-00006-of-00006.safetensors",
|
| 2407 |
-
"model.visual.blocks.7.attn.qkv.
|
|
|
|
|
|
|
| 2408 |
"model.visual.blocks.7.attn.qkv.weight_scale": "model-00006-of-00006.safetensors",
|
| 2409 |
"model.visual.blocks.7.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
|
| 2410 |
"model.visual.blocks.7.mlp.linear_fc1.input_global_scale": "model-00006-of-00006.safetensors",
|
|
@@ -2424,6 +2457,7 @@
|
|
| 2424 |
"model.visual.blocks.8.attn.proj.weight": "model-00006-of-00006.safetensors",
|
| 2425 |
"model.visual.blocks.8.attn.qkv.bias": "model-00006-of-00006.safetensors",
|
| 2426 |
"model.visual.blocks.8.attn.qkv.weight": "model-00006-of-00006.safetensors",
|
|
|
|
| 2427 |
"model.visual.blocks.8.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
|
| 2428 |
"model.visual.blocks.8.mlp.linear_fc1.input_global_scale": "model-00006-of-00006.safetensors",
|
| 2429 |
"model.visual.blocks.8.mlp.linear_fc1.weight_global_scale": "model-00006-of-00006.safetensors",
|
|
@@ -2440,6 +2474,7 @@
|
|
| 2440 |
"model.visual.blocks.8.norm2.weight": "model-00006-of-00006.safetensors",
|
| 2441 |
"model.visual.blocks.9.attn.proj.bias": "model-00006-of-00006.safetensors",
|
| 2442 |
"model.visual.blocks.9.attn.proj.weight": "model-00006-of-00006.safetensors",
|
|
|
|
| 2443 |
"model.visual.blocks.9.attn.qkv.bias": "model-00006-of-00006.safetensors",
|
| 2444 |
"model.visual.blocks.9.attn.qkv.weight": "model-00006-of-00006.safetensors",
|
| 2445 |
"model.visual.blocks.9.attn.qkv.weight_scale": "model-00006-of-00006.safetensors",
|
|
|
|
| 1 |
{
|
| 2 |
"metadata": {
|
| 3 |
+
"total_size": 22668647304
|
| 4 |
},
|
| 5 |
"weight_map": {
|
| 6 |
"lm_head.weight": "model-00001-of-00006.safetensors",
|
|
|
|
| 120 |
"model.language_model.layers.11.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
|
| 121 |
"model.language_model.layers.11.self_attn.k_norm.weight": "model-00002-of-00006.safetensors",
|
| 122 |
"model.language_model.layers.11.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
|
|
|
|
| 123 |
"model.language_model.layers.11.self_attn.o_proj.input_global_scale": "model-00002-of-00006.safetensors",
|
| 124 |
"model.language_model.layers.11.self_attn.o_proj.weight_global_scale": "model-00002-of-00006.safetensors",
|
| 125 |
"model.language_model.layers.11.self_attn.o_proj.weight_packed": "model-00002-of-00006.safetensors",
|
| 126 |
"model.language_model.layers.11.self_attn.o_proj.weight_scale": "model-00002-of-00006.safetensors",
|
| 127 |
"model.language_model.layers.11.self_attn.q_norm.weight": "model-00002-of-00006.safetensors",
|
| 128 |
"model.language_model.layers.11.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
|
|
|
|
| 129 |
"model.language_model.layers.11.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
|
|
|
|
| 130 |
"model.language_model.layers.12.input_layernorm.weight": "model-00002-of-00006.safetensors",
|
| 131 |
"model.language_model.layers.12.linear_attn.A_log": "model-00002-of-00006.safetensors",
|
| 132 |
"model.language_model.layers.12.linear_attn.conv1d.weight": "model-00002-of-00006.safetensors",
|
|
|
|
| 239 |
"model.language_model.layers.15.post_attention_layernorm.weight": "model-00002-of-00006.safetensors",
|
| 240 |
"model.language_model.layers.15.self_attn.k_norm.weight": "model-00002-of-00006.safetensors",
|
| 241 |
"model.language_model.layers.15.self_attn.k_proj.weight": "model-00002-of-00006.safetensors",
|
|
|
|
| 242 |
"model.language_model.layers.15.self_attn.o_proj.input_global_scale": "model-00002-of-00006.safetensors",
|
| 243 |
"model.language_model.layers.15.self_attn.o_proj.weight_global_scale": "model-00002-of-00006.safetensors",
|
| 244 |
"model.language_model.layers.15.self_attn.o_proj.weight_packed": "model-00002-of-00006.safetensors",
|
| 245 |
"model.language_model.layers.15.self_attn.o_proj.weight_scale": "model-00002-of-00006.safetensors",
|
| 246 |
"model.language_model.layers.15.self_attn.q_norm.weight": "model-00002-of-00006.safetensors",
|
| 247 |
"model.language_model.layers.15.self_attn.q_proj.weight": "model-00002-of-00006.safetensors",
|
|
|
|
| 248 |
"model.language_model.layers.15.self_attn.v_proj.weight": "model-00002-of-00006.safetensors",
|
|
|
|
| 249 |
"model.language_model.layers.16.input_layernorm.weight": "model-00002-of-00006.safetensors",
|
| 250 |
"model.language_model.layers.16.linear_attn.A_log": "model-00002-of-00006.safetensors",
|
| 251 |
"model.language_model.layers.16.linear_attn.conv1d.weight": "model-00002-of-00006.safetensors",
|
|
|
|
| 254 |
"model.language_model.layers.16.linear_attn.in_proj_b.weight": "model-00002-of-00006.safetensors",
|
| 255 |
"model.language_model.layers.16.linear_attn.in_proj_qkv.input_global_scale": "model-00002-of-00006.safetensors",
|
| 256 |
"model.language_model.layers.16.linear_attn.in_proj_qkv.weight_global_scale": "model-00002-of-00006.safetensors",
|
| 257 |
+
"model.language_model.layers.16.linear_attn.in_proj_qkv.weight_packed": "model-00003-of-00006.safetensors",
|
| 258 |
+
"model.language_model.layers.16.linear_attn.in_proj_qkv.weight_scale": "model-00003-of-00006.safetensors",
|
| 259 |
+
"model.language_model.layers.16.linear_attn.in_proj_z.input_global_scale": "model-00003-of-00006.safetensors",
|
| 260 |
+
"model.language_model.layers.16.linear_attn.in_proj_z.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 261 |
+
"model.language_model.layers.16.linear_attn.in_proj_z.weight_packed": "model-00003-of-00006.safetensors",
|
| 262 |
+
"model.language_model.layers.16.linear_attn.in_proj_z.weight_scale": "model-00003-of-00006.safetensors",
|
| 263 |
+
"model.language_model.layers.16.linear_attn.norm.weight": "model-00003-of-00006.safetensors",
|
| 264 |
+
"model.language_model.layers.16.linear_attn.out_proj.input_global_scale": "model-00003-of-00006.safetensors",
|
| 265 |
+
"model.language_model.layers.16.linear_attn.out_proj.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 266 |
+
"model.language_model.layers.16.linear_attn.out_proj.weight_packed": "model-00003-of-00006.safetensors",
|
| 267 |
+
"model.language_model.layers.16.linear_attn.out_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 268 |
+
"model.language_model.layers.16.mlp.down_proj.input_global_scale": "model-00003-of-00006.safetensors",
|
| 269 |
+
"model.language_model.layers.16.mlp.down_proj.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 270 |
+
"model.language_model.layers.16.mlp.down_proj.weight_packed": "model-00003-of-00006.safetensors",
|
| 271 |
+
"model.language_model.layers.16.mlp.down_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 272 |
+
"model.language_model.layers.16.mlp.gate_proj.input_global_scale": "model-00003-of-00006.safetensors",
|
| 273 |
+
"model.language_model.layers.16.mlp.gate_proj.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 274 |
"model.language_model.layers.16.mlp.gate_proj.weight_packed": "model-00003-of-00006.safetensors",
|
| 275 |
"model.language_model.layers.16.mlp.gate_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 276 |
"model.language_model.layers.16.mlp.up_proj.input_global_scale": "model-00003-of-00006.safetensors",
|
|
|
|
| 314 |
"model.language_model.layers.18.linear_attn.A_log": "model-00003-of-00006.safetensors",
|
| 315 |
"model.language_model.layers.18.linear_attn.conv1d.weight": "model-00003-of-00006.safetensors",
|
| 316 |
"model.language_model.layers.18.linear_attn.dt_bias": "model-00003-of-00006.safetensors",
|
| 317 |
+
"model.language_model.layers.18.linear_attn.in_proj_a.input_global_scale": "model-00003-of-00006.safetensors",
|
| 318 |
+
"model.language_model.layers.18.linear_attn.in_proj_a.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 319 |
+
"model.language_model.layers.18.linear_attn.in_proj_a.weight_packed": "model-00003-of-00006.safetensors",
|
| 320 |
+
"model.language_model.layers.18.linear_attn.in_proj_a.weight_scale": "model-00003-of-00006.safetensors",
|
| 321 |
+
"model.language_model.layers.18.linear_attn.in_proj_b.input_global_scale": "model-00003-of-00006.safetensors",
|
| 322 |
+
"model.language_model.layers.18.linear_attn.in_proj_b.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 323 |
+
"model.language_model.layers.18.linear_attn.in_proj_b.weight_packed": "model-00003-of-00006.safetensors",
|
| 324 |
+
"model.language_model.layers.18.linear_attn.in_proj_b.weight_scale": "model-00003-of-00006.safetensors",
|
| 325 |
"model.language_model.layers.18.linear_attn.in_proj_qkv.input_global_scale": "model-00003-of-00006.safetensors",
|
| 326 |
"model.language_model.layers.18.linear_attn.in_proj_qkv.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 327 |
"model.language_model.layers.18.linear_attn.in_proj_qkv.weight_packed": "model-00003-of-00006.safetensors",
|
|
|
|
| 363 |
"model.language_model.layers.19.mlp.up_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 364 |
"model.language_model.layers.19.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
|
| 365 |
"model.language_model.layers.19.self_attn.k_norm.weight": "model-00003-of-00006.safetensors",
|
| 366 |
+
"model.language_model.layers.19.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
|
|
|
|
|
|
|
|
|
|
| 367 |
"model.language_model.layers.19.self_attn.o_proj.input_global_scale": "model-00003-of-00006.safetensors",
|
| 368 |
"model.language_model.layers.19.self_attn.o_proj.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 369 |
"model.language_model.layers.19.self_attn.o_proj.weight_packed": "model-00003-of-00006.safetensors",
|
| 370 |
"model.language_model.layers.19.self_attn.o_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 371 |
"model.language_model.layers.19.self_attn.q_norm.weight": "model-00003-of-00006.safetensors",
|
| 372 |
+
"model.language_model.layers.19.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
|
| 373 |
+
"model.language_model.layers.19.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 374 |
"model.language_model.layers.2.input_layernorm.weight": "model-00003-of-00006.safetensors",
|
| 375 |
"model.language_model.layers.2.linear_attn.A_log": "model-00003-of-00006.safetensors",
|
| 376 |
"model.language_model.layers.2.linear_attn.conv1d.weight": "model-00003-of-00006.safetensors",
|
|
|
|
| 407 |
"model.language_model.layers.20.linear_attn.A_log": "model-00003-of-00006.safetensors",
|
| 408 |
"model.language_model.layers.20.linear_attn.conv1d.weight": "model-00003-of-00006.safetensors",
|
| 409 |
"model.language_model.layers.20.linear_attn.dt_bias": "model-00003-of-00006.safetensors",
|
| 410 |
+
"model.language_model.layers.20.linear_attn.in_proj_a.input_global_scale": "model-00003-of-00006.safetensors",
|
| 411 |
+
"model.language_model.layers.20.linear_attn.in_proj_a.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 412 |
+
"model.language_model.layers.20.linear_attn.in_proj_a.weight_packed": "model-00003-of-00006.safetensors",
|
| 413 |
+
"model.language_model.layers.20.linear_attn.in_proj_a.weight_scale": "model-00003-of-00006.safetensors",
|
| 414 |
+
"model.language_model.layers.20.linear_attn.in_proj_b.input_global_scale": "model-00003-of-00006.safetensors",
|
| 415 |
+
"model.language_model.layers.20.linear_attn.in_proj_b.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 416 |
+
"model.language_model.layers.20.linear_attn.in_proj_b.weight_packed": "model-00003-of-00006.safetensors",
|
| 417 |
+
"model.language_model.layers.20.linear_attn.in_proj_b.weight_scale": "model-00003-of-00006.safetensors",
|
| 418 |
"model.language_model.layers.20.linear_attn.in_proj_qkv.input_global_scale": "model-00003-of-00006.safetensors",
|
| 419 |
"model.language_model.layers.20.linear_attn.in_proj_qkv.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 420 |
"model.language_model.layers.20.linear_attn.in_proj_qkv.weight_packed": "model-00003-of-00006.safetensors",
|
|
|
|
| 532 |
"model.language_model.layers.23.mlp.up_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 533 |
"model.language_model.layers.23.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
|
| 534 |
"model.language_model.layers.23.self_attn.k_norm.weight": "model-00003-of-00006.safetensors",
|
| 535 |
+
"model.language_model.layers.23.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
|
|
|
|
|
|
|
| 536 |
"model.language_model.layers.23.self_attn.k_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 537 |
"model.language_model.layers.23.self_attn.o_proj.input_global_scale": "model-00003-of-00006.safetensors",
|
| 538 |
"model.language_model.layers.23.self_attn.o_proj.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 539 |
"model.language_model.layers.23.self_attn.o_proj.weight_packed": "model-00003-of-00006.safetensors",
|
| 540 |
"model.language_model.layers.23.self_attn.o_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 541 |
"model.language_model.layers.23.self_attn.q_norm.weight": "model-00003-of-00006.safetensors",
|
| 542 |
+
"model.language_model.layers.23.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
|
|
|
|
|
|
|
| 543 |
"model.language_model.layers.23.self_attn.q_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 544 |
+
"model.language_model.layers.23.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
|
|
|
|
|
|
|
| 545 |
"model.language_model.layers.23.self_attn.v_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 546 |
"model.language_model.layers.24.input_layernorm.weight": "model-00003-of-00006.safetensors",
|
| 547 |
"model.language_model.layers.24.linear_attn.A_log": "model-00003-of-00006.safetensors",
|
|
|
|
| 655 |
"model.language_model.layers.27.post_attention_layernorm.weight": "model-00003-of-00006.safetensors",
|
| 656 |
"model.language_model.layers.27.self_attn.k_norm.weight": "model-00003-of-00006.safetensors",
|
| 657 |
"model.language_model.layers.27.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
|
| 658 |
+
"model.language_model.layers.27.self_attn.o_proj.input_global_scale": "model-00003-of-00006.safetensors",
|
| 659 |
+
"model.language_model.layers.27.self_attn.o_proj.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 660 |
+
"model.language_model.layers.27.self_attn.o_proj.weight_packed": "model-00003-of-00006.safetensors",
|
| 661 |
"model.language_model.layers.27.self_attn.o_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 662 |
"model.language_model.layers.27.self_attn.q_norm.weight": "model-00003-of-00006.safetensors",
|
| 663 |
"model.language_model.layers.27.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
|
|
|
|
| 668 |
"model.language_model.layers.28.linear_attn.dt_bias": "model-00003-of-00006.safetensors",
|
| 669 |
"model.language_model.layers.28.linear_attn.in_proj_a.weight": "model-00003-of-00006.safetensors",
|
| 670 |
"model.language_model.layers.28.linear_attn.in_proj_b.weight": "model-00003-of-00006.safetensors",
|
| 671 |
+
"model.language_model.layers.28.linear_attn.in_proj_qkv.input_global_scale": "model-00003-of-00006.safetensors",
|
| 672 |
+
"model.language_model.layers.28.linear_attn.in_proj_qkv.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 673 |
+
"model.language_model.layers.28.linear_attn.in_proj_qkv.weight_packed": "model-00003-of-00006.safetensors",
|
| 674 |
"model.language_model.layers.28.linear_attn.in_proj_qkv.weight_scale": "model-00003-of-00006.safetensors",
|
| 675 |
+
"model.language_model.layers.28.linear_attn.in_proj_z.input_global_scale": "model-00003-of-00006.safetensors",
|
| 676 |
+
"model.language_model.layers.28.linear_attn.in_proj_z.weight_global_scale": "model-00003-of-00006.safetensors",
|
| 677 |
+
"model.language_model.layers.28.linear_attn.in_proj_z.weight_packed": "model-00003-of-00006.safetensors",
|
| 678 |
"model.language_model.layers.28.linear_attn.in_proj_z.weight_scale": "model-00003-of-00006.safetensors",
|
| 679 |
"model.language_model.layers.28.linear_attn.norm.weight": "model-00003-of-00006.safetensors",
|
| 680 |
"model.language_model.layers.28.linear_attn.out_proj.input_global_scale": "model-00003-of-00006.safetensors",
|
|
|
|
| 788 |
"model.language_model.layers.31.self_attn.k_norm.weight": "model-00003-of-00006.safetensors",
|
| 789 |
"model.language_model.layers.31.self_attn.k_proj.weight": "model-00003-of-00006.safetensors",
|
| 790 |
"model.language_model.layers.31.self_attn.o_proj.weight": "model-00003-of-00006.safetensors",
|
| 791 |
+
"model.language_model.layers.31.self_attn.o_proj.weight_scale": "model-00003-of-00006.safetensors",
|
| 792 |
"model.language_model.layers.31.self_attn.q_norm.weight": "model-00003-of-00006.safetensors",
|
| 793 |
"model.language_model.layers.31.self_attn.q_proj.weight": "model-00003-of-00006.safetensors",
|
| 794 |
"model.language_model.layers.31.self_attn.v_proj.weight": "model-00003-of-00006.safetensors",
|
|
|
|
| 798 |
"model.language_model.layers.32.linear_attn.dt_bias": "model-00003-of-00006.safetensors",
|
| 799 |
"model.language_model.layers.32.linear_attn.in_proj_a.weight": "model-00003-of-00006.safetensors",
|
| 800 |
"model.language_model.layers.32.linear_attn.in_proj_b.weight": "model-00003-of-00006.safetensors",
|
| 801 |
+
"model.language_model.layers.32.linear_attn.in_proj_qkv.weight": "model-00004-of-00006.safetensors",
|
| 802 |
+
"model.language_model.layers.32.linear_attn.in_proj_z.weight": "model-00004-of-00006.safetensors",
|
| 803 |
+
"model.language_model.layers.32.linear_attn.norm.weight": "model-00004-of-00006.safetensors",
|
| 804 |
+
"model.language_model.layers.32.linear_attn.out_proj.input_global_scale": "model-00004-of-00006.safetensors",
|
| 805 |
+
"model.language_model.layers.32.linear_attn.out_proj.weight_global_scale": "model-00004-of-00006.safetensors",
|
| 806 |
+
"model.language_model.layers.32.linear_attn.out_proj.weight_packed": "model-00004-of-00006.safetensors",
|
| 807 |
+
"model.language_model.layers.32.linear_attn.out_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 808 |
+
"model.language_model.layers.32.mlp.down_proj.input_global_scale": "model-00004-of-00006.safetensors",
|
| 809 |
+
"model.language_model.layers.32.mlp.down_proj.weight_global_scale": "model-00004-of-00006.safetensors",
|
| 810 |
"model.language_model.layers.32.mlp.down_proj.weight_packed": "model-00004-of-00006.safetensors",
|
| 811 |
"model.language_model.layers.32.mlp.down_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 812 |
+
"model.language_model.layers.32.mlp.gate_proj.input_global_scale": "model-00004-of-00006.safetensors",
|
| 813 |
+
"model.language_model.layers.32.mlp.gate_proj.weight_global_scale": "model-00004-of-00006.safetensors",
|
| 814 |
+
"model.language_model.layers.32.mlp.gate_proj.weight_packed": "model-00004-of-00006.safetensors",
|
| 815 |
"model.language_model.layers.32.mlp.gate_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 816 |
+
"model.language_model.layers.32.mlp.up_proj.input_global_scale": "model-00004-of-00006.safetensors",
|
| 817 |
+
"model.language_model.layers.32.mlp.up_proj.weight_global_scale": "model-00004-of-00006.safetensors",
|
| 818 |
+
"model.language_model.layers.32.mlp.up_proj.weight_packed": "model-00004-of-00006.safetensors",
|
| 819 |
"model.language_model.layers.32.mlp.up_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 820 |
"model.language_model.layers.32.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
|
| 821 |
"model.language_model.layers.33.input_layernorm.weight": "model-00004-of-00006.safetensors",
|
|
|
|
| 886 |
"model.language_model.layers.35.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
|
| 887 |
"model.language_model.layers.35.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
|
| 888 |
"model.language_model.layers.35.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
|
| 889 |
+
"model.language_model.layers.35.self_attn.o_proj.input_global_scale": "model-00004-of-00006.safetensors",
|
| 890 |
+
"model.language_model.layers.35.self_attn.o_proj.weight_global_scale": "model-00004-of-00006.safetensors",
|
| 891 |
+
"model.language_model.layers.35.self_attn.o_proj.weight_packed": "model-00004-of-00006.safetensors",
|
| 892 |
"model.language_model.layers.35.self_attn.o_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 893 |
"model.language_model.layers.35.self_attn.q_norm.weight": "model-00004-of-00006.safetensors",
|
| 894 |
"model.language_model.layers.35.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
|
|
|
|
| 897 |
"model.language_model.layers.36.linear_attn.A_log": "model-00004-of-00006.safetensors",
|
| 898 |
"model.language_model.layers.36.linear_attn.conv1d.weight": "model-00004-of-00006.safetensors",
|
| 899 |
"model.language_model.layers.36.linear_attn.dt_bias": "model-00004-of-00006.safetensors",
|
| 900 |
+
"model.language_model.layers.36.linear_attn.in_proj_a.input_global_scale": "model-00004-of-00006.safetensors",
|
| 901 |
+
"model.language_model.layers.36.linear_attn.in_proj_a.weight_global_scale": "model-00004-of-00006.safetensors",
|
| 902 |
+
"model.language_model.layers.36.linear_attn.in_proj_a.weight_packed": "model-00004-of-00006.safetensors",
|
| 903 |
+
"model.language_model.layers.36.linear_attn.in_proj_a.weight_scale": "model-00004-of-00006.safetensors",
|
| 904 |
+
"model.language_model.layers.36.linear_attn.in_proj_b.input_global_scale": "model-00004-of-00006.safetensors",
|
| 905 |
+
"model.language_model.layers.36.linear_attn.in_proj_b.weight_global_scale": "model-00004-of-00006.safetensors",
|
| 906 |
+
"model.language_model.layers.36.linear_attn.in_proj_b.weight_packed": "model-00004-of-00006.safetensors",
|
| 907 |
+
"model.language_model.layers.36.linear_attn.in_proj_b.weight_scale": "model-00004-of-00006.safetensors",
|
| 908 |
"model.language_model.layers.36.linear_attn.in_proj_qkv.input_global_scale": "model-00004-of-00006.safetensors",
|
| 909 |
"model.language_model.layers.36.linear_attn.in_proj_qkv.weight_global_scale": "model-00004-of-00006.safetensors",
|
| 910 |
"model.language_model.layers.36.linear_attn.in_proj_qkv.weight_packed": "model-00004-of-00006.safetensors",
|
|
|
|
| 1016 |
"model.language_model.layers.39.mlp.up_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 1017 |
"model.language_model.layers.39.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
|
| 1018 |
"model.language_model.layers.39.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
|
| 1019 |
+
"model.language_model.layers.39.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
|
|
|
|
|
|
|
| 1020 |
"model.language_model.layers.39.self_attn.k_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 1021 |
"model.language_model.layers.39.self_attn.o_proj.input_global_scale": "model-00004-of-00006.safetensors",
|
| 1022 |
"model.language_model.layers.39.self_attn.o_proj.weight_global_scale": "model-00004-of-00006.safetensors",
|
| 1023 |
"model.language_model.layers.39.self_attn.o_proj.weight_packed": "model-00004-of-00006.safetensors",
|
| 1024 |
"model.language_model.layers.39.self_attn.o_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 1025 |
"model.language_model.layers.39.self_attn.q_norm.weight": "model-00004-of-00006.safetensors",
|
| 1026 |
+
"model.language_model.layers.39.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
|
|
|
|
|
|
|
| 1027 |
"model.language_model.layers.39.self_attn.q_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 1028 |
+
"model.language_model.layers.39.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
|
|
|
|
|
|
|
| 1029 |
"model.language_model.layers.39.self_attn.v_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 1030 |
"model.language_model.layers.4.input_layernorm.weight": "model-00004-of-00006.safetensors",
|
| 1031 |
"model.language_model.layers.4.linear_attn.A_log": "model-00004-of-00006.safetensors",
|
|
|
|
| 1178 |
"model.language_model.layers.43.self_attn.k_norm.weight": "model-00004-of-00006.safetensors",
|
| 1179 |
"model.language_model.layers.43.self_attn.k_proj.weight": "model-00004-of-00006.safetensors",
|
| 1180 |
"model.language_model.layers.43.self_attn.o_proj.weight": "model-00004-of-00006.safetensors",
|
| 1181 |
+
"model.language_model.layers.43.self_attn.o_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 1182 |
"model.language_model.layers.43.self_attn.q_norm.weight": "model-00004-of-00006.safetensors",
|
| 1183 |
"model.language_model.layers.43.self_attn.q_proj.weight": "model-00004-of-00006.safetensors",
|
| 1184 |
"model.language_model.layers.43.self_attn.v_proj.weight": "model-00004-of-00006.safetensors",
|
|
|
|
| 1227 |
"model.language_model.layers.45.mlp.down_proj.weight_global_scale": "model-00004-of-00006.safetensors",
|
| 1228 |
"model.language_model.layers.45.mlp.down_proj.weight_packed": "model-00004-of-00006.safetensors",
|
| 1229 |
"model.language_model.layers.45.mlp.down_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 1230 |
+
"model.language_model.layers.45.mlp.gate_proj.input_global_scale": "model-00004-of-00006.safetensors",
|
| 1231 |
+
"model.language_model.layers.45.mlp.gate_proj.weight_global_scale": "model-00004-of-00006.safetensors",
|
| 1232 |
+
"model.language_model.layers.45.mlp.gate_proj.weight_packed": "model-00004-of-00006.safetensors",
|
| 1233 |
"model.language_model.layers.45.mlp.gate_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 1234 |
+
"model.language_model.layers.45.mlp.up_proj.input_global_scale": "model-00004-of-00006.safetensors",
|
| 1235 |
+
"model.language_model.layers.45.mlp.up_proj.weight_global_scale": "model-00004-of-00006.safetensors",
|
| 1236 |
+
"model.language_model.layers.45.mlp.up_proj.weight_packed": "model-00004-of-00006.safetensors",
|
| 1237 |
"model.language_model.layers.45.mlp.up_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 1238 |
"model.language_model.layers.45.post_attention_layernorm.weight": "model-00004-of-00006.safetensors",
|
| 1239 |
"model.language_model.layers.46.input_layernorm.weight": "model-00004-of-00006.safetensors",
|
|
|
|
| 1245 |
"model.language_model.layers.46.linear_attn.in_proj_qkv.weight": "model-00004-of-00006.safetensors",
|
| 1246 |
"model.language_model.layers.46.linear_attn.in_proj_z.weight": "model-00004-of-00006.safetensors",
|
| 1247 |
"model.language_model.layers.46.linear_attn.norm.weight": "model-00004-of-00006.safetensors",
|
| 1248 |
+
"model.language_model.layers.46.linear_attn.out_proj.input_global_scale": "model-00004-of-00006.safetensors",
|
| 1249 |
+
"model.language_model.layers.46.linear_attn.out_proj.weight_global_scale": "model-00004-of-00006.safetensors",
|
| 1250 |
+
"model.language_model.layers.46.linear_attn.out_proj.weight_packed": "model-00004-of-00006.safetensors",
|
| 1251 |
"model.language_model.layers.46.linear_attn.out_proj.weight_scale": "model-00004-of-00006.safetensors",
|
| 1252 |
"model.language_model.layers.46.mlp.down_proj.input_global_scale": "model-00004-of-00006.safetensors",
|
| 1253 |
"model.language_model.layers.46.mlp.down_proj.weight_global_scale": "model-00004-of-00006.safetensors",
|
|
|
|
| 1270 |
"model.language_model.layers.47.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
|
| 1271 |
"model.language_model.layers.47.self_attn.k_norm.weight": "model-00005-of-00006.safetensors",
|
| 1272 |
"model.language_model.layers.47.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
|
| 1273 |
+
"model.language_model.layers.47.self_attn.o_proj.input_global_scale": "model-00005-of-00006.safetensors",
|
| 1274 |
+
"model.language_model.layers.47.self_attn.o_proj.weight_global_scale": "model-00005-of-00006.safetensors",
|
| 1275 |
+
"model.language_model.layers.47.self_attn.o_proj.weight_packed": "model-00005-of-00006.safetensors",
|
| 1276 |
"model.language_model.layers.47.self_attn.o_proj.weight_scale": "model-00005-of-00006.safetensors",
|
| 1277 |
"model.language_model.layers.47.self_attn.q_norm.weight": "model-00005-of-00006.safetensors",
|
| 1278 |
"model.language_model.layers.47.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
|
|
|
|
| 1408 |
"model.language_model.layers.51.mlp.up_proj.weight_scale": "model-00005-of-00006.safetensors",
|
| 1409 |
"model.language_model.layers.51.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
|
| 1410 |
"model.language_model.layers.51.self_attn.k_norm.weight": "model-00005-of-00006.safetensors",
|
| 1411 |
+
"model.language_model.layers.51.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
|
|
|
|
|
|
|
| 1412 |
"model.language_model.layers.51.self_attn.k_proj.weight_scale": "model-00005-of-00006.safetensors",
|
| 1413 |
"model.language_model.layers.51.self_attn.o_proj.input_global_scale": "model-00005-of-00006.safetensors",
|
| 1414 |
"model.language_model.layers.51.self_attn.o_proj.weight_global_scale": "model-00005-of-00006.safetensors",
|
| 1415 |
"model.language_model.layers.51.self_attn.o_proj.weight_packed": "model-00005-of-00006.safetensors",
|
| 1416 |
"model.language_model.layers.51.self_attn.o_proj.weight_scale": "model-00005-of-00006.safetensors",
|
| 1417 |
"model.language_model.layers.51.self_attn.q_norm.weight": "model-00005-of-00006.safetensors",
|
| 1418 |
+
"model.language_model.layers.51.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
|
|
|
|
|
|
|
| 1419 |
"model.language_model.layers.51.self_attn.q_proj.weight_scale": "model-00005-of-00006.safetensors",
|
| 1420 |
+
"model.language_model.layers.51.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
|
|
|
|
|
|
|
| 1421 |
"model.language_model.layers.51.self_attn.v_proj.weight_scale": "model-00005-of-00006.safetensors",
|
| 1422 |
"model.language_model.layers.52.input_layernorm.weight": "model-00005-of-00006.safetensors",
|
| 1423 |
"model.language_model.layers.52.linear_attn.A_log": "model-00005-of-00006.safetensors",
|
|
|
|
| 1673 |
"model.language_model.layers.59.post_attention_layernorm.weight": "model-00005-of-00006.safetensors",
|
| 1674 |
"model.language_model.layers.59.self_attn.k_norm.weight": "model-00005-of-00006.safetensors",
|
| 1675 |
"model.language_model.layers.59.self_attn.k_proj.weight": "model-00005-of-00006.safetensors",
|
| 1676 |
+
"model.language_model.layers.59.self_attn.o_proj.input_global_scale": "model-00005-of-00006.safetensors",
|
| 1677 |
+
"model.language_model.layers.59.self_attn.o_proj.weight_global_scale": "model-00005-of-00006.safetensors",
|
| 1678 |
+
"model.language_model.layers.59.self_attn.o_proj.weight_packed": "model-00005-of-00006.safetensors",
|
| 1679 |
"model.language_model.layers.59.self_attn.o_proj.weight_scale": "model-00005-of-00006.safetensors",
|
| 1680 |
"model.language_model.layers.59.self_attn.q_norm.weight": "model-00005-of-00006.safetensors",
|
| 1681 |
"model.language_model.layers.59.self_attn.q_proj.weight": "model-00005-of-00006.safetensors",
|
|
|
|
| 1682 |
"model.language_model.layers.59.self_attn.v_proj.weight": "model-00005-of-00006.safetensors",
|
|
|
|
| 1683 |
"model.language_model.layers.6.input_layernorm.weight": "model-00005-of-00006.safetensors",
|
| 1684 |
"model.language_model.layers.6.linear_attn.A_log": "model-00005-of-00006.safetensors",
|
| 1685 |
"model.language_model.layers.6.linear_attn.conv1d.weight": "model-00005-of-00006.safetensors",
|
|
|
|
| 1806 |
"model.language_model.layers.62.linear_attn.out_proj.input_global_scale": "model-00005-of-00006.safetensors",
|
| 1807 |
"model.language_model.layers.62.linear_attn.out_proj.weight_global_scale": "model-00005-of-00006.safetensors",
|
| 1808 |
"model.language_model.layers.62.linear_attn.out_proj.weight_packed": "model-00005-of-00006.safetensors",
|
| 1809 |
+
"model.language_model.layers.62.linear_attn.out_proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1810 |
+
"model.language_model.layers.62.mlp.down_proj.input_global_scale": "model-00006-of-00006.safetensors",
|
| 1811 |
+
"model.language_model.layers.62.mlp.down_proj.weight_global_scale": "model-00006-of-00006.safetensors",
|
| 1812 |
+
"model.language_model.layers.62.mlp.down_proj.weight_packed": "model-00006-of-00006.safetensors",
|
| 1813 |
+
"model.language_model.layers.62.mlp.down_proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1814 |
+
"model.language_model.layers.62.mlp.gate_proj.input_global_scale": "model-00006-of-00006.safetensors",
|
| 1815 |
+
"model.language_model.layers.62.mlp.gate_proj.weight_global_scale": "model-00006-of-00006.safetensors",
|
| 1816 |
"model.language_model.layers.62.mlp.gate_proj.weight_packed": "model-00006-of-00006.safetensors",
|
| 1817 |
"model.language_model.layers.62.mlp.gate_proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1818 |
"model.language_model.layers.62.mlp.up_proj.input_global_scale": "model-00006-of-00006.safetensors",
|
|
|
|
| 1821 |
"model.language_model.layers.62.mlp.up_proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1822 |
"model.language_model.layers.62.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
|
| 1823 |
"model.language_model.layers.63.input_layernorm.weight": "model-00006-of-00006.safetensors",
|
| 1824 |
+
"model.language_model.layers.63.mlp.down_proj.input_global_scale": "model-00006-of-00006.safetensors",
|
| 1825 |
+
"model.language_model.layers.63.mlp.down_proj.weight_global_scale": "model-00006-of-00006.safetensors",
|
| 1826 |
+
"model.language_model.layers.63.mlp.down_proj.weight_packed": "model-00006-of-00006.safetensors",
|
| 1827 |
"model.language_model.layers.63.mlp.down_proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1828 |
"model.language_model.layers.63.mlp.gate_proj.weight": "model-00006-of-00006.safetensors",
|
| 1829 |
"model.language_model.layers.63.mlp.up_proj.weight": "model-00006-of-00006.safetensors",
|
| 1830 |
"model.language_model.layers.63.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
|
| 1831 |
"model.language_model.layers.63.self_attn.k_norm.weight": "model-00006-of-00006.safetensors",
|
| 1832 |
"model.language_model.layers.63.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
|
| 1833 |
+
"model.language_model.layers.63.self_attn.k_proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1834 |
"model.language_model.layers.63.self_attn.o_proj.input_global_scale": "model-00006-of-00006.safetensors",
|
| 1835 |
"model.language_model.layers.63.self_attn.o_proj.weight_global_scale": "model-00006-of-00006.safetensors",
|
| 1836 |
"model.language_model.layers.63.self_attn.o_proj.weight_packed": "model-00006-of-00006.safetensors",
|
| 1837 |
"model.language_model.layers.63.self_attn.o_proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1838 |
"model.language_model.layers.63.self_attn.q_norm.weight": "model-00006-of-00006.safetensors",
|
| 1839 |
"model.language_model.layers.63.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
|
| 1840 |
+
"model.language_model.layers.63.self_attn.q_proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1841 |
"model.language_model.layers.63.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
|
| 1842 |
+
"model.language_model.layers.63.self_attn.v_proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1843 |
"model.language_model.layers.7.input_layernorm.weight": "model-00006-of-00006.safetensors",
|
| 1844 |
"model.language_model.layers.7.mlp.down_proj.input_global_scale": "model-00006-of-00006.safetensors",
|
| 1845 |
"model.language_model.layers.7.mlp.down_proj.weight_global_scale": "model-00006-of-00006.safetensors",
|
|
|
|
| 1856 |
"model.language_model.layers.7.post_attention_layernorm.weight": "model-00006-of-00006.safetensors",
|
| 1857 |
"model.language_model.layers.7.self_attn.k_norm.weight": "model-00006-of-00006.safetensors",
|
| 1858 |
"model.language_model.layers.7.self_attn.k_proj.weight": "model-00006-of-00006.safetensors",
|
|
|
|
| 1859 |
"model.language_model.layers.7.self_attn.o_proj.input_global_scale": "model-00006-of-00006.safetensors",
|
| 1860 |
"model.language_model.layers.7.self_attn.o_proj.weight_global_scale": "model-00006-of-00006.safetensors",
|
| 1861 |
"model.language_model.layers.7.self_attn.o_proj.weight_packed": "model-00006-of-00006.safetensors",
|
| 1862 |
"model.language_model.layers.7.self_attn.o_proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1863 |
"model.language_model.layers.7.self_attn.q_norm.weight": "model-00006-of-00006.safetensors",
|
| 1864 |
"model.language_model.layers.7.self_attn.q_proj.weight": "model-00006-of-00006.safetensors",
|
|
|
|
| 1865 |
"model.language_model.layers.7.self_attn.v_proj.weight": "model-00006-of-00006.safetensors",
|
|
|
|
| 1866 |
"model.language_model.layers.8.input_layernorm.weight": "model-00006-of-00006.safetensors",
|
| 1867 |
"model.language_model.layers.8.linear_attn.A_log": "model-00006-of-00006.safetensors",
|
| 1868 |
"model.language_model.layers.8.linear_attn.conv1d.weight": "model-00006-of-00006.safetensors",
|
|
|
|
| 1965 |
"model.visual.blocks.1.norm2.bias": "model-00006-of-00006.safetensors",
|
| 1966 |
"model.visual.blocks.1.norm2.weight": "model-00006-of-00006.safetensors",
|
| 1967 |
"model.visual.blocks.10.attn.proj.bias": "model-00006-of-00006.safetensors",
|
| 1968 |
+
"model.visual.blocks.10.attn.proj.input_global_scale": "model-00006-of-00006.safetensors",
|
| 1969 |
+
"model.visual.blocks.10.attn.proj.weight_global_scale": "model-00006-of-00006.safetensors",
|
| 1970 |
+
"model.visual.blocks.10.attn.proj.weight_packed": "model-00006-of-00006.safetensors",
|
| 1971 |
+
"model.visual.blocks.10.attn.proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1972 |
"model.visual.blocks.10.attn.qkv.bias": "model-00006-of-00006.safetensors",
|
| 1973 |
+
"model.visual.blocks.10.attn.qkv.input_global_scale": "model-00006-of-00006.safetensors",
|
| 1974 |
+
"model.visual.blocks.10.attn.qkv.weight_global_scale": "model-00006-of-00006.safetensors",
|
| 1975 |
+
"model.visual.blocks.10.attn.qkv.weight_packed": "model-00006-of-00006.safetensors",
|
| 1976 |
"model.visual.blocks.10.attn.qkv.weight_scale": "model-00006-of-00006.safetensors",
|
| 1977 |
"model.visual.blocks.10.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
|
| 1978 |
"model.visual.blocks.10.mlp.linear_fc1.input_global_scale": "model-00006-of-00006.safetensors",
|
|
|
|
| 1989 |
"model.visual.blocks.10.norm2.bias": "model-00006-of-00006.safetensors",
|
| 1990 |
"model.visual.blocks.10.norm2.weight": "model-00006-of-00006.safetensors",
|
| 1991 |
"model.visual.blocks.11.attn.proj.bias": "model-00006-of-00006.safetensors",
|
| 1992 |
+
"model.visual.blocks.11.attn.proj.input_global_scale": "model-00006-of-00006.safetensors",
|
| 1993 |
+
"model.visual.blocks.11.attn.proj.weight_global_scale": "model-00006-of-00006.safetensors",
|
| 1994 |
+
"model.visual.blocks.11.attn.proj.weight_packed": "model-00006-of-00006.safetensors",
|
| 1995 |
"model.visual.blocks.11.attn.proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 1996 |
"model.visual.blocks.11.attn.qkv.bias": "model-00006-of-00006.safetensors",
|
| 1997 |
+
"model.visual.blocks.11.attn.qkv.input_global_scale": "model-00006-of-00006.safetensors",
|
| 1998 |
+
"model.visual.blocks.11.attn.qkv.weight_global_scale": "model-00006-of-00006.safetensors",
|
| 1999 |
+
"model.visual.blocks.11.attn.qkv.weight_packed": "model-00006-of-00006.safetensors",
|
| 2000 |
"model.visual.blocks.11.attn.qkv.weight_scale": "model-00006-of-00006.safetensors",
|
| 2001 |
"model.visual.blocks.11.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
|
| 2002 |
"model.visual.blocks.11.mlp.linear_fc1.input_global_scale": "model-00006-of-00006.safetensors",
|
|
|
|
| 2018 |
"model.visual.blocks.12.attn.proj.weight_packed": "model-00006-of-00006.safetensors",
|
| 2019 |
"model.visual.blocks.12.attn.proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 2020 |
"model.visual.blocks.12.attn.qkv.bias": "model-00006-of-00006.safetensors",
|
| 2021 |
+
"model.visual.blocks.12.attn.qkv.input_global_scale": "model-00006-of-00006.safetensors",
|
| 2022 |
+
"model.visual.blocks.12.attn.qkv.weight_global_scale": "model-00006-of-00006.safetensors",
|
| 2023 |
+
"model.visual.blocks.12.attn.qkv.weight_packed": "model-00006-of-00006.safetensors",
|
| 2024 |
"model.visual.blocks.12.attn.qkv.weight_scale": "model-00006-of-00006.safetensors",
|
| 2025 |
"model.visual.blocks.12.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
|
| 2026 |
"model.visual.blocks.12.mlp.linear_fc1.input_global_scale": "model-00006-of-00006.safetensors",
|
|
|
|
| 2037 |
"model.visual.blocks.12.norm2.bias": "model-00006-of-00006.safetensors",
|
| 2038 |
"model.visual.blocks.12.norm2.weight": "model-00006-of-00006.safetensors",
|
| 2039 |
"model.visual.blocks.13.attn.proj.bias": "model-00006-of-00006.safetensors",
|
| 2040 |
+
"model.visual.blocks.13.attn.proj.input_global_scale": "model-00006-of-00006.safetensors",
|
| 2041 |
+
"model.visual.blocks.13.attn.proj.weight_global_scale": "model-00006-of-00006.safetensors",
|
| 2042 |
+
"model.visual.blocks.13.attn.proj.weight_packed": "model-00006-of-00006.safetensors",
|
| 2043 |
"model.visual.blocks.13.attn.proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 2044 |
"model.visual.blocks.13.attn.qkv.bias": "model-00006-of-00006.safetensors",
|
| 2045 |
"model.visual.blocks.13.attn.qkv.weight": "model-00006-of-00006.safetensors",
|
| 2046 |
+
"model.visual.blocks.13.attn.qkv.weight_scale": "model-00006-of-00006.safetensors",
|
| 2047 |
"model.visual.blocks.13.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
|
| 2048 |
"model.visual.blocks.13.mlp.linear_fc1.input_global_scale": "model-00006-of-00006.safetensors",
|
| 2049 |
"model.visual.blocks.13.mlp.linear_fc1.weight_global_scale": "model-00006-of-00006.safetensors",
|
|
|
|
| 2064 |
"model.visual.blocks.14.attn.proj.weight_packed": "model-00006-of-00006.safetensors",
|
| 2065 |
"model.visual.blocks.14.attn.proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 2066 |
"model.visual.blocks.14.attn.qkv.bias": "model-00006-of-00006.safetensors",
|
| 2067 |
+
"model.visual.blocks.14.attn.qkv.input_global_scale": "model-00006-of-00006.safetensors",
|
| 2068 |
+
"model.visual.blocks.14.attn.qkv.weight_global_scale": "model-00006-of-00006.safetensors",
|
| 2069 |
+
"model.visual.blocks.14.attn.qkv.weight_packed": "model-00006-of-00006.safetensors",
|
| 2070 |
"model.visual.blocks.14.attn.qkv.weight_scale": "model-00006-of-00006.safetensors",
|
| 2071 |
"model.visual.blocks.14.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
|
| 2072 |
"model.visual.blocks.14.mlp.linear_fc1.input_global_scale": "model-00006-of-00006.safetensors",
|
|
|
|
| 2320 |
"model.visual.blocks.24.mlp.linear_fc1.weight_packed": "model-00006-of-00006.safetensors",
|
| 2321 |
"model.visual.blocks.24.mlp.linear_fc1.weight_scale": "model-00006-of-00006.safetensors",
|
| 2322 |
"model.visual.blocks.24.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
|
| 2323 |
+
"model.visual.blocks.24.mlp.linear_fc2.input_global_scale": "model-00006-of-00006.safetensors",
|
| 2324 |
+
"model.visual.blocks.24.mlp.linear_fc2.weight_global_scale": "model-00006-of-00006.safetensors",
|
| 2325 |
+
"model.visual.blocks.24.mlp.linear_fc2.weight_packed": "model-00006-of-00006.safetensors",
|
| 2326 |
+
"model.visual.blocks.24.mlp.linear_fc2.weight_scale": "model-00006-of-00006.safetensors",
|
| 2327 |
"model.visual.blocks.24.norm1.bias": "model-00006-of-00006.safetensors",
|
| 2328 |
"model.visual.blocks.24.norm1.weight": "model-00006-of-00006.safetensors",
|
| 2329 |
"model.visual.blocks.24.norm2.bias": "model-00006-of-00006.safetensors",
|
|
|
|
| 2399 |
"model.visual.blocks.5.attn.qkv.bias": "model-00006-of-00006.safetensors",
|
| 2400 |
"model.visual.blocks.5.attn.qkv.weight": "model-00006-of-00006.safetensors",
|
| 2401 |
"model.visual.blocks.5.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
|
| 2402 |
+
"model.visual.blocks.5.mlp.linear_fc1.input_global_scale": "model-00006-of-00006.safetensors",
|
| 2403 |
+
"model.visual.blocks.5.mlp.linear_fc1.weight_global_scale": "model-00006-of-00006.safetensors",
|
| 2404 |
+
"model.visual.blocks.5.mlp.linear_fc1.weight_packed": "model-00006-of-00006.safetensors",
|
| 2405 |
+
"model.visual.blocks.5.mlp.linear_fc1.weight_scale": "model-00006-of-00006.safetensors",
|
| 2406 |
"model.visual.blocks.5.mlp.linear_fc2.bias": "model-00006-of-00006.safetensors",
|
| 2407 |
"model.visual.blocks.5.mlp.linear_fc2.input_global_scale": "model-00006-of-00006.safetensors",
|
| 2408 |
"model.visual.blocks.5.mlp.linear_fc2.weight_global_scale": "model-00006-of-00006.safetensors",
|
|
|
|
| 2433 |
"model.visual.blocks.6.norm2.weight": "model-00006-of-00006.safetensors",
|
| 2434 |
"model.visual.blocks.7.attn.proj.bias": "model-00006-of-00006.safetensors",
|
| 2435 |
"model.visual.blocks.7.attn.proj.weight": "model-00006-of-00006.safetensors",
|
| 2436 |
+
"model.visual.blocks.7.attn.proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 2437 |
"model.visual.blocks.7.attn.qkv.bias": "model-00006-of-00006.safetensors",
|
| 2438 |
+
"model.visual.blocks.7.attn.qkv.input_global_scale": "model-00006-of-00006.safetensors",
|
| 2439 |
+
"model.visual.blocks.7.attn.qkv.weight_global_scale": "model-00006-of-00006.safetensors",
|
| 2440 |
+
"model.visual.blocks.7.attn.qkv.weight_packed": "model-00006-of-00006.safetensors",
|
| 2441 |
"model.visual.blocks.7.attn.qkv.weight_scale": "model-00006-of-00006.safetensors",
|
| 2442 |
"model.visual.blocks.7.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
|
| 2443 |
"model.visual.blocks.7.mlp.linear_fc1.input_global_scale": "model-00006-of-00006.safetensors",
|
|
|
|
| 2457 |
"model.visual.blocks.8.attn.proj.weight": "model-00006-of-00006.safetensors",
|
| 2458 |
"model.visual.blocks.8.attn.qkv.bias": "model-00006-of-00006.safetensors",
|
| 2459 |
"model.visual.blocks.8.attn.qkv.weight": "model-00006-of-00006.safetensors",
|
| 2460 |
+
"model.visual.blocks.8.attn.qkv.weight_scale": "model-00006-of-00006.safetensors",
|
| 2461 |
"model.visual.blocks.8.mlp.linear_fc1.bias": "model-00006-of-00006.safetensors",
|
| 2462 |
"model.visual.blocks.8.mlp.linear_fc1.input_global_scale": "model-00006-of-00006.safetensors",
|
| 2463 |
"model.visual.blocks.8.mlp.linear_fc1.weight_global_scale": "model-00006-of-00006.safetensors",
|
|
|
|
| 2474 |
"model.visual.blocks.8.norm2.weight": "model-00006-of-00006.safetensors",
|
| 2475 |
"model.visual.blocks.9.attn.proj.bias": "model-00006-of-00006.safetensors",
|
| 2476 |
"model.visual.blocks.9.attn.proj.weight": "model-00006-of-00006.safetensors",
|
| 2477 |
+
"model.visual.blocks.9.attn.proj.weight_scale": "model-00006-of-00006.safetensors",
|
| 2478 |
"model.visual.blocks.9.attn.qkv.bias": "model-00006-of-00006.safetensors",
|
| 2479 |
"model.visual.blocks.9.attn.qkv.weight": "model-00006-of-00006.safetensors",
|
| 2480 |
"model.visual.blocks.9.attn.qkv.weight_scale": "model-00006-of-00006.safetensors",
|