cpatonn committed on
Commit
affb52d
·
verified ·
1 Parent(s): 858208a

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -172,6 +172,10 @@
172
  "model.language_model.layers.2.mlp.shared_expert.up_proj",
173
  "model.language_model.layers.2.mlp.shared_expert.down_proj",
174
  "model.language_model.layers.2.mlp.shared_expert_gate",
 
 
 
 
175
  "model.language_model.layers.3.mlp.shared_expert.gate_proj",
176
  "model.language_model.layers.3.mlp.shared_expert.up_proj",
177
  "model.language_model.layers.3.mlp.shared_expert.down_proj",
@@ -206,6 +210,10 @@
206
  "model.language_model.layers.6.mlp.shared_expert.up_proj",
207
  "model.language_model.layers.6.mlp.shared_expert.down_proj",
208
  "model.language_model.layers.6.mlp.shared_expert_gate",
 
 
 
 
209
  "model.language_model.layers.7.mlp.shared_expert.gate_proj",
210
  "model.language_model.layers.7.mlp.shared_expert.up_proj",
211
  "model.language_model.layers.7.mlp.shared_expert.down_proj",
@@ -240,6 +248,10 @@
240
  "model.language_model.layers.10.mlp.shared_expert.up_proj",
241
  "model.language_model.layers.10.mlp.shared_expert.down_proj",
242
  "model.language_model.layers.10.mlp.shared_expert_gate",
 
 
 
 
243
  "model.language_model.layers.11.mlp.shared_expert.gate_proj",
244
  "model.language_model.layers.11.mlp.shared_expert.up_proj",
245
  "model.language_model.layers.11.mlp.shared_expert.down_proj",
@@ -274,6 +286,10 @@
274
  "model.language_model.layers.14.mlp.shared_expert.up_proj",
275
  "model.language_model.layers.14.mlp.shared_expert.down_proj",
276
  "model.language_model.layers.14.mlp.shared_expert_gate",
 
 
 
 
277
  "model.language_model.layers.15.mlp.shared_expert.gate_proj",
278
  "model.language_model.layers.15.mlp.shared_expert.up_proj",
279
  "model.language_model.layers.15.mlp.shared_expert.down_proj",
@@ -308,6 +324,10 @@
308
  "model.language_model.layers.18.mlp.shared_expert.up_proj",
309
  "model.language_model.layers.18.mlp.shared_expert.down_proj",
310
  "model.language_model.layers.18.mlp.shared_expert_gate",
 
 
 
 
311
  "model.language_model.layers.19.mlp.shared_expert.gate_proj",
312
  "model.language_model.layers.19.mlp.shared_expert.up_proj",
313
  "model.language_model.layers.19.mlp.shared_expert.down_proj",
@@ -342,6 +362,10 @@
342
  "model.language_model.layers.22.mlp.shared_expert.up_proj",
343
  "model.language_model.layers.22.mlp.shared_expert.down_proj",
344
  "model.language_model.layers.22.mlp.shared_expert_gate",
 
 
 
 
345
  "model.language_model.layers.23.mlp.shared_expert.gate_proj",
346
  "model.language_model.layers.23.mlp.shared_expert.up_proj",
347
  "model.language_model.layers.23.mlp.shared_expert.down_proj",
@@ -376,6 +400,10 @@
376
  "model.language_model.layers.26.mlp.shared_expert.up_proj",
377
  "model.language_model.layers.26.mlp.shared_expert.down_proj",
378
  "model.language_model.layers.26.mlp.shared_expert_gate",
 
 
 
 
379
  "model.language_model.layers.27.mlp.shared_expert.gate_proj",
380
  "model.language_model.layers.27.mlp.shared_expert.up_proj",
381
  "model.language_model.layers.27.mlp.shared_expert.down_proj",
@@ -410,6 +438,10 @@
410
  "model.language_model.layers.30.mlp.shared_expert.up_proj",
411
  "model.language_model.layers.30.mlp.shared_expert.down_proj",
412
  "model.language_model.layers.30.mlp.shared_expert_gate",
 
 
 
 
413
  "model.language_model.layers.31.mlp.shared_expert.gate_proj",
414
  "model.language_model.layers.31.mlp.shared_expert.up_proj",
415
  "model.language_model.layers.31.mlp.shared_expert.down_proj",
@@ -444,6 +476,10 @@
444
  "model.language_model.layers.34.mlp.shared_expert.up_proj",
445
  "model.language_model.layers.34.mlp.shared_expert.down_proj",
446
  "model.language_model.layers.34.mlp.shared_expert_gate",
 
 
 
 
447
  "model.language_model.layers.35.mlp.shared_expert.gate_proj",
448
  "model.language_model.layers.35.mlp.shared_expert.up_proj",
449
  "model.language_model.layers.35.mlp.shared_expert.down_proj",
@@ -478,56 +514,24 @@
478
  "model.language_model.layers.38.mlp.shared_expert.up_proj",
479
  "model.language_model.layers.38.mlp.shared_expert.down_proj",
480
  "model.language_model.layers.38.mlp.shared_expert_gate",
 
 
 
 
481
  "model.language_model.layers.39.mlp.shared_expert.gate_proj",
482
  "model.language_model.layers.39.mlp.shared_expert.up_proj",
483
  "model.language_model.layers.39.mlp.shared_expert.down_proj",
484
  "model.language_model.layers.39.mlp.shared_expert_gate",
485
- "model.language_model.layers.0.mlp.gate",
486
- "model.language_model.layers.1.mlp.gate",
487
- "model.language_model.layers.2.mlp.gate",
488
- "model.language_model.layers.3.mlp.gate",
489
- "model.language_model.layers.4.mlp.gate",
490
- "model.language_model.layers.5.mlp.gate",
491
- "model.language_model.layers.6.mlp.gate",
492
- "model.language_model.layers.7.mlp.gate",
493
- "model.language_model.layers.8.mlp.gate",
494
- "model.language_model.layers.9.mlp.gate",
495
- "model.language_model.layers.10.mlp.gate",
496
- "model.language_model.layers.11.mlp.gate",
497
- "model.language_model.layers.12.mlp.gate",
498
- "model.language_model.layers.13.mlp.gate",
499
- "model.language_model.layers.14.mlp.gate",
500
- "model.language_model.layers.15.mlp.gate",
501
- "model.language_model.layers.16.mlp.gate",
502
- "model.language_model.layers.17.mlp.gate",
503
- "model.language_model.layers.18.mlp.gate",
504
- "model.language_model.layers.19.mlp.gate",
505
- "model.language_model.layers.20.mlp.gate",
506
- "model.language_model.layers.21.mlp.gate",
507
- "model.language_model.layers.22.mlp.gate",
508
- "model.language_model.layers.23.mlp.gate",
509
- "model.language_model.layers.24.mlp.gate",
510
- "model.language_model.layers.25.mlp.gate",
511
- "model.language_model.layers.26.mlp.gate",
512
- "model.language_model.layers.27.mlp.gate",
513
- "model.language_model.layers.28.mlp.gate",
514
- "model.language_model.layers.29.mlp.gate",
515
- "model.language_model.layers.30.mlp.gate",
516
- "model.language_model.layers.31.mlp.gate",
517
- "model.language_model.layers.32.mlp.gate",
518
- "model.language_model.layers.33.mlp.gate",
519
- "model.language_model.layers.34.mlp.gate",
520
- "model.language_model.layers.35.mlp.gate",
521
- "model.language_model.layers.36.mlp.gate",
522
- "model.language_model.layers.37.mlp.gate",
523
- "model.language_model.layers.38.mlp.gate",
524
- "model.language_model.layers.39.mlp.gate",
525
  "mtp.fc",
526
  "mtp.layers.0.mlp.gate",
527
  "mtp.layers.0.mlp.shared_expert.gate_up_proj",
528
  "mtp.layers.0.mlp.shared_expert.gate_proj",
529
  "mtp.layers.0.mlp.shared_expert.up_proj",
530
  "mtp.layers.0.mlp.shared_expert.down_proj",
 
 
 
 
531
  "lm_head"
532
  ],
533
  "kv_cache_scheme": null,
 
172
  "model.language_model.layers.2.mlp.shared_expert.up_proj",
173
  "model.language_model.layers.2.mlp.shared_expert.down_proj",
174
  "model.language_model.layers.2.mlp.shared_expert_gate",
175
+ "model.language_model.layers.3.self_attn.q_proj",
176
+ "model.language_model.layers.3.self_attn.k_proj",
177
+ "model.language_model.layers.3.self_attn.v_proj",
178
+ "model.language_model.layers.3.self_attn.o_proj",
179
  "model.language_model.layers.3.mlp.shared_expert.gate_proj",
180
  "model.language_model.layers.3.mlp.shared_expert.up_proj",
181
  "model.language_model.layers.3.mlp.shared_expert.down_proj",
 
210
  "model.language_model.layers.6.mlp.shared_expert.up_proj",
211
  "model.language_model.layers.6.mlp.shared_expert.down_proj",
212
  "model.language_model.layers.6.mlp.shared_expert_gate",
213
+ "model.language_model.layers.7.self_attn.q_proj",
214
+ "model.language_model.layers.7.self_attn.k_proj",
215
+ "model.language_model.layers.7.self_attn.v_proj",
216
+ "model.language_model.layers.7.self_attn.o_proj",
217
  "model.language_model.layers.7.mlp.shared_expert.gate_proj",
218
  "model.language_model.layers.7.mlp.shared_expert.up_proj",
219
  "model.language_model.layers.7.mlp.shared_expert.down_proj",
 
248
  "model.language_model.layers.10.mlp.shared_expert.up_proj",
249
  "model.language_model.layers.10.mlp.shared_expert.down_proj",
250
  "model.language_model.layers.10.mlp.shared_expert_gate",
251
+ "model.language_model.layers.11.self_attn.q_proj",
252
+ "model.language_model.layers.11.self_attn.k_proj",
253
+ "model.language_model.layers.11.self_attn.v_proj",
254
+ "model.language_model.layers.11.self_attn.o_proj",
255
  "model.language_model.layers.11.mlp.shared_expert.gate_proj",
256
  "model.language_model.layers.11.mlp.shared_expert.up_proj",
257
  "model.language_model.layers.11.mlp.shared_expert.down_proj",
 
286
  "model.language_model.layers.14.mlp.shared_expert.up_proj",
287
  "model.language_model.layers.14.mlp.shared_expert.down_proj",
288
  "model.language_model.layers.14.mlp.shared_expert_gate",
289
+ "model.language_model.layers.15.self_attn.q_proj",
290
+ "model.language_model.layers.15.self_attn.k_proj",
291
+ "model.language_model.layers.15.self_attn.v_proj",
292
+ "model.language_model.layers.15.self_attn.o_proj",
293
  "model.language_model.layers.15.mlp.shared_expert.gate_proj",
294
  "model.language_model.layers.15.mlp.shared_expert.up_proj",
295
  "model.language_model.layers.15.mlp.shared_expert.down_proj",
 
324
  "model.language_model.layers.18.mlp.shared_expert.up_proj",
325
  "model.language_model.layers.18.mlp.shared_expert.down_proj",
326
  "model.language_model.layers.18.mlp.shared_expert_gate",
327
+ "model.language_model.layers.19.self_attn.q_proj",
328
+ "model.language_model.layers.19.self_attn.k_proj",
329
+ "model.language_model.layers.19.self_attn.v_proj",
330
+ "model.language_model.layers.19.self_attn.o_proj",
331
  "model.language_model.layers.19.mlp.shared_expert.gate_proj",
332
  "model.language_model.layers.19.mlp.shared_expert.up_proj",
333
  "model.language_model.layers.19.mlp.shared_expert.down_proj",
 
362
  "model.language_model.layers.22.mlp.shared_expert.up_proj",
363
  "model.language_model.layers.22.mlp.shared_expert.down_proj",
364
  "model.language_model.layers.22.mlp.shared_expert_gate",
365
+ "model.language_model.layers.23.self_attn.q_proj",
366
+ "model.language_model.layers.23.self_attn.k_proj",
367
+ "model.language_model.layers.23.self_attn.v_proj",
368
+ "model.language_model.layers.23.self_attn.o_proj",
369
  "model.language_model.layers.23.mlp.shared_expert.gate_proj",
370
  "model.language_model.layers.23.mlp.shared_expert.up_proj",
371
  "model.language_model.layers.23.mlp.shared_expert.down_proj",
 
400
  "model.language_model.layers.26.mlp.shared_expert.up_proj",
401
  "model.language_model.layers.26.mlp.shared_expert.down_proj",
402
  "model.language_model.layers.26.mlp.shared_expert_gate",
403
+ "model.language_model.layers.27.self_attn.q_proj",
404
+ "model.language_model.layers.27.self_attn.k_proj",
405
+ "model.language_model.layers.27.self_attn.v_proj",
406
+ "model.language_model.layers.27.self_attn.o_proj",
407
  "model.language_model.layers.27.mlp.shared_expert.gate_proj",
408
  "model.language_model.layers.27.mlp.shared_expert.up_proj",
409
  "model.language_model.layers.27.mlp.shared_expert.down_proj",
 
438
  "model.language_model.layers.30.mlp.shared_expert.up_proj",
439
  "model.language_model.layers.30.mlp.shared_expert.down_proj",
440
  "model.language_model.layers.30.mlp.shared_expert_gate",
441
+ "model.language_model.layers.31.self_attn.q_proj",
442
+ "model.language_model.layers.31.self_attn.k_proj",
443
+ "model.language_model.layers.31.self_attn.v_proj",
444
+ "model.language_model.layers.31.self_attn.o_proj",
445
  "model.language_model.layers.31.mlp.shared_expert.gate_proj",
446
  "model.language_model.layers.31.mlp.shared_expert.up_proj",
447
  "model.language_model.layers.31.mlp.shared_expert.down_proj",
 
476
  "model.language_model.layers.34.mlp.shared_expert.up_proj",
477
  "model.language_model.layers.34.mlp.shared_expert.down_proj",
478
  "model.language_model.layers.34.mlp.shared_expert_gate",
479
+ "model.language_model.layers.35.self_attn.q_proj",
480
+ "model.language_model.layers.35.self_attn.k_proj",
481
+ "model.language_model.layers.35.self_attn.v_proj",
482
+ "model.language_model.layers.35.self_attn.o_proj",
483
  "model.language_model.layers.35.mlp.shared_expert.gate_proj",
484
  "model.language_model.layers.35.mlp.shared_expert.up_proj",
485
  "model.language_model.layers.35.mlp.shared_expert.down_proj",
 
514
  "model.language_model.layers.38.mlp.shared_expert.up_proj",
515
  "model.language_model.layers.38.mlp.shared_expert.down_proj",
516
  "model.language_model.layers.38.mlp.shared_expert_gate",
517
+ "model.language_model.layers.39.self_attn.q_proj",
518
+ "model.language_model.layers.39.self_attn.k_proj",
519
+ "model.language_model.layers.39.self_attn.v_proj",
520
+ "model.language_model.layers.39.self_attn.o_proj",
521
  "model.language_model.layers.39.mlp.shared_expert.gate_proj",
522
  "model.language_model.layers.39.mlp.shared_expert.up_proj",
523
  "model.language_model.layers.39.mlp.shared_expert.down_proj",
524
  "model.language_model.layers.39.mlp.shared_expert_gate",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
525
  "mtp.fc",
526
  "mtp.layers.0.mlp.gate",
527
  "mtp.layers.0.mlp.shared_expert.gate_up_proj",
528
  "mtp.layers.0.mlp.shared_expert.gate_proj",
529
  "mtp.layers.0.mlp.shared_expert.up_proj",
530
  "mtp.layers.0.mlp.shared_expert.down_proj",
531
+ "mtp.layers.0.self_attn.k_proj",
532
+ "mtp.layers.0.self_attn.o_proj",
533
+ "mtp.layers.0.self_attn.q_proj",
534
+ "mtp.layers.0.self_attn.v_proj",
535
  "lm_head"
536
  ],
537
  "kv_cache_scheme": null,
model-00001-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:511407ab1269c0f4a3e391e1984b1ef1cd7a101579771dba5b4cf937cf56c8dc
3
- size 5370509296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2070b3db53220b1051d7382f95b66c138a32bf2324abf39054aa950ef458f91d
3
+ size 5374122648
model-00002-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20a165dfe961808f24b2b7dbfb4c22f373a6f9b0c464df229df5c244ac56a23d
3
- size 5372110480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba41d54eb8a2d49531f27c5a493838ffd413c2575df9cf6afedff0db791e9fd2
3
+ size 5370188016
model-00003-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f43aa8efa84f00e13232e0bdd7b97318ba90db2466b8db9a74ba12d59e650b5
3
- size 5371922376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6421d2cdab03682bc5da07c70db47729125812f769b2f3f61ec0a6b7b2bdde2b
3
+ size 5370183944
model-00004-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54fe09d56dfbbeedc1622f6c3a7a54f220c473f232d4418886dba256c7862cc7
3
- size 5372104024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14454ee08bf3344919cecf4106eef287cdcc90d42a350f80b6d89815e562e590
3
+ size 5359586560
model-00005-of-00005.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2aee2973c5a296ba8a64a7255bf3e01777ee28299d1e2352b1fb3e7b3bd1b915
3
- size 2536847080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3885571ec31627a8f0b0bc67243eab0132350de24e343f508cb28502e2b29b92
3
+ size 2980536416
model.safetensors.index.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43a769307382aa933d9e2cda6b1879ec965a518b0d40b65ea29cb99401dfcf88
3
- size 10637583
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8274494060f6e950c1a3cea599c32334584a38a46ac84dfcf087901aa0b86720
3
+ size 10628250
recipe.yaml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ default_stage:
2
+ default_modifiers:
3
+ AWQModifier:
4
+ config_groups:
5
+ group_0:
6
+ targets: [Linear]
7
+ weights:
8
+ num_bits: 4
9
+ type: int
10
+ symmetric: true
11
+ group_size: 32
12
+ strategy: group
13
+ block_structure: null
14
+ dynamic: false
15
+ actorder: null
16
+ scale_dtype: null
17
+ zp_dtype: null
18
+ observer: mse
19
+ observer_kwargs: {}
20
+ input_activations: null
21
+ output_activations: null
22
+ format: null
23
+ targets: [Linear]
24
+ ignore: ['re:.*embed_tokens', 're:.*linear_attn.*', 're:.*shared_expert.*', 're:.*shared_expert_gate$',
25
+ 're:.*mlp[.]gate$', 're:.*self_attn.*', 're:model[.]visual.*', 're:mtp.*', lm_head]
26
+ bypass_divisibility_checks: false
27
+ mappings:
28
+ - smooth_layer: re:model.*post_attention_layernorm$
29
+ balance_layers: ['re:model.*mlp[.]experts.*gate_proj$', 're:model.*mlp[.]experts.*up_proj$',
30
+ 're:model.*mlp[.]shared_expert[.]gate_proj$', 're:model.*mlp[.]shared_expert[.]up_proj$',
31
+ 're:model.*mlp[.]gate$', 're:model.*mlp[.]shared_expert_gate$']
32
+ activation_hook_target: null
33
+ balance_exponent: 1
34
+ - smooth_layer: re:model.*mlp[.]experts.*up_proj$
35
+ balance_layers: ['re:model.*mlp[.]experts.*down_proj$']
36
+ activation_hook_target: null
37
+ balance_exponent: 1
38
+ offload_device: !!python/object/apply:torch.device [cpu]
39
+ duo_scaling: true
40
+ n_grid: 20