mikeumus-divincian commited on
Commit
5d15eda
·
verified ·
1 Parent(s): 56e1238

Add gate_vectors_index.json

Browse files
Files changed (1) hide show
  1. gate_vectors_index.json +645 -0
gate_vectors_index.json ADDED
@@ -0,0 +1,645 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": 2,
3
+ "model_id": "unsloth/DeepSeek-V4-Flash",
4
+ "model_config": {
5
+ "moe": {
6
+ "model_type": "deepseek_v4",
7
+ "num_layers": 43,
8
+ "hidden_size": 4096,
9
+ "intermediate_size": 18432,
10
+ "moe_intermediate_size": 2048,
11
+ "n_routed_experts": 256,
12
+ "n_shared_experts": 1,
13
+ "num_experts_per_tok": 6,
14
+ "first_k_dense_replace": 0,
15
+ "torch_dtype": "bfloat16",
16
+ "quant_method": "fp8",
17
+ "templates": {
18
+ "expert_gate_proj": "layers.{layer}.ffn.experts.{e}.w1.weight",
19
+ "expert_up_proj": "layers.{layer}.ffn.experts.{e}.w3.weight",
20
+ "expert_down_proj": "layers.{layer}.ffn.experts.{e}.w2.weight",
21
+ "fused_gate_proj": "layers.{layer}.ffn.experts.w1",
22
+ "fused_down_proj": "layers.{layer}.ffn.experts.w2",
23
+ "shared_down_proj": [
24
+ "layers.{layer}.ffn.shared_experts.w2.weight"
25
+ ],
26
+ "router": [
27
+ "layers.{layer}.ffn.gate.weight",
28
+ "layers.{layer}.ffn.router.weight"
29
+ ],
30
+ "dense_down_proj": [
31
+ "layers.{layer}.ffn.w2.weight"
32
+ ]
33
+ }
34
+ }
35
+ },
36
+ "num_feats": 64,
37
+ "dtype": "float32",
38
+ "gate_vector_semantics": "right_singular_vectors_of_gate_proj",
39
+ "layers": {
40
+ "0": {
41
+ "shape": [
42
+ 256,
43
+ 64,
44
+ 4096
45
+ ],
46
+ "file_offset": 0
47
+ },
48
+ "1": {
49
+ "shape": [
50
+ 256,
51
+ 64,
52
+ 4096
53
+ ],
54
+ "file_offset": 268435456
55
+ },
56
+ "2": {
57
+ "shape": [
58
+ 256,
59
+ 64,
60
+ 4096
61
+ ],
62
+ "file_offset": 536870912
63
+ },
64
+ "3": {
65
+ "shape": [
66
+ 256,
67
+ 64,
68
+ 4096
69
+ ],
70
+ "file_offset": 805306368
71
+ },
72
+ "4": {
73
+ "shape": [
74
+ 256,
75
+ 64,
76
+ 4096
77
+ ],
78
+ "file_offset": 1073741824
79
+ },
80
+ "5": {
81
+ "shape": [
82
+ 256,
83
+ 64,
84
+ 4096
85
+ ],
86
+ "file_offset": 1342177280
87
+ },
88
+ "6": {
89
+ "shape": [
90
+ 256,
91
+ 64,
92
+ 4096
93
+ ],
94
+ "file_offset": 1610612736
95
+ },
96
+ "7": {
97
+ "shape": [
98
+ 256,
99
+ 64,
100
+ 4096
101
+ ],
102
+ "file_offset": 1879048192
103
+ },
104
+ "8": {
105
+ "shape": [
106
+ 256,
107
+ 64,
108
+ 4096
109
+ ],
110
+ "file_offset": 2147483648
111
+ },
112
+ "9": {
113
+ "shape": [
114
+ 256,
115
+ 64,
116
+ 4096
117
+ ],
118
+ "file_offset": 2415919104
119
+ },
120
+ "10": {
121
+ "shape": [
122
+ 256,
123
+ 64,
124
+ 4096
125
+ ],
126
+ "file_offset": 2684354560
127
+ },
128
+ "11": {
129
+ "shape": [
130
+ 256,
131
+ 64,
132
+ 4096
133
+ ],
134
+ "file_offset": 2952790016
135
+ },
136
+ "12": {
137
+ "shape": [
138
+ 256,
139
+ 64,
140
+ 4096
141
+ ],
142
+ "file_offset": 3221225472
143
+ },
144
+ "13": {
145
+ "shape": [
146
+ 256,
147
+ 64,
148
+ 4096
149
+ ],
150
+ "file_offset": 3489660928
151
+ },
152
+ "14": {
153
+ "shape": [
154
+ 256,
155
+ 64,
156
+ 4096
157
+ ],
158
+ "file_offset": 3758096384
159
+ },
160
+ "15": {
161
+ "shape": [
162
+ 256,
163
+ 64,
164
+ 4096
165
+ ],
166
+ "file_offset": 4026531840
167
+ },
168
+ "16": {
169
+ "shape": [
170
+ 256,
171
+ 64,
172
+ 4096
173
+ ],
174
+ "file_offset": 4294967296
175
+ },
176
+ "17": {
177
+ "shape": [
178
+ 256,
179
+ 64,
180
+ 4096
181
+ ],
182
+ "file_offset": 4563402752
183
+ },
184
+ "18": {
185
+ "shape": [
186
+ 256,
187
+ 64,
188
+ 4096
189
+ ],
190
+ "file_offset": 4831838208
191
+ },
192
+ "19": {
193
+ "shape": [
194
+ 256,
195
+ 64,
196
+ 4096
197
+ ],
198
+ "file_offset": 5100273664
199
+ },
200
+ "20": {
201
+ "shape": [
202
+ 256,
203
+ 64,
204
+ 4096
205
+ ],
206
+ "file_offset": 5368709120
207
+ },
208
+ "21": {
209
+ "shape": [
210
+ 256,
211
+ 64,
212
+ 4096
213
+ ],
214
+ "file_offset": 5637144576
215
+ },
216
+ "22": {
217
+ "shape": [
218
+ 256,
219
+ 64,
220
+ 4096
221
+ ],
222
+ "file_offset": 5905580032
223
+ },
224
+ "23": {
225
+ "shape": [
226
+ 256,
227
+ 64,
228
+ 4096
229
+ ],
230
+ "file_offset": 6174015488
231
+ },
232
+ "24": {
233
+ "shape": [
234
+ 256,
235
+ 64,
236
+ 4096
237
+ ],
238
+ "file_offset": 6442450944
239
+ },
240
+ "25": {
241
+ "shape": [
242
+ 256,
243
+ 64,
244
+ 4096
245
+ ],
246
+ "file_offset": 6710886400
247
+ },
248
+ "26": {
249
+ "shape": [
250
+ 256,
251
+ 64,
252
+ 4096
253
+ ],
254
+ "file_offset": 6979321856
255
+ },
256
+ "27": {
257
+ "shape": [
258
+ 256,
259
+ 64,
260
+ 4096
261
+ ],
262
+ "file_offset": 7247757312
263
+ },
264
+ "28": {
265
+ "shape": [
266
+ 256,
267
+ 64,
268
+ 4096
269
+ ],
270
+ "file_offset": 7516192768
271
+ },
272
+ "29": {
273
+ "shape": [
274
+ 256,
275
+ 64,
276
+ 4096
277
+ ],
278
+ "file_offset": 7784628224
279
+ },
280
+ "30": {
281
+ "shape": [
282
+ 256,
283
+ 64,
284
+ 4096
285
+ ],
286
+ "file_offset": 8053063680
287
+ },
288
+ "31": {
289
+ "shape": [
290
+ 256,
291
+ 64,
292
+ 4096
293
+ ],
294
+ "file_offset": 8321499136
295
+ },
296
+ "32": {
297
+ "shape": [
298
+ 256,
299
+ 64,
300
+ 4096
301
+ ],
302
+ "file_offset": 8589934592
303
+ },
304
+ "33": {
305
+ "shape": [
306
+ 256,
307
+ 64,
308
+ 4096
309
+ ],
310
+ "file_offset": 8858370048
311
+ },
312
+ "34": {
313
+ "shape": [
314
+ 256,
315
+ 64,
316
+ 4096
317
+ ],
318
+ "file_offset": 9126805504
319
+ },
320
+ "35": {
321
+ "shape": [
322
+ 256,
323
+ 64,
324
+ 4096
325
+ ],
326
+ "file_offset": 9395240960
327
+ },
328
+ "36": {
329
+ "shape": [
330
+ 256,
331
+ 64,
332
+ 4096
333
+ ],
334
+ "file_offset": 9663676416
335
+ },
336
+ "37": {
337
+ "shape": [
338
+ 256,
339
+ 64,
340
+ 4096
341
+ ],
342
+ "file_offset": 9932111872
343
+ },
344
+ "38": {
345
+ "shape": [
346
+ 256,
347
+ 64,
348
+ 4096
349
+ ],
350
+ "file_offset": 10200547328
351
+ },
352
+ "39": {
353
+ "shape": [
354
+ 256,
355
+ 64,
356
+ 4096
357
+ ],
358
+ "file_offset": 10468982784
359
+ },
360
+ "40": {
361
+ "shape": [
362
+ 256,
363
+ 64,
364
+ 4096
365
+ ],
366
+ "file_offset": 10737418240
367
+ },
368
+ "41": {
369
+ "shape": [
370
+ 256,
371
+ 64,
372
+ 4096
373
+ ],
374
+ "file_offset": 11005853696
375
+ },
376
+ "42": {
377
+ "shape": [
378
+ 256,
379
+ 64,
380
+ 4096
381
+ ],
382
+ "file_offset": 11274289152
383
+ }
384
+ },
385
+ "layer_stats": {
386
+ "0": {
387
+ "median_var64": 0.1002,
388
+ "q25_var64": 0.0998,
389
+ "q75_var64": 0.1007,
390
+ "n_experts": 256
391
+ },
392
+ "1": {
393
+ "median_var64": 0.0916,
394
+ "q25_var64": 0.0914,
395
+ "q75_var64": 0.0917,
396
+ "n_experts": 256
397
+ },
398
+ "2": {
399
+ "median_var64": 0.0868,
400
+ "q25_var64": 0.0868,
401
+ "q75_var64": 0.0869,
402
+ "n_experts": 256
403
+ },
404
+ "3": {
405
+ "median_var64": 0.104,
406
+ "q25_var64": 0.0981,
407
+ "q75_var64": 0.1091,
408
+ "n_experts": 256
409
+ },
410
+ "4": {
411
+ "median_var64": 0.108,
412
+ "q25_var64": 0.1023,
413
+ "q75_var64": 0.1174,
414
+ "n_experts": 256
415
+ },
416
+ "5": {
417
+ "median_var64": 0.1097,
418
+ "q25_var64": 0.1016,
419
+ "q75_var64": 0.1185,
420
+ "n_experts": 256
421
+ },
422
+ "6": {
423
+ "median_var64": 0.1092,
424
+ "q25_var64": 0.1021,
425
+ "q75_var64": 0.1159,
426
+ "n_experts": 256
427
+ },
428
+ "7": {
429
+ "median_var64": 0.1071,
430
+ "q25_var64": 0.0997,
431
+ "q75_var64": 0.1169,
432
+ "n_experts": 256
433
+ },
434
+ "8": {
435
+ "median_var64": 0.1092,
436
+ "q25_var64": 0.1028,
437
+ "q75_var64": 0.1163,
438
+ "n_experts": 256
439
+ },
440
+ "9": {
441
+ "median_var64": 0.1081,
442
+ "q25_var64": 0.1013,
443
+ "q75_var64": 0.1201,
444
+ "n_experts": 256
445
+ },
446
+ "10": {
447
+ "median_var64": 0.116,
448
+ "q25_var64": 0.1083,
449
+ "q75_var64": 0.1319,
450
+ "n_experts": 256
451
+ },
452
+ "11": {
453
+ "median_var64": 0.1182,
454
+ "q25_var64": 0.1054,
455
+ "q75_var64": 0.1328,
456
+ "n_experts": 256
457
+ },
458
+ "12": {
459
+ "median_var64": 0.1284,
460
+ "q25_var64": 0.1123,
461
+ "q75_var64": 0.1444,
462
+ "n_experts": 256
463
+ },
464
+ "13": {
465
+ "median_var64": 0.1291,
466
+ "q25_var64": 0.115,
467
+ "q75_var64": 0.1447,
468
+ "n_experts": 256
469
+ },
470
+ "14": {
471
+ "median_var64": 0.1378,
472
+ "q25_var64": 0.1191,
473
+ "q75_var64": 0.154,
474
+ "n_experts": 256
475
+ },
476
+ "15": {
477
+ "median_var64": 0.1421,
478
+ "q25_var64": 0.1152,
479
+ "q75_var64": 0.1615,
480
+ "n_experts": 256
481
+ },
482
+ "16": {
483
+ "median_var64": 0.1533,
484
+ "q25_var64": 0.134,
485
+ "q75_var64": 0.1781,
486
+ "n_experts": 256
487
+ },
488
+ "17": {
489
+ "median_var64": 0.1587,
490
+ "q25_var64": 0.1385,
491
+ "q75_var64": 0.1802,
492
+ "n_experts": 256
493
+ },
494
+ "18": {
495
+ "median_var64": 0.1649,
496
+ "q25_var64": 0.1433,
497
+ "q75_var64": 0.1846,
498
+ "n_experts": 256
499
+ },
500
+ "19": {
501
+ "median_var64": 0.1268,
502
+ "q25_var64": 0.1112,
503
+ "q75_var64": 0.1432,
504
+ "n_experts": 256
505
+ },
506
+ "20": {
507
+ "median_var64": 0.1575,
508
+ "q25_var64": 0.1407,
509
+ "q75_var64": 0.1816,
510
+ "n_experts": 256
511
+ },
512
+ "21": {
513
+ "median_var64": 0.1449,
514
+ "q25_var64": 0.1164,
515
+ "q75_var64": 0.1679,
516
+ "n_experts": 256
517
+ },
518
+ "22": {
519
+ "median_var64": 0.1376,
520
+ "q25_var64": 0.1083,
521
+ "q75_var64": 0.1613,
522
+ "n_experts": 256
523
+ },
524
+ "23": {
525
+ "median_var64": 0.0919,
526
+ "q25_var64": 0.0881,
527
+ "q75_var64": 0.1025,
528
+ "n_experts": 256
529
+ },
530
+ "24": {
531
+ "median_var64": 0.1051,
532
+ "q25_var64": 0.0942,
533
+ "q75_var64": 0.1209,
534
+ "n_experts": 256
535
+ },
536
+ "25": {
537
+ "median_var64": 0.0918,
538
+ "q25_var64": 0.0877,
539
+ "q75_var64": 0.105,
540
+ "n_experts": 256
541
+ },
542
+ "26": {
543
+ "median_var64": 0.0965,
544
+ "q25_var64": 0.0908,
545
+ "q75_var64": 0.1096,
546
+ "n_experts": 256
547
+ },
548
+ "27": {
549
+ "median_var64": 0.0869,
550
+ "q25_var64": 0.0852,
551
+ "q75_var64": 0.0934,
552
+ "n_experts": 256
553
+ },
554
+ "28": {
555
+ "median_var64": 0.0939,
556
+ "q25_var64": 0.0892,
557
+ "q75_var64": 0.1041,
558
+ "n_experts": 256
559
+ },
560
+ "29": {
561
+ "median_var64": 0.0931,
562
+ "q25_var64": 0.0877,
563
+ "q75_var64": 0.109,
564
+ "n_experts": 256
565
+ },
566
+ "30": {
567
+ "median_var64": 0.0944,
568
+ "q25_var64": 0.0886,
569
+ "q75_var64": 0.1132,
570
+ "n_experts": 256
571
+ },
572
+ "31": {
573
+ "median_var64": 0.0917,
574
+ "q25_var64": 0.0875,
575
+ "q75_var64": 0.1096,
576
+ "n_experts": 256
577
+ },
578
+ "32": {
579
+ "median_var64": 0.0953,
580
+ "q25_var64": 0.0901,
581
+ "q75_var64": 0.1042,
582
+ "n_experts": 256
583
+ },
584
+ "33": {
585
+ "median_var64": 0.0947,
586
+ "q25_var64": 0.0892,
587
+ "q75_var64": 0.1062,
588
+ "n_experts": 256
589
+ },
590
+ "34": {
591
+ "median_var64": 0.0925,
592
+ "q25_var64": 0.0893,
593
+ "q75_var64": 0.103,
594
+ "n_experts": 256
595
+ },
596
+ "35": {
597
+ "median_var64": 0.0989,
598
+ "q25_var64": 0.0919,
599
+ "q75_var64": 0.1154,
600
+ "n_experts": 256
601
+ },
602
+ "36": {
603
+ "median_var64": 0.0964,
604
+ "q25_var64": 0.0902,
605
+ "q75_var64": 0.1098,
606
+ "n_experts": 256
607
+ },
608
+ "37": {
609
+ "median_var64": 0.0974,
610
+ "q25_var64": 0.0916,
611
+ "q75_var64": 0.1123,
612
+ "n_experts": 256
613
+ },
614
+ "38": {
615
+ "median_var64": 0.1017,
616
+ "q25_var64": 0.0939,
617
+ "q75_var64": 0.1144,
618
+ "n_experts": 256
619
+ },
620
+ "39": {
621
+ "median_var64": 0.1248,
622
+ "q25_var64": 0.112,
623
+ "q75_var64": 0.1414,
624
+ "n_experts": 256
625
+ },
626
+ "40": {
627
+ "median_var64": 0.1186,
628
+ "q25_var64": 0.1047,
629
+ "q75_var64": 0.1371,
630
+ "n_experts": 256
631
+ },
632
+ "41": {
633
+ "median_var64": 0.1214,
634
+ "q25_var64": 0.1065,
635
+ "q75_var64": 0.1415,
636
+ "n_experts": 256
637
+ },
638
+ "42": {
639
+ "median_var64": 0.1191,
640
+ "q25_var64": 0.1082,
641
+ "q75_var64": 0.1362,
642
+ "n_experts": 256
643
+ }
644
+ }
645
+ }