UnstableLlama committed on
Commit
ac378ac
·
verified ·
1 Parent(s): eea923e

Upload 5 files

Browse files
Files changed (3) hide show
  1. config.json +1 -1
  2. model.safetensors +2 -2
  3. quantization_config.json +58 -58
config.json CHANGED
@@ -62,7 +62,7 @@
62
  "quantization_config": {
63
  "quant_method": "exl3",
64
  "version": "0.0.32",
65
- "bits": 2.5,
66
  "head_bits": 6,
67
  "calibration": {
68
  "rows": 250,
 
62
  "quantization_config": {
63
  "quant_method": "exl3",
64
  "version": "0.0.32",
65
+ "bits": 3.0,
66
  "head_bits": 6,
67
  "calibration": {
68
  "rows": 250,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f3bdaa4934647adbe66d8f67384e74152b9829460b97da630c6295ef2414565
3
- size 148649417
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56002139c64c9ee73aa02b0961ab838efbfa923c593cf7c7761b4eb40352f55a
3
+ size 178533879
quantization_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "quant_method": "exl3",
3
  "version": "0.0.32",
4
- "bits": 2.5,
5
  "head_bits": 6,
6
  "calibration": {
7
  "rows": 250,
@@ -624,14 +624,14 @@
624
  "shape": [
625
  384,
626
  128,
627
- 32
628
  ],
629
- "n_bytes": 3145728,
630
  "dtype": "torch.int16"
631
  }
632
  },
633
  "quant_format": "exl3",
634
- "bits_per_weight": 2,
635
  "mcg_multiplier": 3417055213
636
  },
637
  "layers.2.input_layernorm": {
@@ -843,14 +843,14 @@
843
  "shape": [
844
  128,
845
  384,
846
- 32
847
  ],
848
- "n_bytes": 3145728,
849
  "dtype": "torch.int16"
850
  }
851
  },
852
  "quant_format": "exl3",
853
- "bits_per_weight": 2,
854
  "mcg_multiplier": 3417055213
855
  },
856
  "layers.2.mlp.gate_proj": {
@@ -878,14 +878,14 @@
878
  "shape": [
879
  128,
880
  384,
881
- 32
882
  ],
883
- "n_bytes": 3145728,
884
  "dtype": "torch.int16"
885
  }
886
  },
887
  "quant_format": "exl3",
888
- "bits_per_weight": 2,
889
  "mcg_multiplier": 3417055213
890
  },
891
  "layers.2.mlp.down_proj": {
@@ -913,14 +913,14 @@
913
  "shape": [
914
  384,
915
  128,
916
- 32
917
  ],
918
- "n_bytes": 3145728,
919
  "dtype": "torch.int16"
920
  }
921
  },
922
  "quant_format": "exl3",
923
- "bits_per_weight": 2,
924
  "mcg_multiplier": 3417055213
925
  },
926
  "layers.3.input_layernorm": {
@@ -1132,14 +1132,14 @@
1132
  "shape": [
1133
  128,
1134
  384,
1135
- 32
1136
  ],
1137
- "n_bytes": 3145728,
1138
  "dtype": "torch.int16"
1139
  }
1140
  },
1141
  "quant_format": "exl3",
1142
- "bits_per_weight": 2,
1143
  "mcg_multiplier": 3417055213
1144
  },
1145
  "layers.3.mlp.gate_proj": {
@@ -1167,14 +1167,14 @@
1167
  "shape": [
1168
  128,
1169
  384,
1170
- 32
1171
  ],
1172
- "n_bytes": 3145728,
1173
  "dtype": "torch.int16"
1174
  }
1175
  },
1176
  "quant_format": "exl3",
1177
- "bits_per_weight": 2,
1178
  "mcg_multiplier": 3417055213
1179
  },
1180
  "layers.3.mlp.down_proj": {
@@ -1202,14 +1202,14 @@
1202
  "shape": [
1203
  384,
1204
  128,
1205
- 32
1206
  ],
1207
- "n_bytes": 3145728,
1208
  "dtype": "torch.int16"
1209
  }
1210
  },
1211
  "quant_format": "exl3",
1212
- "bits_per_weight": 2,
1213
  "mcg_multiplier": 3417055213
1214
  },
1215
  "layers.4.input_layernorm": {
@@ -1421,14 +1421,14 @@
1421
  "shape": [
1422
  128,
1423
  384,
1424
- 32
1425
  ],
1426
- "n_bytes": 3145728,
1427
  "dtype": "torch.int16"
1428
  }
1429
  },
1430
  "quant_format": "exl3",
1431
- "bits_per_weight": 2,
1432
  "mcg_multiplier": 3417055213
1433
  },
1434
  "layers.4.mlp.gate_proj": {
@@ -1456,14 +1456,14 @@
1456
  "shape": [
1457
  128,
1458
  384,
1459
- 32
1460
  ],
1461
- "n_bytes": 3145728,
1462
  "dtype": "torch.int16"
1463
  }
1464
  },
1465
  "quant_format": "exl3",
1466
- "bits_per_weight": 2,
1467
  "mcg_multiplier": 3417055213
1468
  },
1469
  "layers.4.mlp.down_proj": {
@@ -1491,14 +1491,14 @@
1491
  "shape": [
1492
  384,
1493
  128,
1494
- 32
1495
  ],
1496
- "n_bytes": 3145728,
1497
  "dtype": "torch.int16"
1498
  }
1499
  },
1500
  "quant_format": "exl3",
1501
- "bits_per_weight": 2,
1502
  "mcg_multiplier": 3417055213
1503
  },
1504
  "layers.5.input_layernorm": {
@@ -1710,14 +1710,14 @@
1710
  "shape": [
1711
  128,
1712
  384,
1713
- 32
1714
  ],
1715
- "n_bytes": 3145728,
1716
  "dtype": "torch.int16"
1717
  }
1718
  },
1719
  "quant_format": "exl3",
1720
- "bits_per_weight": 2,
1721
  "mcg_multiplier": 3417055213
1722
  },
1723
  "layers.5.mlp.gate_proj": {
@@ -1745,14 +1745,14 @@
1745
  "shape": [
1746
  128,
1747
  384,
1748
- 32
1749
  ],
1750
- "n_bytes": 3145728,
1751
  "dtype": "torch.int16"
1752
  }
1753
  },
1754
  "quant_format": "exl3",
1755
- "bits_per_weight": 2,
1756
  "mcg_multiplier": 3417055213
1757
  },
1758
  "layers.5.mlp.down_proj": {
@@ -1780,14 +1780,14 @@
1780
  "shape": [
1781
  384,
1782
  128,
1783
- 32
1784
  ],
1785
- "n_bytes": 3145728,
1786
  "dtype": "torch.int16"
1787
  }
1788
  },
1789
  "quant_format": "exl3",
1790
- "bits_per_weight": 2,
1791
  "mcg_multiplier": 3417055213
1792
  },
1793
  "layers.6.input_layernorm": {
@@ -1999,14 +1999,14 @@
1999
  "shape": [
2000
  128,
2001
  384,
2002
- 32
2003
  ],
2004
- "n_bytes": 3145728,
2005
  "dtype": "torch.int16"
2006
  }
2007
  },
2008
  "quant_format": "exl3",
2009
- "bits_per_weight": 2,
2010
  "mcg_multiplier": 3417055213
2011
  },
2012
  "layers.6.mlp.gate_proj": {
@@ -2034,14 +2034,14 @@
2034
  "shape": [
2035
  128,
2036
  384,
2037
- 32
2038
  ],
2039
- "n_bytes": 3145728,
2040
  "dtype": "torch.int16"
2041
  }
2042
  },
2043
  "quant_format": "exl3",
2044
- "bits_per_weight": 2,
2045
  "mcg_multiplier": 3417055213
2046
  },
2047
  "layers.6.mlp.down_proj": {
@@ -2069,14 +2069,14 @@
2069
  "shape": [
2070
  384,
2071
  128,
2072
- 32
2073
  ],
2074
- "n_bytes": 3145728,
2075
  "dtype": "torch.int16"
2076
  }
2077
  },
2078
  "quant_format": "exl3",
2079
- "bits_per_weight": 2,
2080
  "mcg_multiplier": 3417055213
2081
  },
2082
  "layers.7.input_layernorm": {
@@ -2288,14 +2288,14 @@
2288
  "shape": [
2289
  128,
2290
  384,
2291
- 32
2292
  ],
2293
- "n_bytes": 3145728,
2294
  "dtype": "torch.int16"
2295
  }
2296
  },
2297
  "quant_format": "exl3",
2298
- "bits_per_weight": 2,
2299
  "mcg_multiplier": 3417055213
2300
  },
2301
  "layers.7.mlp.gate_proj": {
@@ -2323,14 +2323,14 @@
2323
  "shape": [
2324
  128,
2325
  384,
2326
- 32
2327
  ],
2328
- "n_bytes": 3145728,
2329
  "dtype": "torch.int16"
2330
  }
2331
  },
2332
  "quant_format": "exl3",
2333
- "bits_per_weight": 2,
2334
  "mcg_multiplier": 3417055213
2335
  },
2336
  "layers.7.mlp.down_proj": {
@@ -2358,14 +2358,14 @@
2358
  "shape": [
2359
  384,
2360
  128,
2361
- 32
2362
  ],
2363
- "n_bytes": 3145728,
2364
  "dtype": "torch.int16"
2365
  }
2366
  },
2367
  "quant_format": "exl3",
2368
- "bits_per_weight": 2,
2369
  "mcg_multiplier": 3417055213
2370
  },
2371
  "norm": {
 
1
  {
2
  "quant_method": "exl3",
3
  "version": "0.0.32",
4
+ "bits": 3.0,
5
  "head_bits": 6,
6
  "calibration": {
7
  "rows": 250,
 
624
  "shape": [
625
  384,
626
  128,
627
+ 48
628
  ],
629
+ "n_bytes": 4718592,
630
  "dtype": "torch.int16"
631
  }
632
  },
633
  "quant_format": "exl3",
634
+ "bits_per_weight": 3,
635
  "mcg_multiplier": 3417055213
636
  },
637
  "layers.2.input_layernorm": {
 
843
  "shape": [
844
  128,
845
  384,
846
+ 48
847
  ],
848
+ "n_bytes": 4718592,
849
  "dtype": "torch.int16"
850
  }
851
  },
852
  "quant_format": "exl3",
853
+ "bits_per_weight": 3,
854
  "mcg_multiplier": 3417055213
855
  },
856
  "layers.2.mlp.gate_proj": {
 
878
  "shape": [
879
  128,
880
  384,
881
+ 48
882
  ],
883
+ "n_bytes": 4718592,
884
  "dtype": "torch.int16"
885
  }
886
  },
887
  "quant_format": "exl3",
888
+ "bits_per_weight": 3,
889
  "mcg_multiplier": 3417055213
890
  },
891
  "layers.2.mlp.down_proj": {
 
913
  "shape": [
914
  384,
915
  128,
916
+ 48
917
  ],
918
+ "n_bytes": 4718592,
919
  "dtype": "torch.int16"
920
  }
921
  },
922
  "quant_format": "exl3",
923
+ "bits_per_weight": 3,
924
  "mcg_multiplier": 3417055213
925
  },
926
  "layers.3.input_layernorm": {
 
1132
  "shape": [
1133
  128,
1134
  384,
1135
+ 48
1136
  ],
1137
+ "n_bytes": 4718592,
1138
  "dtype": "torch.int16"
1139
  }
1140
  },
1141
  "quant_format": "exl3",
1142
+ "bits_per_weight": 3,
1143
  "mcg_multiplier": 3417055213
1144
  },
1145
  "layers.3.mlp.gate_proj": {
 
1167
  "shape": [
1168
  128,
1169
  384,
1170
+ 48
1171
  ],
1172
+ "n_bytes": 4718592,
1173
  "dtype": "torch.int16"
1174
  }
1175
  },
1176
  "quant_format": "exl3",
1177
+ "bits_per_weight": 3,
1178
  "mcg_multiplier": 3417055213
1179
  },
1180
  "layers.3.mlp.down_proj": {
 
1202
  "shape": [
1203
  384,
1204
  128,
1205
+ 48
1206
  ],
1207
+ "n_bytes": 4718592,
1208
  "dtype": "torch.int16"
1209
  }
1210
  },
1211
  "quant_format": "exl3",
1212
+ "bits_per_weight": 3,
1213
  "mcg_multiplier": 3417055213
1214
  },
1215
  "layers.4.input_layernorm": {
 
1421
  "shape": [
1422
  128,
1423
  384,
1424
+ 48
1425
  ],
1426
+ "n_bytes": 4718592,
1427
  "dtype": "torch.int16"
1428
  }
1429
  },
1430
  "quant_format": "exl3",
1431
+ "bits_per_weight": 3,
1432
  "mcg_multiplier": 3417055213
1433
  },
1434
  "layers.4.mlp.gate_proj": {
 
1456
  "shape": [
1457
  128,
1458
  384,
1459
+ 48
1460
  ],
1461
+ "n_bytes": 4718592,
1462
  "dtype": "torch.int16"
1463
  }
1464
  },
1465
  "quant_format": "exl3",
1466
+ "bits_per_weight": 3,
1467
  "mcg_multiplier": 3417055213
1468
  },
1469
  "layers.4.mlp.down_proj": {
 
1491
  "shape": [
1492
  384,
1493
  128,
1494
+ 48
1495
  ],
1496
+ "n_bytes": 4718592,
1497
  "dtype": "torch.int16"
1498
  }
1499
  },
1500
  "quant_format": "exl3",
1501
+ "bits_per_weight": 3,
1502
  "mcg_multiplier": 3417055213
1503
  },
1504
  "layers.5.input_layernorm": {
 
1710
  "shape": [
1711
  128,
1712
  384,
1713
+ 48
1714
  ],
1715
+ "n_bytes": 4718592,
1716
  "dtype": "torch.int16"
1717
  }
1718
  },
1719
  "quant_format": "exl3",
1720
+ "bits_per_weight": 3,
1721
  "mcg_multiplier": 3417055213
1722
  },
1723
  "layers.5.mlp.gate_proj": {
 
1745
  "shape": [
1746
  128,
1747
  384,
1748
+ 48
1749
  ],
1750
+ "n_bytes": 4718592,
1751
  "dtype": "torch.int16"
1752
  }
1753
  },
1754
  "quant_format": "exl3",
1755
+ "bits_per_weight": 3,
1756
  "mcg_multiplier": 3417055213
1757
  },
1758
  "layers.5.mlp.down_proj": {
 
1780
  "shape": [
1781
  384,
1782
  128,
1783
+ 48
1784
  ],
1785
+ "n_bytes": 4718592,
1786
  "dtype": "torch.int16"
1787
  }
1788
  },
1789
  "quant_format": "exl3",
1790
+ "bits_per_weight": 3,
1791
  "mcg_multiplier": 3417055213
1792
  },
1793
  "layers.6.input_layernorm": {
 
1999
  "shape": [
2000
  128,
2001
  384,
2002
+ 48
2003
  ],
2004
+ "n_bytes": 4718592,
2005
  "dtype": "torch.int16"
2006
  }
2007
  },
2008
  "quant_format": "exl3",
2009
+ "bits_per_weight": 3,
2010
  "mcg_multiplier": 3417055213
2011
  },
2012
  "layers.6.mlp.gate_proj": {
 
2034
  "shape": [
2035
  128,
2036
  384,
2037
+ 48
2038
  ],
2039
+ "n_bytes": 4718592,
2040
  "dtype": "torch.int16"
2041
  }
2042
  },
2043
  "quant_format": "exl3",
2044
+ "bits_per_weight": 3,
2045
  "mcg_multiplier": 3417055213
2046
  },
2047
  "layers.6.mlp.down_proj": {
 
2069
  "shape": [
2070
  384,
2071
  128,
2072
+ 48
2073
  ],
2074
+ "n_bytes": 4718592,
2075
  "dtype": "torch.int16"
2076
  }
2077
  },
2078
  "quant_format": "exl3",
2079
+ "bits_per_weight": 3,
2080
  "mcg_multiplier": 3417055213
2081
  },
2082
  "layers.7.input_layernorm": {
 
2288
  "shape": [
2289
  128,
2290
  384,
2291
+ 48
2292
  ],
2293
+ "n_bytes": 4718592,
2294
  "dtype": "torch.int16"
2295
  }
2296
  },
2297
  "quant_format": "exl3",
2298
+ "bits_per_weight": 3,
2299
  "mcg_multiplier": 3417055213
2300
  },
2301
  "layers.7.mlp.gate_proj": {
 
2323
  "shape": [
2324
  128,
2325
  384,
2326
+ 48
2327
  ],
2328
+ "n_bytes": 4718592,
2329
  "dtype": "torch.int16"
2330
  }
2331
  },
2332
  "quant_format": "exl3",
2333
+ "bits_per_weight": 3,
2334
  "mcg_multiplier": 3417055213
2335
  },
2336
  "layers.7.mlp.down_proj": {
 
2358
  "shape": [
2359
  384,
2360
  128,
2361
+ 48
2362
  ],
2363
+ "n_bytes": 4718592,
2364
  "dtype": "torch.int16"
2365
  }
2366
  },
2367
  "quant_format": "exl3",
2368
+ "bits_per_weight": 3,
2369
  "mcg_multiplier": 3417055213
2370
  },
2371
  "norm": {