Upload 5 files
Browse files- config.json +1 -1
- model.safetensors +2 -2
- quantization_config.json +58 -58
config.json
CHANGED
|
@@ -62,7 +62,7 @@
|
|
| 62 |
"quantization_config": {
|
| 63 |
"quant_method": "exl3",
|
| 64 |
"version": "0.0.32",
|
| 65 |
-
"bits":
|
| 66 |
"head_bits": 6,
|
| 67 |
"calibration": {
|
| 68 |
"rows": 250,
|
|
|
|
| 62 |
"quantization_config": {
|
| 63 |
"quant_method": "exl3",
|
| 64 |
"version": "0.0.32",
|
| 65 |
+
"bits": 3.0,
|
| 66 |
"head_bits": 6,
|
| 67 |
"calibration": {
|
| 68 |
"rows": 250,
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:56002139c64c9ee73aa02b0961ab838efbfa923c593cf7c7761b4eb40352f55a
|
| 3 |
+
size 178533879
|
quantization_config.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"quant_method": "exl3",
|
| 3 |
"version": "0.0.32",
|
| 4 |
-
"bits":
|
| 5 |
"head_bits": 6,
|
| 6 |
"calibration": {
|
| 7 |
"rows": 250,
|
|
@@ -624,14 +624,14 @@
|
|
| 624 |
"shape": [
|
| 625 |
384,
|
| 626 |
128,
|
| 627 |
-
|
| 628 |
],
|
| 629 |
-
"n_bytes":
|
| 630 |
"dtype": "torch.int16"
|
| 631 |
}
|
| 632 |
},
|
| 633 |
"quant_format": "exl3",
|
| 634 |
-
"bits_per_weight":
|
| 635 |
"mcg_multiplier": 3417055213
|
| 636 |
},
|
| 637 |
"layers.2.input_layernorm": {
|
|
@@ -843,14 +843,14 @@
|
|
| 843 |
"shape": [
|
| 844 |
128,
|
| 845 |
384,
|
| 846 |
-
|
| 847 |
],
|
| 848 |
-
"n_bytes":
|
| 849 |
"dtype": "torch.int16"
|
| 850 |
}
|
| 851 |
},
|
| 852 |
"quant_format": "exl3",
|
| 853 |
-
"bits_per_weight":
|
| 854 |
"mcg_multiplier": 3417055213
|
| 855 |
},
|
| 856 |
"layers.2.mlp.gate_proj": {
|
|
@@ -878,14 +878,14 @@
|
|
| 878 |
"shape": [
|
| 879 |
128,
|
| 880 |
384,
|
| 881 |
-
|
| 882 |
],
|
| 883 |
-
"n_bytes":
|
| 884 |
"dtype": "torch.int16"
|
| 885 |
}
|
| 886 |
},
|
| 887 |
"quant_format": "exl3",
|
| 888 |
-
"bits_per_weight":
|
| 889 |
"mcg_multiplier": 3417055213
|
| 890 |
},
|
| 891 |
"layers.2.mlp.down_proj": {
|
|
@@ -913,14 +913,14 @@
|
|
| 913 |
"shape": [
|
| 914 |
384,
|
| 915 |
128,
|
| 916 |
-
|
| 917 |
],
|
| 918 |
-
"n_bytes":
|
| 919 |
"dtype": "torch.int16"
|
| 920 |
}
|
| 921 |
},
|
| 922 |
"quant_format": "exl3",
|
| 923 |
-
"bits_per_weight":
|
| 924 |
"mcg_multiplier": 3417055213
|
| 925 |
},
|
| 926 |
"layers.3.input_layernorm": {
|
|
@@ -1132,14 +1132,14 @@
|
|
| 1132 |
"shape": [
|
| 1133 |
128,
|
| 1134 |
384,
|
| 1135 |
-
|
| 1136 |
],
|
| 1137 |
-
"n_bytes":
|
| 1138 |
"dtype": "torch.int16"
|
| 1139 |
}
|
| 1140 |
},
|
| 1141 |
"quant_format": "exl3",
|
| 1142 |
-
"bits_per_weight":
|
| 1143 |
"mcg_multiplier": 3417055213
|
| 1144 |
},
|
| 1145 |
"layers.3.mlp.gate_proj": {
|
|
@@ -1167,14 +1167,14 @@
|
|
| 1167 |
"shape": [
|
| 1168 |
128,
|
| 1169 |
384,
|
| 1170 |
-
|
| 1171 |
],
|
| 1172 |
-
"n_bytes":
|
| 1173 |
"dtype": "torch.int16"
|
| 1174 |
}
|
| 1175 |
},
|
| 1176 |
"quant_format": "exl3",
|
| 1177 |
-
"bits_per_weight":
|
| 1178 |
"mcg_multiplier": 3417055213
|
| 1179 |
},
|
| 1180 |
"layers.3.mlp.down_proj": {
|
|
@@ -1202,14 +1202,14 @@
|
|
| 1202 |
"shape": [
|
| 1203 |
384,
|
| 1204 |
128,
|
| 1205 |
-
|
| 1206 |
],
|
| 1207 |
-
"n_bytes":
|
| 1208 |
"dtype": "torch.int16"
|
| 1209 |
}
|
| 1210 |
},
|
| 1211 |
"quant_format": "exl3",
|
| 1212 |
-
"bits_per_weight":
|
| 1213 |
"mcg_multiplier": 3417055213
|
| 1214 |
},
|
| 1215 |
"layers.4.input_layernorm": {
|
|
@@ -1421,14 +1421,14 @@
|
|
| 1421 |
"shape": [
|
| 1422 |
128,
|
| 1423 |
384,
|
| 1424 |
-
|
| 1425 |
],
|
| 1426 |
-
"n_bytes":
|
| 1427 |
"dtype": "torch.int16"
|
| 1428 |
}
|
| 1429 |
},
|
| 1430 |
"quant_format": "exl3",
|
| 1431 |
-
"bits_per_weight":
|
| 1432 |
"mcg_multiplier": 3417055213
|
| 1433 |
},
|
| 1434 |
"layers.4.mlp.gate_proj": {
|
|
@@ -1456,14 +1456,14 @@
|
|
| 1456 |
"shape": [
|
| 1457 |
128,
|
| 1458 |
384,
|
| 1459 |
-
|
| 1460 |
],
|
| 1461 |
-
"n_bytes":
|
| 1462 |
"dtype": "torch.int16"
|
| 1463 |
}
|
| 1464 |
},
|
| 1465 |
"quant_format": "exl3",
|
| 1466 |
-
"bits_per_weight":
|
| 1467 |
"mcg_multiplier": 3417055213
|
| 1468 |
},
|
| 1469 |
"layers.4.mlp.down_proj": {
|
|
@@ -1491,14 +1491,14 @@
|
|
| 1491 |
"shape": [
|
| 1492 |
384,
|
| 1493 |
128,
|
| 1494 |
-
|
| 1495 |
],
|
| 1496 |
-
"n_bytes":
|
| 1497 |
"dtype": "torch.int16"
|
| 1498 |
}
|
| 1499 |
},
|
| 1500 |
"quant_format": "exl3",
|
| 1501 |
-
"bits_per_weight":
|
| 1502 |
"mcg_multiplier": 3417055213
|
| 1503 |
},
|
| 1504 |
"layers.5.input_layernorm": {
|
|
@@ -1710,14 +1710,14 @@
|
|
| 1710 |
"shape": [
|
| 1711 |
128,
|
| 1712 |
384,
|
| 1713 |
-
|
| 1714 |
],
|
| 1715 |
-
"n_bytes":
|
| 1716 |
"dtype": "torch.int16"
|
| 1717 |
}
|
| 1718 |
},
|
| 1719 |
"quant_format": "exl3",
|
| 1720 |
-
"bits_per_weight":
|
| 1721 |
"mcg_multiplier": 3417055213
|
| 1722 |
},
|
| 1723 |
"layers.5.mlp.gate_proj": {
|
|
@@ -1745,14 +1745,14 @@
|
|
| 1745 |
"shape": [
|
| 1746 |
128,
|
| 1747 |
384,
|
| 1748 |
-
|
| 1749 |
],
|
| 1750 |
-
"n_bytes":
|
| 1751 |
"dtype": "torch.int16"
|
| 1752 |
}
|
| 1753 |
},
|
| 1754 |
"quant_format": "exl3",
|
| 1755 |
-
"bits_per_weight":
|
| 1756 |
"mcg_multiplier": 3417055213
|
| 1757 |
},
|
| 1758 |
"layers.5.mlp.down_proj": {
|
|
@@ -1780,14 +1780,14 @@
|
|
| 1780 |
"shape": [
|
| 1781 |
384,
|
| 1782 |
128,
|
| 1783 |
-
|
| 1784 |
],
|
| 1785 |
-
"n_bytes":
|
| 1786 |
"dtype": "torch.int16"
|
| 1787 |
}
|
| 1788 |
},
|
| 1789 |
"quant_format": "exl3",
|
| 1790 |
-
"bits_per_weight":
|
| 1791 |
"mcg_multiplier": 3417055213
|
| 1792 |
},
|
| 1793 |
"layers.6.input_layernorm": {
|
|
@@ -1999,14 +1999,14 @@
|
|
| 1999 |
"shape": [
|
| 2000 |
128,
|
| 2001 |
384,
|
| 2002 |
-
|
| 2003 |
],
|
| 2004 |
-
"n_bytes":
|
| 2005 |
"dtype": "torch.int16"
|
| 2006 |
}
|
| 2007 |
},
|
| 2008 |
"quant_format": "exl3",
|
| 2009 |
-
"bits_per_weight":
|
| 2010 |
"mcg_multiplier": 3417055213
|
| 2011 |
},
|
| 2012 |
"layers.6.mlp.gate_proj": {
|
|
@@ -2034,14 +2034,14 @@
|
|
| 2034 |
"shape": [
|
| 2035 |
128,
|
| 2036 |
384,
|
| 2037 |
-
|
| 2038 |
],
|
| 2039 |
-
"n_bytes":
|
| 2040 |
"dtype": "torch.int16"
|
| 2041 |
}
|
| 2042 |
},
|
| 2043 |
"quant_format": "exl3",
|
| 2044 |
-
"bits_per_weight":
|
| 2045 |
"mcg_multiplier": 3417055213
|
| 2046 |
},
|
| 2047 |
"layers.6.mlp.down_proj": {
|
|
@@ -2069,14 +2069,14 @@
|
|
| 2069 |
"shape": [
|
| 2070 |
384,
|
| 2071 |
128,
|
| 2072 |
-
|
| 2073 |
],
|
| 2074 |
-
"n_bytes":
|
| 2075 |
"dtype": "torch.int16"
|
| 2076 |
}
|
| 2077 |
},
|
| 2078 |
"quant_format": "exl3",
|
| 2079 |
-
"bits_per_weight":
|
| 2080 |
"mcg_multiplier": 3417055213
|
| 2081 |
},
|
| 2082 |
"layers.7.input_layernorm": {
|
|
@@ -2288,14 +2288,14 @@
|
|
| 2288 |
"shape": [
|
| 2289 |
128,
|
| 2290 |
384,
|
| 2291 |
-
|
| 2292 |
],
|
| 2293 |
-
"n_bytes":
|
| 2294 |
"dtype": "torch.int16"
|
| 2295 |
}
|
| 2296 |
},
|
| 2297 |
"quant_format": "exl3",
|
| 2298 |
-
"bits_per_weight":
|
| 2299 |
"mcg_multiplier": 3417055213
|
| 2300 |
},
|
| 2301 |
"layers.7.mlp.gate_proj": {
|
|
@@ -2323,14 +2323,14 @@
|
|
| 2323 |
"shape": [
|
| 2324 |
128,
|
| 2325 |
384,
|
| 2326 |
-
|
| 2327 |
],
|
| 2328 |
-
"n_bytes":
|
| 2329 |
"dtype": "torch.int16"
|
| 2330 |
}
|
| 2331 |
},
|
| 2332 |
"quant_format": "exl3",
|
| 2333 |
-
"bits_per_weight":
|
| 2334 |
"mcg_multiplier": 3417055213
|
| 2335 |
},
|
| 2336 |
"layers.7.mlp.down_proj": {
|
|
@@ -2358,14 +2358,14 @@
|
|
| 2358 |
"shape": [
|
| 2359 |
384,
|
| 2360 |
128,
|
| 2361 |
-
|
| 2362 |
],
|
| 2363 |
-
"n_bytes":
|
| 2364 |
"dtype": "torch.int16"
|
| 2365 |
}
|
| 2366 |
},
|
| 2367 |
"quant_format": "exl3",
|
| 2368 |
-
"bits_per_weight":
|
| 2369 |
"mcg_multiplier": 3417055213
|
| 2370 |
},
|
| 2371 |
"norm": {
|
|
|
|
| 1 |
{
|
| 2 |
"quant_method": "exl3",
|
| 3 |
"version": "0.0.32",
|
| 4 |
+
"bits": 3.0,
|
| 5 |
"head_bits": 6,
|
| 6 |
"calibration": {
|
| 7 |
"rows": 250,
|
|
|
|
| 624 |
"shape": [
|
| 625 |
384,
|
| 626 |
128,
|
| 627 |
+
48
|
| 628 |
],
|
| 629 |
+
"n_bytes": 4718592,
|
| 630 |
"dtype": "torch.int16"
|
| 631 |
}
|
| 632 |
},
|
| 633 |
"quant_format": "exl3",
|
| 634 |
+
"bits_per_weight": 3,
|
| 635 |
"mcg_multiplier": 3417055213
|
| 636 |
},
|
| 637 |
"layers.2.input_layernorm": {
|
|
|
|
| 843 |
"shape": [
|
| 844 |
128,
|
| 845 |
384,
|
| 846 |
+
48
|
| 847 |
],
|
| 848 |
+
"n_bytes": 4718592,
|
| 849 |
"dtype": "torch.int16"
|
| 850 |
}
|
| 851 |
},
|
| 852 |
"quant_format": "exl3",
|
| 853 |
+
"bits_per_weight": 3,
|
| 854 |
"mcg_multiplier": 3417055213
|
| 855 |
},
|
| 856 |
"layers.2.mlp.gate_proj": {
|
|
|
|
| 878 |
"shape": [
|
| 879 |
128,
|
| 880 |
384,
|
| 881 |
+
48
|
| 882 |
],
|
| 883 |
+
"n_bytes": 4718592,
|
| 884 |
"dtype": "torch.int16"
|
| 885 |
}
|
| 886 |
},
|
| 887 |
"quant_format": "exl3",
|
| 888 |
+
"bits_per_weight": 3,
|
| 889 |
"mcg_multiplier": 3417055213
|
| 890 |
},
|
| 891 |
"layers.2.mlp.down_proj": {
|
|
|
|
| 913 |
"shape": [
|
| 914 |
384,
|
| 915 |
128,
|
| 916 |
+
48
|
| 917 |
],
|
| 918 |
+
"n_bytes": 4718592,
|
| 919 |
"dtype": "torch.int16"
|
| 920 |
}
|
| 921 |
},
|
| 922 |
"quant_format": "exl3",
|
| 923 |
+
"bits_per_weight": 3,
|
| 924 |
"mcg_multiplier": 3417055213
|
| 925 |
},
|
| 926 |
"layers.3.input_layernorm": {
|
|
|
|
| 1132 |
"shape": [
|
| 1133 |
128,
|
| 1134 |
384,
|
| 1135 |
+
48
|
| 1136 |
],
|
| 1137 |
+
"n_bytes": 4718592,
|
| 1138 |
"dtype": "torch.int16"
|
| 1139 |
}
|
| 1140 |
},
|
| 1141 |
"quant_format": "exl3",
|
| 1142 |
+
"bits_per_weight": 3,
|
| 1143 |
"mcg_multiplier": 3417055213
|
| 1144 |
},
|
| 1145 |
"layers.3.mlp.gate_proj": {
|
|
|
|
| 1167 |
"shape": [
|
| 1168 |
128,
|
| 1169 |
384,
|
| 1170 |
+
48
|
| 1171 |
],
|
| 1172 |
+
"n_bytes": 4718592,
|
| 1173 |
"dtype": "torch.int16"
|
| 1174 |
}
|
| 1175 |
},
|
| 1176 |
"quant_format": "exl3",
|
| 1177 |
+
"bits_per_weight": 3,
|
| 1178 |
"mcg_multiplier": 3417055213
|
| 1179 |
},
|
| 1180 |
"layers.3.mlp.down_proj": {
|
|
|
|
| 1202 |
"shape": [
|
| 1203 |
384,
|
| 1204 |
128,
|
| 1205 |
+
48
|
| 1206 |
],
|
| 1207 |
+
"n_bytes": 4718592,
|
| 1208 |
"dtype": "torch.int16"
|
| 1209 |
}
|
| 1210 |
},
|
| 1211 |
"quant_format": "exl3",
|
| 1212 |
+
"bits_per_weight": 3,
|
| 1213 |
"mcg_multiplier": 3417055213
|
| 1214 |
},
|
| 1215 |
"layers.4.input_layernorm": {
|
|
|
|
| 1421 |
"shape": [
|
| 1422 |
128,
|
| 1423 |
384,
|
| 1424 |
+
48
|
| 1425 |
],
|
| 1426 |
+
"n_bytes": 4718592,
|
| 1427 |
"dtype": "torch.int16"
|
| 1428 |
}
|
| 1429 |
},
|
| 1430 |
"quant_format": "exl3",
|
| 1431 |
+
"bits_per_weight": 3,
|
| 1432 |
"mcg_multiplier": 3417055213
|
| 1433 |
},
|
| 1434 |
"layers.4.mlp.gate_proj": {
|
|
|
|
| 1456 |
"shape": [
|
| 1457 |
128,
|
| 1458 |
384,
|
| 1459 |
+
48
|
| 1460 |
],
|
| 1461 |
+
"n_bytes": 4718592,
|
| 1462 |
"dtype": "torch.int16"
|
| 1463 |
}
|
| 1464 |
},
|
| 1465 |
"quant_format": "exl3",
|
| 1466 |
+
"bits_per_weight": 3,
|
| 1467 |
"mcg_multiplier": 3417055213
|
| 1468 |
},
|
| 1469 |
"layers.4.mlp.down_proj": {
|
|
|
|
| 1491 |
"shape": [
|
| 1492 |
384,
|
| 1493 |
128,
|
| 1494 |
+
48
|
| 1495 |
],
|
| 1496 |
+
"n_bytes": 4718592,
|
| 1497 |
"dtype": "torch.int16"
|
| 1498 |
}
|
| 1499 |
},
|
| 1500 |
"quant_format": "exl3",
|
| 1501 |
+
"bits_per_weight": 3,
|
| 1502 |
"mcg_multiplier": 3417055213
|
| 1503 |
},
|
| 1504 |
"layers.5.input_layernorm": {
|
|
|
|
| 1710 |
"shape": [
|
| 1711 |
128,
|
| 1712 |
384,
|
| 1713 |
+
48
|
| 1714 |
],
|
| 1715 |
+
"n_bytes": 4718592,
|
| 1716 |
"dtype": "torch.int16"
|
| 1717 |
}
|
| 1718 |
},
|
| 1719 |
"quant_format": "exl3",
|
| 1720 |
+
"bits_per_weight": 3,
|
| 1721 |
"mcg_multiplier": 3417055213
|
| 1722 |
},
|
| 1723 |
"layers.5.mlp.gate_proj": {
|
|
|
|
| 1745 |
"shape": [
|
| 1746 |
128,
|
| 1747 |
384,
|
| 1748 |
+
48
|
| 1749 |
],
|
| 1750 |
+
"n_bytes": 4718592,
|
| 1751 |
"dtype": "torch.int16"
|
| 1752 |
}
|
| 1753 |
},
|
| 1754 |
"quant_format": "exl3",
|
| 1755 |
+
"bits_per_weight": 3,
|
| 1756 |
"mcg_multiplier": 3417055213
|
| 1757 |
},
|
| 1758 |
"layers.5.mlp.down_proj": {
|
|
|
|
| 1780 |
"shape": [
|
| 1781 |
384,
|
| 1782 |
128,
|
| 1783 |
+
48
|
| 1784 |
],
|
| 1785 |
+
"n_bytes": 4718592,
|
| 1786 |
"dtype": "torch.int16"
|
| 1787 |
}
|
| 1788 |
},
|
| 1789 |
"quant_format": "exl3",
|
| 1790 |
+
"bits_per_weight": 3,
|
| 1791 |
"mcg_multiplier": 3417055213
|
| 1792 |
},
|
| 1793 |
"layers.6.input_layernorm": {
|
|
|
|
| 1999 |
"shape": [
|
| 2000 |
128,
|
| 2001 |
384,
|
| 2002 |
+
48
|
| 2003 |
],
|
| 2004 |
+
"n_bytes": 4718592,
|
| 2005 |
"dtype": "torch.int16"
|
| 2006 |
}
|
| 2007 |
},
|
| 2008 |
"quant_format": "exl3",
|
| 2009 |
+
"bits_per_weight": 3,
|
| 2010 |
"mcg_multiplier": 3417055213
|
| 2011 |
},
|
| 2012 |
"layers.6.mlp.gate_proj": {
|
|
|
|
| 2034 |
"shape": [
|
| 2035 |
128,
|
| 2036 |
384,
|
| 2037 |
+
48
|
| 2038 |
],
|
| 2039 |
+
"n_bytes": 4718592,
|
| 2040 |
"dtype": "torch.int16"
|
| 2041 |
}
|
| 2042 |
},
|
| 2043 |
"quant_format": "exl3",
|
| 2044 |
+
"bits_per_weight": 3,
|
| 2045 |
"mcg_multiplier": 3417055213
|
| 2046 |
},
|
| 2047 |
"layers.6.mlp.down_proj": {
|
|
|
|
| 2069 |
"shape": [
|
| 2070 |
384,
|
| 2071 |
128,
|
| 2072 |
+
48
|
| 2073 |
],
|
| 2074 |
+
"n_bytes": 4718592,
|
| 2075 |
"dtype": "torch.int16"
|
| 2076 |
}
|
| 2077 |
},
|
| 2078 |
"quant_format": "exl3",
|
| 2079 |
+
"bits_per_weight": 3,
|
| 2080 |
"mcg_multiplier": 3417055213
|
| 2081 |
},
|
| 2082 |
"layers.7.input_layernorm": {
|
|
|
|
| 2288 |
"shape": [
|
| 2289 |
128,
|
| 2290 |
384,
|
| 2291 |
+
48
|
| 2292 |
],
|
| 2293 |
+
"n_bytes": 4718592,
|
| 2294 |
"dtype": "torch.int16"
|
| 2295 |
}
|
| 2296 |
},
|
| 2297 |
"quant_format": "exl3",
|
| 2298 |
+
"bits_per_weight": 3,
|
| 2299 |
"mcg_multiplier": 3417055213
|
| 2300 |
},
|
| 2301 |
"layers.7.mlp.gate_proj": {
|
|
|
|
| 2323 |
"shape": [
|
| 2324 |
128,
|
| 2325 |
384,
|
| 2326 |
+
48
|
| 2327 |
],
|
| 2328 |
+
"n_bytes": 4718592,
|
| 2329 |
"dtype": "torch.int16"
|
| 2330 |
}
|
| 2331 |
},
|
| 2332 |
"quant_format": "exl3",
|
| 2333 |
+
"bits_per_weight": 3,
|
| 2334 |
"mcg_multiplier": 3417055213
|
| 2335 |
},
|
| 2336 |
"layers.7.mlp.down_proj": {
|
|
|
|
| 2358 |
"shape": [
|
| 2359 |
384,
|
| 2360 |
128,
|
| 2361 |
+
48
|
| 2362 |
],
|
| 2363 |
+
"n_bytes": 4718592,
|
| 2364 |
"dtype": "torch.int16"
|
| 2365 |
}
|
| 2366 |
},
|
| 2367 |
"quant_format": "exl3",
|
| 2368 |
+
"bits_per_weight": 3,
|
| 2369 |
"mcg_multiplier": 3417055213
|
| 2370 |
},
|
| 2371 |
"norm": {
|