| { |
| "metadata": { |
| "ParamSize": 648, |
| "ParamBytes": 8902356992.0, |
| "BitsPerParam": 4.5019815935059295 |
| }, |
| "records": [ |
| { |
| "dataPath": "params_shard_0.bin", |
| "format": "raw-shard", |
| "nbytes": 150994944, |
| "records": [ |
| { |
| "name": "lm_head.q_weight", |
| "shape": [ |
| 49152, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 150994944, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "babcefb413e4f053531159532b1603c5" |
| }, |
| { |
| "dataPath": "params_shard_1.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.38.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "5ae40837c56fc53a1e6867cc93ee441f" |
| }, |
| { |
| "dataPath": "params_shard_2.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.38.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2397855968fbee540027008677ddf020" |
| }, |
| { |
| "dataPath": "params_shard_3.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.38.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "dea2f1f44cd50fc82df9cbeb7ca29dd1" |
| }, |
| { |
| "dataPath": "params_shard_4.bin", |
| "format": "raw-shard", |
| "nbytes": 30769152, |
| "records": [ |
| { |
| "name": "lm_head.q_scale", |
| "shape": [ |
| 49152, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.38.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 18874368 |
| }, |
| { |
| "name": "transformer.h.38.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 18886656 |
| }, |
| { |
| "name": "transformer.h.38.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 21245952 |
| }, |
| { |
| "name": "transformer.h.38.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 21258240 |
| }, |
| { |
| "name": "transformer.h.38.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 21270528 |
| }, |
| { |
| "name": "transformer.h.38.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 21319680 |
| }, |
| { |
| "name": "transformer.h.38.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 30756864 |
| } |
| ], |
| "md5sum": "bc501ee66f72f3f28eea2a0dda3e9872" |
| }, |
| { |
| "dataPath": "params_shard_5.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.39.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0886e966a07a80c3e1a4170221d333ab" |
| }, |
| { |
| "dataPath": "params_shard_6.bin", |
| "format": "raw-shard", |
| "nbytes": 31580672, |
| "records": [ |
| { |
| "name": "transformer.h.38.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.39.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.39.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 9449984 |
| }, |
| { |
| "name": "transformer.h.39.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 29110784 |
| }, |
| { |
| "name": "transformer.h.39.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 31568384 |
| } |
| ], |
| "md5sum": "e49fa3e84a78e3a2e53b084165a1c521" |
| }, |
| { |
| "dataPath": "params_shard_7.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.39.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0c8ee868d1e77ca9833a15d68538d538" |
| }, |
| { |
| "dataPath": "params_shard_8.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.39.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "562d7e70fd8ebe1e00887ffe19e1972c" |
| }, |
| { |
| "dataPath": "params_shard_9.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.0.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b3a6a1ec978b9ed8f6262c9a12da107a" |
| }, |
| { |
| "dataPath": "params_shard_10.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.0.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "20ffc69b60fb065cac8601db108435f4" |
| }, |
| { |
| "dataPath": "params_shard_11.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.0.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a8ff096b5c3ea5e18273979807e9e68e" |
| }, |
| { |
| "dataPath": "params_shard_12.bin", |
| "format": "raw-shard", |
| "nbytes": 26309120, |
| "records": [ |
| { |
| "name": "transformer.h.39.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.39.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2359296 |
| }, |
| { |
| "name": "transformer.h.39.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2371584 |
| }, |
| { |
| "name": "transformer.h.39.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2383872 |
| }, |
| { |
| "name": "transformer.h.39.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2396160 |
| }, |
| { |
| "name": "transformer.h.39.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 2408448 |
| }, |
| { |
| "name": "transformer.h.39.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 2457600 |
| }, |
| { |
| "name": "transformer.h.39.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 11894784 |
| }, |
| { |
| "name": "transformer.h.39.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11907072 |
| }, |
| { |
| "name": "transformer.ln_f.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 21344256 |
| }, |
| { |
| "name": "transformer.ln_f.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 21356544 |
| }, |
| { |
| "name": "transformer.h.0.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 21368832 |
| }, |
| { |
| "name": "transformer.h.0.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 21381632 |
| }, |
| { |
| "name": "transformer.h.0.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23839232 |
| }, |
| { |
| "name": "transformer.h.0.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 23851520 |
| }, |
| { |
| "name": "transformer.h.0.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26210816 |
| }, |
| { |
| "name": "transformer.h.0.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26223104 |
| }, |
| { |
| "name": "transformer.h.0.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26235392 |
| }, |
| { |
| "name": "transformer.h.0.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26247680 |
| }, |
| { |
| "name": "transformer.h.0.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 26259968 |
| } |
| ], |
| "md5sum": "856d05c256b46f8c834824faeb3ff628" |
| }, |
| { |
| "dataPath": "params_shard_13.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.0.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a6bbaca2578b4639dbd2d54510beddbf" |
| }, |
| { |
| "dataPath": "params_shard_14.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.1.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "80cf30393e14efab29306e340d0ce595" |
| }, |
| { |
| "dataPath": "params_shard_15.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.1.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cffaac7091ff103b4fa07c4d4561caa3" |
| }, |
| { |
| "dataPath": "params_shard_16.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.1.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3d845f06b6924fb5da9d9e39a7da3894" |
| }, |
| { |
| "dataPath": "params_shard_17.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.1.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0752298d33b77e2a5abfff59e0002246" |
| }, |
| { |
| "dataPath": "params_shard_18.bin", |
| "format": "raw-shard", |
| "nbytes": 33276416, |
| "records": [ |
| { |
| "name": "transformer.h.0.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.0.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.0.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 9449472 |
| }, |
| { |
| "name": "transformer.h.1.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 18886656 |
| }, |
| { |
| "name": "transformer.h.1.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 18899456 |
| }, |
| { |
| "name": "transformer.h.1.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.1.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 21369344 |
| }, |
| { |
| "name": "transformer.h.1.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23728640 |
| }, |
| { |
| "name": "transformer.h.1.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23740928 |
| }, |
| { |
| "name": "transformer.h.1.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23753216 |
| }, |
| { |
| "name": "transformer.h.1.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23765504 |
| }, |
| { |
| "name": "transformer.h.1.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 23777792 |
| }, |
| { |
| "name": "transformer.h.1.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.1.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 33264128 |
| } |
| ], |
| "md5sum": "00388c94dedf014b0205e210d3d24a82" |
| }, |
| { |
| "dataPath": "params_shard_19.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.2.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7563282d813947f7c4011a0e6765ed46" |
| }, |
| { |
| "dataPath": "params_shard_20.bin", |
| "format": "raw-shard", |
| "nbytes": 31580672, |
| "records": [ |
| { |
| "name": "transformer.h.1.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.2.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.2.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 9449984 |
| }, |
| { |
| "name": "transformer.h.2.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 29110784 |
| }, |
| { |
| "name": "transformer.h.2.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 31568384 |
| } |
| ], |
| "md5sum": "383a9bfbf85a08c9822f4479134e08c8" |
| }, |
| { |
| "dataPath": "params_shard_21.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.2.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a6093d6d592c71b32c331d92015de46a" |
| }, |
| { |
| "dataPath": "params_shard_22.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.2.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2be994479a3e9299ebd19ad19b468a5d" |
| }, |
| { |
| "dataPath": "params_shard_23.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.3.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4d532ef18e39e73a53712d836f5572b7" |
| }, |
| { |
| "dataPath": "params_shard_24.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.3.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2e4b432961cd77e0dbf1c3966adada95" |
| }, |
| { |
| "dataPath": "params_shard_25.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.3.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8a5b7422608712fc1c85e7052939b372" |
| }, |
| { |
| "dataPath": "params_shard_26.bin", |
| "format": "raw-shard", |
| "nbytes": 26284544, |
| "records": [ |
| { |
| "name": "transformer.h.2.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.2.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2359296 |
| }, |
| { |
| "name": "transformer.h.2.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2371584 |
| }, |
| { |
| "name": "transformer.h.2.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2383872 |
| }, |
| { |
| "name": "transformer.h.2.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2396160 |
| }, |
| { |
| "name": "transformer.h.2.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 2408448 |
| }, |
| { |
| "name": "transformer.h.2.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 2457600 |
| }, |
| { |
| "name": "transformer.h.2.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 11894784 |
| }, |
| { |
| "name": "transformer.h.2.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11907072 |
| }, |
| { |
| "name": "transformer.h.3.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 21344256 |
| }, |
| { |
| "name": "transformer.h.3.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.3.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23814656 |
| }, |
| { |
| "name": "transformer.h.3.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.3.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26186240 |
| }, |
| { |
| "name": "transformer.h.3.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26198528 |
| }, |
| { |
| "name": "transformer.h.3.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26210816 |
| }, |
| { |
| "name": "transformer.h.3.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26223104 |
| }, |
| { |
| "name": "transformer.h.3.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 26235392 |
| } |
| ], |
| "md5sum": "dd48cc2df11d4cc8e5a0744d58e53434" |
| }, |
| { |
| "dataPath": "params_shard_27.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.3.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4e758a47111ac878b0d1004a320f77d6" |
| }, |
| { |
| "dataPath": "params_shard_28.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.4.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6b2e75ee6fbadd41d4c05c769408f166" |
| }, |
| { |
| "dataPath": "params_shard_29.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.4.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "107cf68ffb72b7d2038f48ac7d70fc60" |
| }, |
| { |
| "dataPath": "params_shard_30.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.4.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8e143ab7f80422fc54459c4a1138c79b" |
| }, |
| { |
| "dataPath": "params_shard_31.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.4.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cf409852ac66d6c0f1a9f68ea9bce9e9" |
| }, |
| { |
| "dataPath": "params_shard_32.bin", |
| "format": "raw-shard", |
| "nbytes": 33276416, |
| "records": [ |
| { |
| "name": "transformer.h.3.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.3.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.3.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 9449472 |
| }, |
| { |
| "name": "transformer.h.4.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 18886656 |
| }, |
| { |
| "name": "transformer.h.4.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 18899456 |
| }, |
| { |
| "name": "transformer.h.4.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.4.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 21369344 |
| }, |
| { |
| "name": "transformer.h.4.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23728640 |
| }, |
| { |
| "name": "transformer.h.4.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23740928 |
| }, |
| { |
| "name": "transformer.h.4.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23753216 |
| }, |
| { |
| "name": "transformer.h.4.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23765504 |
| }, |
| { |
| "name": "transformer.h.4.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 23777792 |
| }, |
| { |
| "name": "transformer.h.4.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.4.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 33264128 |
| } |
| ], |
| "md5sum": "9b3380925f6348de1275f416981565e4" |
| }, |
| { |
| "dataPath": "params_shard_33.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.5.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "507aa7fbea7f5ccc11d7f47bd7dccded" |
| }, |
| { |
| "dataPath": "params_shard_34.bin", |
| "format": "raw-shard", |
| "nbytes": 31580672, |
| "records": [ |
| { |
| "name": "transformer.h.4.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.5.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.5.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 9449984 |
| }, |
| { |
| "name": "transformer.h.5.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 29110784 |
| }, |
| { |
| "name": "transformer.h.5.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 31568384 |
| } |
| ], |
| "md5sum": "49bfbd5c820118a6bb95ba5981d7b28d" |
| }, |
| { |
| "dataPath": "params_shard_35.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.5.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "94304452ad1b1ddba89430224cf5e2bb" |
| }, |
| { |
| "dataPath": "params_shard_36.bin", |
| "format": "raw-shard", |
| "nbytes": 25165824, |
| "records": [ |
| { |
| "name": "transformer.wpe.q_weight", |
| "shape": [ |
| 8192, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 25165824, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d222cfc4fbec8e71e417524cacb7bcfa" |
| }, |
| { |
| "dataPath": "params_shard_37.bin", |
| "format": "raw-shard", |
| "nbytes": 150994944, |
| "records": [ |
| { |
| "name": "transformer.wte.q_weight", |
| "shape": [ |
| 49152, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 150994944, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "babcefb413e4f053531159532b1603c5" |
| }, |
| { |
| "dataPath": "params_shard_38.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.wte.q_scale", |
| "shape": [ |
| 49152, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2264d9cdd42deb032041fc1f527c7768" |
| }, |
| { |
| "dataPath": "params_shard_39.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.10.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "997171f727125282a44a75bd8e72af1e" |
| }, |
| { |
| "dataPath": "params_shard_40.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.10.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "411ad9d5c020f21d73b2c9859d15ccd8" |
| }, |
| { |
| "dataPath": "params_shard_41.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.10.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "69eaef092310789038670bc4e1d9d164" |
| }, |
| { |
| "dataPath": "params_shard_42.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.10.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "445a4ee984f81d497db02108e3422c2c" |
| }, |
| { |
| "dataPath": "params_shard_43.bin", |
| "format": "raw-shard", |
| "nbytes": 29430272, |
| "records": [ |
| { |
| "name": "transformer.h.5.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.5.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2359296 |
| }, |
| { |
| "name": "transformer.h.5.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2371584 |
| }, |
| { |
| "name": "transformer.h.5.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2383872 |
| }, |
| { |
| "name": "transformer.h.5.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2396160 |
| }, |
| { |
| "name": "transformer.h.5.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 2408448 |
| }, |
| { |
| "name": "transformer.h.5.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 2457600 |
| }, |
| { |
| "name": "transformer.wpe.q_scale", |
| "shape": [ |
| 8192, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 3145728, |
| "byteOffset": 11894784 |
| }, |
| { |
| "name": "transformer.h.10.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 15040512 |
| }, |
| { |
| "name": "transformer.h.10.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 15053312 |
| }, |
| { |
| "name": "transformer.h.10.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 17510912 |
| }, |
| { |
| "name": "transformer.h.10.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 17523200 |
| }, |
| { |
| "name": "transformer.h.10.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 19882496 |
| }, |
| { |
| "name": "transformer.h.10.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 19894784 |
| }, |
| { |
| "name": "transformer.h.10.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 19907072 |
| }, |
| { |
| "name": "transformer.h.10.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 19919360 |
| }, |
| { |
| "name": "transformer.h.10.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 19931648 |
| }, |
| { |
| "name": "transformer.h.10.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 19980800 |
| }, |
| { |
| "name": "transformer.h.10.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 29417984 |
| } |
| ], |
| "md5sum": "7dd4f3533ddef5fff3f934212d92ce7c" |
| }, |
| { |
| "dataPath": "params_shard_44.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.11.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "49587dab77a1a737f36e88db54281879" |
| }, |
| { |
| "dataPath": "params_shard_45.bin", |
| "format": "raw-shard", |
| "nbytes": 31580672, |
| "records": [ |
| { |
| "name": "transformer.h.10.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.11.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.11.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 9449984 |
| }, |
| { |
| "name": "transformer.h.11.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 29110784 |
| }, |
| { |
| "name": "transformer.h.11.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 31568384 |
| } |
| ], |
| "md5sum": "5d4d02f5632affbb85c67f79b1f9a2a4" |
| }, |
| { |
| "dataPath": "params_shard_46.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.11.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c852bea71100fcab68c49cf37ca98ddc" |
| }, |
| { |
| "dataPath": "params_shard_47.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.11.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "10fe275e31c5e312d15468acf3f6b94c" |
| }, |
| { |
| "dataPath": "params_shard_48.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.12.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "869d51a3fd6f40e08fdf3babf5ed92ed" |
| }, |
| { |
| "dataPath": "params_shard_49.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.5.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4444b8eb4ab2d35d7dab5faca96a6493" |
| }, |
| { |
| "dataPath": "params_shard_50.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.6.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ee0ba99d566136756b211382c1ab1b1d" |
| }, |
| { |
| "dataPath": "params_shard_51.bin", |
| "format": "raw-shard", |
| "nbytes": 33301504, |
| "records": [ |
| { |
| "name": "transformer.h.11.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.11.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2359296 |
| }, |
| { |
| "name": "transformer.h.11.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2371584 |
| }, |
| { |
| "name": "transformer.h.11.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2383872 |
| }, |
| { |
| "name": "transformer.h.11.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2396160 |
| }, |
| { |
| "name": "transformer.h.11.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 2408448 |
| }, |
| { |
| "name": "transformer.h.11.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 2457600 |
| }, |
| { |
| "name": "transformer.h.11.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 11894784 |
| }, |
| { |
| "name": "transformer.h.11.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11907072 |
| }, |
| { |
| "name": "transformer.h.12.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 21344256 |
| }, |
| { |
| "name": "transformer.h.12.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.12.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23814656 |
| }, |
| { |
| "name": "transformer.h.12.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.5.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23839232 |
| }, |
| { |
| "name": "transformer.h.5.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 23851520 |
| }, |
| { |
| "name": "transformer.h.6.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 33288704 |
| } |
| ], |
| "md5sum": "e426935f44ffb1f68e4d0fedd3fbe18c" |
| }, |
| { |
| "dataPath": "params_shard_52.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.6.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "91c4f854c771d0cf03fbde9d20c7928e" |
| }, |
| { |
| "dataPath": "params_shard_53.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.6.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "32ee966573e0643ec25ffe1f36bedc3c" |
| }, |
| { |
| "dataPath": "params_shard_54.bin", |
| "format": "raw-shard", |
| "nbytes": 33251328, |
| "records": [ |
| { |
| "name": "transformer.h.6.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.6.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2457600 |
| }, |
| { |
| "name": "transformer.h.6.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 2469888 |
| }, |
| { |
| "name": "transformer.h.6.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 21344256 |
| }, |
| { |
| "name": "transformer.h.6.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23703552 |
| }, |
| { |
| "name": "transformer.h.6.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23715840 |
| }, |
| { |
| "name": "transformer.h.6.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23728128 |
| }, |
| { |
| "name": "transformer.h.6.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23740416 |
| }, |
| { |
| "name": "transformer.h.6.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 23752704 |
| }, |
| { |
| "name": "transformer.h.6.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 23801856 |
| }, |
| { |
| "name": "transformer.h.6.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 33239040 |
| } |
| ], |
| "md5sum": "fcb61b52ac35392dce73b5274e3a4f6d" |
| }, |
| { |
| "dataPath": "params_shard_55.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.7.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "09ea0c114d334279668813cd4bde6ea6" |
| }, |
| { |
| "dataPath": "params_shard_56.bin", |
| "format": "raw-shard", |
| "nbytes": 31580672, |
| "records": [ |
| { |
| "name": "transformer.h.6.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.7.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.7.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 9449984 |
| }, |
| { |
| "name": "transformer.h.7.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 29110784 |
| }, |
| { |
| "name": "transformer.h.7.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 31568384 |
| } |
| ], |
| "md5sum": "f267f9afaee2414c60114d460c323b31" |
| }, |
| { |
| "dataPath": "params_shard_57.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.7.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7e2ebc903f1f7f37d1901b6b9ac38cf3" |
| }, |
| { |
| "dataPath": "params_shard_58.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.7.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b45f6894e1dc84f40d9390374f7dc290" |
| }, |
| { |
| "dataPath": "params_shard_59.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.8.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6a474854afce52990d4a395bd2ae12f0" |
| }, |
| { |
| "dataPath": "params_shard_60.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.8.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "afbbd7c4b89f87d390cf2405d8ee8523" |
| }, |
| { |
| "dataPath": "params_shard_61.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.8.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "866348524c61711d88f824a1db5d5d6e" |
| }, |
| { |
| "dataPath": "params_shard_62.bin", |
| "format": "raw-shard", |
| "nbytes": 26284544, |
| "records": [ |
| { |
| "name": "transformer.h.7.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.7.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2359296 |
| }, |
| { |
| "name": "transformer.h.7.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2371584 |
| }, |
| { |
| "name": "transformer.h.7.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2383872 |
| }, |
| { |
| "name": "transformer.h.7.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2396160 |
| }, |
| { |
| "name": "transformer.h.7.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 2408448 |
| }, |
| { |
| "name": "transformer.h.7.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 2457600 |
| }, |
| { |
| "name": "transformer.h.7.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 11894784 |
| }, |
| { |
| "name": "transformer.h.7.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11907072 |
| }, |
| { |
| "name": "transformer.h.8.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 21344256 |
| }, |
| { |
| "name": "transformer.h.8.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.8.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23814656 |
| }, |
| { |
| "name": "transformer.h.8.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.8.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26186240 |
| }, |
| { |
| "name": "transformer.h.8.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26198528 |
| }, |
| { |
| "name": "transformer.h.8.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26210816 |
| }, |
| { |
| "name": "transformer.h.8.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26223104 |
| }, |
| { |
| "name": "transformer.h.8.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 26235392 |
| } |
| ], |
| "md5sum": "5dea17d4a41e64683a66498748dfc66e" |
| }, |
| { |
| "dataPath": "params_shard_63.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.8.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2c71df6d0ca289385dde0430939c7dd0" |
| }, |
| { |
| "dataPath": "params_shard_64.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.9.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fdb3806ea16c6a5f9b7137022aa692ae" |
| }, |
| { |
| "dataPath": "params_shard_65.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.9.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9a72fc2e5b14dda8a84221c35c2be2d8" |
| }, |
| { |
| "dataPath": "params_shard_66.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.9.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "26291d69203dffdb4b623b4d075acb7a" |
| }, |
| { |
| "dataPath": "params_shard_67.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.9.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "639567378ea8a0b71c20489f94930e82" |
| }, |
| { |
| "dataPath": "params_shard_68.bin", |
| "format": "raw-shard", |
| "nbytes": 33276416, |
| "records": [ |
| { |
| "name": "transformer.h.8.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.8.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.8.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 9449472 |
| }, |
| { |
| "name": "transformer.h.9.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 18886656 |
| }, |
| { |
| "name": "transformer.h.9.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 18899456 |
| }, |
| { |
| "name": "transformer.h.9.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.9.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 21369344 |
| }, |
| { |
| "name": "transformer.h.9.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23728640 |
| }, |
| { |
| "name": "transformer.h.9.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23740928 |
| }, |
| { |
| "name": "transformer.h.9.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23753216 |
| }, |
| { |
| "name": "transformer.h.9.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23765504 |
| }, |
| { |
| "name": "transformer.h.9.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 23777792 |
| }, |
| { |
| "name": "transformer.h.9.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.9.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 33264128 |
| } |
| ], |
| "md5sum": "dfa35ae56731bc34e0a4416c6a802933" |
| }, |
| { |
| "dataPath": "params_shard_69.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.12.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d6c540589f42dbd5cc46255108b45e6e" |
| }, |
| { |
| "dataPath": "params_shard_70.bin", |
| "format": "raw-shard", |
| "nbytes": 30756864, |
| "records": [ |
| { |
| "name": "transformer.h.9.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.12.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.12.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 9449472 |
| }, |
| { |
| "name": "transformer.h.12.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 28323840 |
| }, |
| { |
| "name": "transformer.h.12.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 30683136 |
| }, |
| { |
| "name": "transformer.h.12.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 30695424 |
| }, |
| { |
| "name": "transformer.h.12.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 30707712 |
| } |
| ], |
| "md5sum": "c50334c9be1c7b7165b44413a4229d2e" |
| }, |
| { |
| "dataPath": "params_shard_71.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.12.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e861e81980615136c9b6182a4381947f" |
| }, |
| { |
| "dataPath": "params_shard_72.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.13.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bbd415368c2c5dc748f4cfbc7bf89956" |
| }, |
| { |
| "dataPath": "params_shard_73.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.13.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0374aba60b137e3fea7968fecd3f9b64" |
| }, |
| { |
| "dataPath": "params_shard_74.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.13.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8dbb90af792131bac5df5b8a738a4467" |
| }, |
| { |
| "dataPath": "params_shard_75.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.13.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d8866a59d5aed528a1889eb2d11c0b91" |
| }, |
| { |
| "dataPath": "params_shard_76.bin", |
| "format": "raw-shard", |
| "nbytes": 33276416, |
| "records": [ |
| { |
| "name": "transformer.h.12.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.12.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.12.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 9449472 |
| }, |
| { |
| "name": "transformer.h.13.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 18886656 |
| }, |
| { |
| "name": "transformer.h.13.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 18899456 |
| }, |
| { |
| "name": "transformer.h.13.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.13.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 21369344 |
| }, |
| { |
| "name": "transformer.h.13.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23728640 |
| }, |
| { |
| "name": "transformer.h.13.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23740928 |
| }, |
| { |
| "name": "transformer.h.13.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23753216 |
| }, |
| { |
| "name": "transformer.h.13.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23765504 |
| }, |
| { |
| "name": "transformer.h.13.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 23777792 |
| }, |
| { |
| "name": "transformer.h.13.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.13.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 33264128 |
| } |
| ], |
| "md5sum": "f2ca060c4d6c99d620813b126b607255" |
| }, |
| { |
| "dataPath": "params_shard_77.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.14.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "584d95cca7023d6ebb1aed5751ba47c5" |
| }, |
| { |
| "dataPath": "params_shard_78.bin", |
| "format": "raw-shard", |
| "nbytes": 31580672, |
| "records": [ |
| { |
| "name": "transformer.h.13.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.14.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.14.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 9449984 |
| }, |
| { |
| "name": "transformer.h.14.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 29110784 |
| }, |
| { |
| "name": "transformer.h.14.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 31568384 |
| } |
| ], |
| "md5sum": "70443ea582b2dfe15a0dcad53e30af2d" |
| }, |
| { |
| "dataPath": "params_shard_79.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.14.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0a436b5781b6425e631ea1da821f2b71" |
| }, |
| { |
| "dataPath": "params_shard_80.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.14.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3f81ea6401fa1355b1fb6179c61f6fd3" |
| }, |
| { |
| "dataPath": "params_shard_81.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.15.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "52894e59d1012a18b7ee108af5f1241c" |
| }, |
| { |
| "dataPath": "params_shard_82.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.15.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "867fbc93f990ac267aaf08d8206a950b" |
| }, |
| { |
| "dataPath": "params_shard_83.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.15.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "68ba6c14f0ed7a4890400377da8fa5d9" |
| }, |
| { |
| "dataPath": "params_shard_84.bin", |
| "format": "raw-shard", |
| "nbytes": 26284544, |
| "records": [ |
| { |
| "name": "transformer.h.14.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.14.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2359296 |
| }, |
| { |
| "name": "transformer.h.14.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2371584 |
| }, |
| { |
| "name": "transformer.h.14.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2383872 |
| }, |
| { |
| "name": "transformer.h.14.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2396160 |
| }, |
| { |
| "name": "transformer.h.14.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 2408448 |
| }, |
| { |
| "name": "transformer.h.14.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 2457600 |
| }, |
| { |
| "name": "transformer.h.14.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 11894784 |
| }, |
| { |
| "name": "transformer.h.14.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11907072 |
| }, |
| { |
| "name": "transformer.h.15.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 21344256 |
| }, |
| { |
| "name": "transformer.h.15.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.15.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23814656 |
| }, |
| { |
| "name": "transformer.h.15.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.15.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26186240 |
| }, |
| { |
| "name": "transformer.h.15.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26198528 |
| }, |
| { |
| "name": "transformer.h.15.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26210816 |
| }, |
| { |
| "name": "transformer.h.15.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26223104 |
| }, |
| { |
| "name": "transformer.h.15.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 26235392 |
| } |
| ], |
| "md5sum": "f188961bef546fc4ce9816e6ed395155" |
| }, |
| { |
| "dataPath": "params_shard_85.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.15.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "a36c755b2ebcfd9a9eabc96b8420e18e" |
| }, |
| { |
| "dataPath": "params_shard_86.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.16.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "62840090d164e0e7a9dacefb0c4c63da" |
| }, |
| { |
| "dataPath": "params_shard_87.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.16.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "37c77cd53e7071bc63459a36c15fd08b" |
| }, |
| { |
| "dataPath": "params_shard_88.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.16.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fe7f24e364f1ad2d04260ffaa05a537c" |
| }, |
| { |
| "dataPath": "params_shard_89.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.16.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "04b7c1b29a28f73273a23469eb8ed983" |
| }, |
| { |
| "dataPath": "params_shard_90.bin", |
| "format": "raw-shard", |
| "nbytes": 33276416, |
| "records": [ |
| { |
| "name": "transformer.h.15.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.15.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.15.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 9449472 |
| }, |
| { |
| "name": "transformer.h.16.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 18886656 |
| }, |
| { |
| "name": "transformer.h.16.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 18899456 |
| }, |
| { |
| "name": "transformer.h.16.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.16.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 21369344 |
| }, |
| { |
| "name": "transformer.h.16.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23728640 |
| }, |
| { |
| "name": "transformer.h.16.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23740928 |
| }, |
| { |
| "name": "transformer.h.16.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23753216 |
| }, |
| { |
| "name": "transformer.h.16.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23765504 |
| }, |
| { |
| "name": "transformer.h.16.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 23777792 |
| }, |
| { |
| "name": "transformer.h.16.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.16.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 33264128 |
| } |
| ], |
| "md5sum": "7e4dfbab41e6509eb9b8d2773b28fed5" |
| }, |
| { |
| "dataPath": "params_shard_91.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.17.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ad85f1072ceba1420d5a48d7b0e24097" |
| }, |
| { |
| "dataPath": "params_shard_92.bin", |
| "format": "raw-shard", |
| "nbytes": 31580672, |
| "records": [ |
| { |
| "name": "transformer.h.16.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.17.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.17.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 9449984 |
| }, |
| { |
| "name": "transformer.h.17.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 29110784 |
| }, |
| { |
| "name": "transformer.h.17.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 31568384 |
| } |
| ], |
| "md5sum": "925535ab1de95a11bd0f8bf3eccd1827" |
| }, |
| { |
| "dataPath": "params_shard_93.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.17.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "260801454421f59c0f21adc8b4c8e75a" |
| }, |
| { |
| "dataPath": "params_shard_94.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.17.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "16b0cd04ec30fab0ee415f2996bed0fe" |
| }, |
| { |
| "dataPath": "params_shard_95.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.18.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1c33c6ec81846fda7ec5049f24bfc6ff" |
| }, |
| { |
| "dataPath": "params_shard_96.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.18.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e43c9b17ecf4048039d23e4963a65826" |
| }, |
| { |
| "dataPath": "params_shard_97.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.18.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fb58b5ecddf6d5e645e5c2f56b9ce4d9" |
| }, |
| { |
| "dataPath": "params_shard_98.bin", |
| "format": "raw-shard", |
| "nbytes": 26284544, |
| "records": [ |
| { |
| "name": "transformer.h.17.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.17.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2359296 |
| }, |
| { |
| "name": "transformer.h.17.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2371584 |
| }, |
| { |
| "name": "transformer.h.17.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2383872 |
| }, |
| { |
| "name": "transformer.h.17.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2396160 |
| }, |
| { |
| "name": "transformer.h.17.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 2408448 |
| }, |
| { |
| "name": "transformer.h.17.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 2457600 |
| }, |
| { |
| "name": "transformer.h.17.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 11894784 |
| }, |
| { |
| "name": "transformer.h.17.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11907072 |
| }, |
| { |
| "name": "transformer.h.18.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 21344256 |
| }, |
| { |
| "name": "transformer.h.18.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.18.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23814656 |
| }, |
| { |
| "name": "transformer.h.18.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.18.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26186240 |
| }, |
| { |
| "name": "transformer.h.18.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26198528 |
| }, |
| { |
| "name": "transformer.h.18.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26210816 |
| }, |
| { |
| "name": "transformer.h.18.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26223104 |
| }, |
| { |
| "name": "transformer.h.18.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 26235392 |
| } |
| ], |
| "md5sum": "44eedba83e95cb0fc371e85dad061a72" |
| }, |
| { |
| "dataPath": "params_shard_99.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.18.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "23776761b7d6d5dcbf8d0bd3daf77e04" |
| }, |
| { |
| "dataPath": "params_shard_100.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.19.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d1f584fee39c9bf061fe8d5956fa1c4a" |
| }, |
| { |
| "dataPath": "params_shard_101.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.19.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bf04e079e21848e26d10f6727e2eb9cd" |
| }, |
| { |
| "dataPath": "params_shard_102.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.19.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "00e543d447c27cc2aeede2f5a2a71e08" |
| }, |
| { |
| "dataPath": "params_shard_103.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.19.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0a81513dbbece2c86c72a6438a3f7648" |
| }, |
| { |
| "dataPath": "params_shard_104.bin", |
| "format": "raw-shard", |
| "nbytes": 33276416, |
| "records": [ |
| { |
| "name": "transformer.h.18.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.18.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.18.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 9449472 |
| }, |
| { |
| "name": "transformer.h.19.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 18886656 |
| }, |
| { |
| "name": "transformer.h.19.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 18899456 |
| }, |
| { |
| "name": "transformer.h.19.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.19.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 21369344 |
| }, |
| { |
| "name": "transformer.h.19.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23728640 |
| }, |
| { |
| "name": "transformer.h.19.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23740928 |
| }, |
| { |
| "name": "transformer.h.19.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23753216 |
| }, |
| { |
| "name": "transformer.h.19.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23765504 |
| }, |
| { |
| "name": "transformer.h.19.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 23777792 |
| }, |
| { |
| "name": "transformer.h.19.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.19.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 33264128 |
| } |
| ], |
| "md5sum": "519d1c4d5b4ae091da308478ec77e8a7" |
| }, |
| { |
| "dataPath": "params_shard_105.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.20.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "48bd1b290ea35af99dd8f2ed381f1c36" |
| }, |
| { |
| "dataPath": "params_shard_106.bin", |
| "format": "raw-shard", |
| "nbytes": 31580672, |
| "records": [ |
| { |
| "name": "transformer.h.19.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.20.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.20.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 9449984 |
| }, |
| { |
| "name": "transformer.h.20.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 29110784 |
| }, |
| { |
| "name": "transformer.h.20.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 31568384 |
| } |
| ], |
| "md5sum": "d2762dfe4809e876d8c1d17162466002" |
| }, |
| { |
| "dataPath": "params_shard_107.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.20.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b31e63bb23b201f5ab4e8bc752786586" |
| }, |
| { |
| "dataPath": "params_shard_108.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.20.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "71531c3ae56ed1ae7e682fc1cd637fb0" |
| }, |
| { |
| "dataPath": "params_shard_109.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.21.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "eaf9bc6e47a7fd9db726dc1694da998a" |
| }, |
| { |
| "dataPath": "params_shard_110.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.21.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0edfd2ce951dfbf9aa89fb14d7e04a71" |
| }, |
| { |
| "dataPath": "params_shard_111.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.21.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "dc92d34ac3d42336e10d49ad40e9c04a" |
| }, |
| { |
| "dataPath": "params_shard_112.bin", |
| "format": "raw-shard", |
| "nbytes": 26284544, |
| "records": [ |
| { |
| "name": "transformer.h.20.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.20.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2359296 |
| }, |
| { |
| "name": "transformer.h.20.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2371584 |
| }, |
| { |
| "name": "transformer.h.20.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2383872 |
| }, |
| { |
| "name": "transformer.h.20.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2396160 |
| }, |
| { |
| "name": "transformer.h.20.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 2408448 |
| }, |
| { |
| "name": "transformer.h.20.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 2457600 |
| }, |
| { |
| "name": "transformer.h.20.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 11894784 |
| }, |
| { |
| "name": "transformer.h.20.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11907072 |
| }, |
| { |
| "name": "transformer.h.21.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 21344256 |
| }, |
| { |
| "name": "transformer.h.21.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.21.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23814656 |
| }, |
| { |
| "name": "transformer.h.21.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.21.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26186240 |
| }, |
| { |
| "name": "transformer.h.21.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26198528 |
| }, |
| { |
| "name": "transformer.h.21.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26210816 |
| }, |
| { |
| "name": "transformer.h.21.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26223104 |
| }, |
| { |
| "name": "transformer.h.21.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 26235392 |
| } |
| ], |
| "md5sum": "59bd9da2feef21831a5b25aad02ce3b6" |
| }, |
| { |
| "dataPath": "params_shard_113.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.21.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "189f0e21a7b2e5c8e4ed43b5647d68c2" |
| }, |
| { |
| "dataPath": "params_shard_114.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.22.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0a535002c5bc73ad1d27229599f45ba4" |
| }, |
| { |
| "dataPath": "params_shard_115.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.22.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2968a09e19563a0cce55be9217026b7a" |
| }, |
| { |
| "dataPath": "params_shard_116.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.22.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0cb3c352666aec53547619c43dd697a2" |
| }, |
| { |
| "dataPath": "params_shard_117.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.22.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "e4a40f4630aef4ef9faa8734e4c047d5" |
| }, |
| { |
| "dataPath": "params_shard_118.bin", |
| "format": "raw-shard", |
| "nbytes": 33276416, |
| "records": [ |
| { |
| "name": "transformer.h.21.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.21.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.21.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 9449472 |
| }, |
| { |
| "name": "transformer.h.22.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 18886656 |
| }, |
| { |
| "name": "transformer.h.22.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 18899456 |
| }, |
| { |
| "name": "transformer.h.22.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.22.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 21369344 |
| }, |
| { |
| "name": "transformer.h.22.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23728640 |
| }, |
| { |
| "name": "transformer.h.22.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23740928 |
| }, |
| { |
| "name": "transformer.h.22.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23753216 |
| }, |
| { |
| "name": "transformer.h.22.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23765504 |
| }, |
| { |
| "name": "transformer.h.22.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 23777792 |
| }, |
| { |
| "name": "transformer.h.22.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.22.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 33264128 |
| } |
| ], |
| "md5sum": "f06a2aacc4166332127b94769ba09d4e" |
| }, |
| { |
| "dataPath": "params_shard_119.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.23.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c4eefcafeb07017bd657c6240e5b2952" |
| }, |
| { |
| "dataPath": "params_shard_120.bin", |
| "format": "raw-shard", |
| "nbytes": 31580672, |
| "records": [ |
| { |
| "name": "transformer.h.22.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.23.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.23.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 9449984 |
| }, |
| { |
| "name": "transformer.h.23.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 29110784 |
| }, |
| { |
| "name": "transformer.h.23.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 31568384 |
| } |
| ], |
| "md5sum": "4be6e6792787aee972a1fbe47c15fadc" |
| }, |
| { |
| "dataPath": "params_shard_121.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.23.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "dbbf773a1072e3ee04571a100956a326" |
| }, |
| { |
| "dataPath": "params_shard_122.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.23.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "717e293bff506ab7670c3e0cc52d2056" |
| }, |
| { |
| "dataPath": "params_shard_123.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.24.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9faa15643751e9937f6c311284fe4365" |
| }, |
| { |
| "dataPath": "params_shard_124.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.24.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9591babcff75f8437390e968c4f306b9" |
| }, |
| { |
| "dataPath": "params_shard_125.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.24.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "779b08ee797726a942e2964c59f70f6b" |
| }, |
| { |
| "dataPath": "params_shard_126.bin", |
| "format": "raw-shard", |
| "nbytes": 26284544, |
| "records": [ |
| { |
| "name": "transformer.h.23.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.23.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2359296 |
| }, |
| { |
| "name": "transformer.h.23.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2371584 |
| }, |
| { |
| "name": "transformer.h.23.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2383872 |
| }, |
| { |
| "name": "transformer.h.23.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2396160 |
| }, |
| { |
| "name": "transformer.h.23.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 2408448 |
| }, |
| { |
| "name": "transformer.h.23.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 2457600 |
| }, |
| { |
| "name": "transformer.h.23.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 11894784 |
| }, |
| { |
| "name": "transformer.h.23.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11907072 |
| }, |
| { |
| "name": "transformer.h.24.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 21344256 |
| }, |
| { |
| "name": "transformer.h.24.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.24.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23814656 |
| }, |
| { |
| "name": "transformer.h.24.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.24.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26186240 |
| }, |
| { |
| "name": "transformer.h.24.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26198528 |
| }, |
| { |
| "name": "transformer.h.24.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26210816 |
| }, |
| { |
| "name": "transformer.h.24.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26223104 |
| }, |
| { |
| "name": "transformer.h.24.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 26235392 |
| } |
| ], |
| "md5sum": "ef100068ffe87f883e3324f5a7c02064" |
| }, |
| { |
| "dataPath": "params_shard_127.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.24.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6210a54f0e211c91ed13ba3554dfaa6c" |
| }, |
| { |
| "dataPath": "params_shard_128.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.25.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f2991d1b393db018a2798ba35a7e66d6" |
| }, |
| { |
| "dataPath": "params_shard_129.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.25.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "60981b9625473f545b68f51fc831174f" |
| }, |
| { |
| "dataPath": "params_shard_130.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.25.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "53e5c68c1878a3060f2ae2e9d050656b" |
| }, |
| { |
| "dataPath": "params_shard_131.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.25.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "0597e5b704ad33df151191ff1f2ffe94" |
| }, |
| { |
| "dataPath": "params_shard_132.bin", |
| "format": "raw-shard", |
| "nbytes": 33276416, |
| "records": [ |
| { |
| "name": "transformer.h.24.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.24.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.24.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 9449472 |
| }, |
| { |
| "name": "transformer.h.25.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 18886656 |
| }, |
| { |
| "name": "transformer.h.25.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 18899456 |
| }, |
| { |
| "name": "transformer.h.25.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.25.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 21369344 |
| }, |
| { |
| "name": "transformer.h.25.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 21381632 |
| }, |
| { |
| "name": "transformer.h.25.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 21393920 |
| }, |
| { |
| "name": "transformer.h.25.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23753216 |
| }, |
| { |
| "name": "transformer.h.25.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23765504 |
| }, |
| { |
| "name": "transformer.h.25.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 23777792 |
| }, |
| { |
| "name": "transformer.h.25.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.25.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 33264128 |
| } |
| ], |
| "md5sum": "d10ddc848165558fe15d1aad65f4bde3" |
| }, |
| { |
| "dataPath": "params_shard_133.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.26.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "59f2a44947767d9743d1c3c4c7a787eb" |
| }, |
| { |
| "dataPath": "params_shard_134.bin", |
| "format": "raw-shard", |
| "nbytes": 31580672, |
| "records": [ |
| { |
| "name": "transformer.h.25.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.26.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.26.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 9449984 |
| }, |
| { |
| "name": "transformer.h.26.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 29110784 |
| }, |
| { |
| "name": "transformer.h.26.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 31568384 |
| } |
| ], |
| "md5sum": "a5fa7bd89d39b2466de2a09b048d0c20" |
| }, |
| { |
| "dataPath": "params_shard_135.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.26.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ed69da31d0bb6d97e3fcd2f004fe9767" |
| }, |
| { |
| "dataPath": "params_shard_136.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.26.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7e12dcdd1b19edfb77ba9bd595f9e9c6" |
| }, |
| { |
| "dataPath": "params_shard_137.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.27.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8a2f8f92a35b47201829ac4f43969314" |
| }, |
| { |
| "dataPath": "params_shard_138.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.27.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "848a9bcdaa4503454ec12f212530ccb6" |
| }, |
| { |
| "dataPath": "params_shard_139.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.27.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "6bf09aa8a402fb07423d2bf14adf72ec" |
| }, |
| { |
| "dataPath": "params_shard_140.bin", |
| "format": "raw-shard", |
| "nbytes": 26284544, |
| "records": [ |
| { |
| "name": "transformer.h.26.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.26.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2359296 |
| }, |
| { |
| "name": "transformer.h.26.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2371584 |
| }, |
| { |
| "name": "transformer.h.26.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2383872 |
| }, |
| { |
| "name": "transformer.h.26.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2396160 |
| }, |
| { |
| "name": "transformer.h.26.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 2408448 |
| }, |
| { |
| "name": "transformer.h.26.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 2457600 |
| }, |
| { |
| "name": "transformer.h.26.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 11894784 |
| }, |
| { |
| "name": "transformer.h.26.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11907072 |
| }, |
| { |
| "name": "transformer.h.27.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 21344256 |
| }, |
| { |
| "name": "transformer.h.27.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.27.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23814656 |
| }, |
| { |
| "name": "transformer.h.27.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.27.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26186240 |
| }, |
| { |
| "name": "transformer.h.27.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26198528 |
| }, |
| { |
| "name": "transformer.h.27.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26210816 |
| }, |
| { |
| "name": "transformer.h.27.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26223104 |
| }, |
| { |
| "name": "transformer.h.27.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 26235392 |
| } |
| ], |
| "md5sum": "d8a22020a4fbbd4994c68f51e13625fb" |
| }, |
| { |
| "dataPath": "params_shard_141.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.27.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "c8f74c523dcb8bd314733c76a5099bf1" |
| }, |
| { |
| "dataPath": "params_shard_142.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.28.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "56ed96c6c1eb3d8c19ed516b85a0de7a" |
| }, |
| { |
| "dataPath": "params_shard_143.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.28.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "063fd180bd3fc2be91501d946895813f" |
| }, |
| { |
| "dataPath": "params_shard_144.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.28.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ee6226a3decf7f4ed812982e86354928" |
| }, |
| { |
| "dataPath": "params_shard_145.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.28.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "259cba18c37e1179d0fc0adbe5ad20ea" |
| }, |
| { |
| "dataPath": "params_shard_146.bin", |
| "format": "raw-shard", |
| "nbytes": 33276416, |
| "records": [ |
| { |
| "name": "transformer.h.27.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.27.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.27.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 9449472 |
| }, |
| { |
| "name": "transformer.h.28.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 18886656 |
| }, |
| { |
| "name": "transformer.h.28.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 18899456 |
| }, |
| { |
| "name": "transformer.h.28.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.28.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 21369344 |
| }, |
| { |
| "name": "transformer.h.28.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23728640 |
| }, |
| { |
| "name": "transformer.h.28.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23740928 |
| }, |
| { |
| "name": "transformer.h.28.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23753216 |
| }, |
| { |
| "name": "transformer.h.28.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23765504 |
| }, |
| { |
| "name": "transformer.h.28.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 23777792 |
| }, |
| { |
| "name": "transformer.h.28.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.28.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 33264128 |
| } |
| ], |
| "md5sum": "5def0e37882ed405e93cb15824797932" |
| }, |
| { |
| "dataPath": "params_shard_147.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.29.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bab2b0db03aa2f9594d5d1b6e25f7c08" |
| }, |
| { |
| "dataPath": "params_shard_148.bin", |
| "format": "raw-shard", |
| "nbytes": 31580672, |
| "records": [ |
| { |
| "name": "transformer.h.28.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.29.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.29.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 9449984 |
| }, |
| { |
| "name": "transformer.h.29.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 29110784 |
| }, |
| { |
| "name": "transformer.h.29.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 31568384 |
| } |
| ], |
| "md5sum": "93152b73d70a5cd5a70a207f615f57c1" |
| }, |
| { |
| "dataPath": "params_shard_149.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.29.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bc6bbfdc2bb96882a519e4700584c901" |
| }, |
| { |
| "dataPath": "params_shard_150.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.29.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "3acfe7797101f908cc98a7a50136ac35" |
| }, |
| { |
| "dataPath": "params_shard_151.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.30.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8e200083e56277fd0cf98517df2f0b7d" |
| }, |
| { |
| "dataPath": "params_shard_152.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.30.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bd9bf98cb088230c1cb5e4b65b19b6d7" |
| }, |
| { |
| "dataPath": "params_shard_153.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.30.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "46bdd5a9f7aae703931bed7bd67c0b6e" |
| }, |
| { |
| "dataPath": "params_shard_154.bin", |
| "format": "raw-shard", |
| "nbytes": 26284544, |
| "records": [ |
| { |
| "name": "transformer.h.29.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.29.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2359296 |
| }, |
| { |
| "name": "transformer.h.29.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2371584 |
| }, |
| { |
| "name": "transformer.h.29.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2383872 |
| }, |
| { |
| "name": "transformer.h.29.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2396160 |
| }, |
| { |
| "name": "transformer.h.29.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 2408448 |
| }, |
| { |
| "name": "transformer.h.29.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 2457600 |
| }, |
| { |
| "name": "transformer.h.29.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 11894784 |
| }, |
| { |
| "name": "transformer.h.29.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11907072 |
| }, |
| { |
| "name": "transformer.h.30.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 21344256 |
| }, |
| { |
| "name": "transformer.h.30.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.30.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23814656 |
| }, |
| { |
| "name": "transformer.h.30.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.30.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26186240 |
| }, |
| { |
| "name": "transformer.h.30.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26198528 |
| }, |
| { |
| "name": "transformer.h.30.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26210816 |
| }, |
| { |
| "name": "transformer.h.30.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26223104 |
| }, |
| { |
| "name": "transformer.h.30.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 26235392 |
| } |
| ], |
| "md5sum": "246b45bc71ad5f8e53ac00ee1bffe5f8" |
| }, |
| { |
| "dataPath": "params_shard_155.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.30.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "582aa8a87bc78236d4fbd477eb8d5a71" |
| }, |
| { |
| "dataPath": "params_shard_156.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.31.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "dd7ba8d91260f6f9f427d3a2d5aa307e" |
| }, |
| { |
| "dataPath": "params_shard_157.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.31.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "9ca002e848cba0e5620783d7b859b886" |
| }, |
| { |
| "dataPath": "params_shard_158.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.31.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "d12f6ad5521046affacf2800ed678593" |
| }, |
| { |
| "dataPath": "params_shard_159.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.31.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "108b2da251b9ccd5a3468747ae821e40" |
| }, |
| { |
| "dataPath": "params_shard_160.bin", |
| "format": "raw-shard", |
| "nbytes": 33276416, |
| "records": [ |
| { |
| "name": "transformer.h.30.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.30.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.30.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 9449472 |
| }, |
| { |
| "name": "transformer.h.31.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 18886656 |
| }, |
| { |
| "name": "transformer.h.31.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 18899456 |
| }, |
| { |
| "name": "transformer.h.31.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.31.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 21369344 |
| }, |
| { |
| "name": "transformer.h.31.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23728640 |
| }, |
| { |
| "name": "transformer.h.31.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23740928 |
| }, |
| { |
| "name": "transformer.h.31.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23753216 |
| }, |
| { |
| "name": "transformer.h.31.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23765504 |
| }, |
| { |
| "name": "transformer.h.31.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 23777792 |
| }, |
| { |
| "name": "transformer.h.31.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.31.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 33264128 |
| } |
| ], |
| "md5sum": "e3f20fbdf0a039cd51c5dcc160a7c48d" |
| }, |
| { |
| "dataPath": "params_shard_161.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.32.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "ec2181c190e6991fb8a6d43b57aec4b9" |
| }, |
| { |
| "dataPath": "params_shard_162.bin", |
| "format": "raw-shard", |
| "nbytes": 31580672, |
| "records": [ |
| { |
| "name": "transformer.h.31.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.32.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.32.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 9449984 |
| }, |
| { |
| "name": "transformer.h.32.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 29110784 |
| }, |
| { |
| "name": "transformer.h.32.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 31568384 |
| } |
| ], |
| "md5sum": "23ec1a5c30b3c2ed0544b096ab4d36fe" |
| }, |
| { |
| "dataPath": "params_shard_163.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.32.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "7e9da361914dfce9df09676ac45bdce7" |
| }, |
| { |
| "dataPath": "params_shard_164.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.32.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "cab1cf77a4c6702480a96dc55a5b2c29" |
| }, |
| { |
| "dataPath": "params_shard_165.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.33.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "00ae4b7fc6e870fe17c9b88738c80e30" |
| }, |
| { |
| "dataPath": "params_shard_166.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.33.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "bc26bf7cdcee9ea400b1c5827556eeb1" |
| }, |
| { |
| "dataPath": "params_shard_167.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.33.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "57b2ff92feb583bfc61e1cb90d7c2a35" |
| }, |
| { |
| "dataPath": "params_shard_168.bin", |
| "format": "raw-shard", |
| "nbytes": 26284544, |
| "records": [ |
| { |
| "name": "transformer.h.32.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.32.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2359296 |
| }, |
| { |
| "name": "transformer.h.32.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2371584 |
| }, |
| { |
| "name": "transformer.h.32.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2383872 |
| }, |
| { |
| "name": "transformer.h.32.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2396160 |
| }, |
| { |
| "name": "transformer.h.32.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 2408448 |
| }, |
| { |
| "name": "transformer.h.32.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 2457600 |
| }, |
| { |
| "name": "transformer.h.32.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 11894784 |
| }, |
| { |
| "name": "transformer.h.32.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11907072 |
| }, |
| { |
| "name": "transformer.h.33.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 21344256 |
| }, |
| { |
| "name": "transformer.h.33.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.33.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23814656 |
| }, |
| { |
| "name": "transformer.h.33.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.33.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26186240 |
| }, |
| { |
| "name": "transformer.h.33.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26198528 |
| }, |
| { |
| "name": "transformer.h.33.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26210816 |
| }, |
| { |
| "name": "transformer.h.33.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26223104 |
| }, |
| { |
| "name": "transformer.h.33.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 26235392 |
| } |
| ], |
| "md5sum": "c0f0eda081711fcaca7f33a50bba8227" |
| }, |
| { |
| "dataPath": "params_shard_169.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.33.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b68979fb6ccde5ac89089f712415a814" |
| }, |
| { |
| "dataPath": "params_shard_170.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.34.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b797ea40687985202fe7e41ecbd7dad2" |
| }, |
| { |
| "dataPath": "params_shard_171.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.34.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "8304550c686860c00065e3f392b8b93d" |
| }, |
| { |
| "dataPath": "params_shard_172.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.34.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "2d5b2e72c7e012d68cc951c4d3a7f49d" |
| }, |
| { |
| "dataPath": "params_shard_173.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.34.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "fa45f9fb41e4a46acdea87a68acff9e0" |
| }, |
| { |
| "dataPath": "params_shard_174.bin", |
| "format": "raw-shard", |
| "nbytes": 33276416, |
| "records": [ |
| { |
| "name": "transformer.h.33.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.33.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.33.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 9449472 |
| }, |
| { |
| "name": "transformer.h.34.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 18886656 |
| }, |
| { |
| "name": "transformer.h.34.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 18899456 |
| }, |
| { |
| "name": "transformer.h.34.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.34.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 21369344 |
| }, |
| { |
| "name": "transformer.h.34.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23728640 |
| }, |
| { |
| "name": "transformer.h.34.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23740928 |
| }, |
| { |
| "name": "transformer.h.34.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23753216 |
| }, |
| { |
| "name": "transformer.h.34.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23765504 |
| }, |
| { |
| "name": "transformer.h.34.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 23777792 |
| }, |
| { |
| "name": "transformer.h.34.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.34.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 33264128 |
| } |
| ], |
| "md5sum": "bb2fea1605ac8df7fdc731da13bb71bc" |
| }, |
| { |
| "dataPath": "params_shard_175.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.35.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1efabdeae2ae9fd4e83a7f9ac5253726" |
| }, |
| { |
| "dataPath": "params_shard_176.bin", |
| "format": "raw-shard", |
| "nbytes": 31580672, |
| "records": [ |
| { |
| "name": "transformer.h.34.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.35.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.35.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 9449984 |
| }, |
| { |
| "name": "transformer.h.35.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 29110784 |
| }, |
| { |
| "name": "transformer.h.35.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 31568384 |
| } |
| ], |
| "md5sum": "61ef8ebd9a50e3bfcfa1d8eb7b66d1aa" |
| }, |
| { |
| "dataPath": "params_shard_177.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.35.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4c82015285fb52e0e4b23a023ffef027" |
| }, |
| { |
| "dataPath": "params_shard_178.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.35.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "1a9c4da81a1c623a45d69a4b7c1b6449" |
| }, |
| { |
| "dataPath": "params_shard_179.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.36.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "f3b2fb19fe71b291dd529243f417bddf" |
| }, |
| { |
| "dataPath": "params_shard_180.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.36.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "552c819d816545a1df8b6f240ccaf520" |
| }, |
| { |
| "dataPath": "params_shard_181.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.36.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "705c5e2b2225b9c534a36a0e3c288c4c" |
| }, |
| { |
| "dataPath": "params_shard_182.bin", |
| "format": "raw-shard", |
| "nbytes": 26284544, |
| "records": [ |
| { |
| "name": "transformer.h.35.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.35.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2359296 |
| }, |
| { |
| "name": "transformer.h.35.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2371584 |
| }, |
| { |
| "name": "transformer.h.35.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2383872 |
| }, |
| { |
| "name": "transformer.h.35.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 2396160 |
| }, |
| { |
| "name": "transformer.h.35.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 2408448 |
| }, |
| { |
| "name": "transformer.h.35.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 2457600 |
| }, |
| { |
| "name": "transformer.h.35.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 11894784 |
| }, |
| { |
| "name": "transformer.h.35.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 11907072 |
| }, |
| { |
| "name": "transformer.h.36.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 21344256 |
| }, |
| { |
| "name": "transformer.h.36.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.36.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23814656 |
| }, |
| { |
| "name": "transformer.h.36.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.36.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26186240 |
| }, |
| { |
| "name": "transformer.h.36.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26198528 |
| }, |
| { |
| "name": "transformer.h.36.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26210816 |
| }, |
| { |
| "name": "transformer.h.36.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 26223104 |
| }, |
| { |
| "name": "transformer.h.36.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 26235392 |
| } |
| ], |
| "md5sum": "6c89b81049a768bb00a2edc703096068" |
| }, |
| { |
| "dataPath": "params_shard_183.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.36.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "b711e552379d561d798a3f7b9eaad35f" |
| }, |
| { |
| "dataPath": "params_shard_184.bin", |
| "format": "raw-shard", |
| "nbytes": 19660800, |
| "records": [ |
| { |
| "name": "transformer.h.37.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "45946233a139ee5eb0d1bd1286ba9a83" |
| }, |
| { |
| "dataPath": "params_shard_185.bin", |
| "format": "raw-shard", |
| "nbytes": 18874368, |
| "records": [ |
| { |
| "name": "transformer.h.37.attn.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 18874368, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "4aefa39dcbdac601e387adbdf9af3e35" |
| }, |
| { |
| "dataPath": "params_shard_186.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.37.mlp.c_fc.q_weight", |
| "shape": [ |
| 24576, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "009466fa17ed38a4982e073fb5c45ed6" |
| }, |
| { |
| "dataPath": "params_shard_187.bin", |
| "format": "raw-shard", |
| "nbytes": 75497472, |
| "records": [ |
| { |
| "name": "transformer.h.37.mlp.c_proj.q_weight", |
| "shape": [ |
| 6144, |
| 3072 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 75497472, |
| "byteOffset": 0 |
| } |
| ], |
| "md5sum": "dd8a3c07257ae8438e9e5efb69370338" |
| }, |
| { |
| "dataPath": "params_shard_188.bin", |
| "format": "raw-shard", |
| "nbytes": 33276416, |
| "records": [ |
| { |
| "name": "transformer.h.36.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.36.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.36.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 9449472 |
| }, |
| { |
| "name": "transformer.h.37.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 18886656 |
| }, |
| { |
| "name": "transformer.h.37.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 18899456 |
| }, |
| { |
| "name": "transformer.h.37.attn.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 21357056 |
| }, |
| { |
| "name": "transformer.h.37.attn.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2359296, |
| "byteOffset": 21369344 |
| }, |
| { |
| "name": "transformer.h.37.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23728640 |
| }, |
| { |
| "name": "transformer.h.37.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23740928 |
| }, |
| { |
| "name": "transformer.h.37.ln_2.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23753216 |
| }, |
| { |
| "name": "transformer.h.37.ln_2.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 23765504 |
| }, |
| { |
| "name": "transformer.h.37.mlp.c_fc.bias", |
| "shape": [ |
| 24576 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 49152, |
| "byteOffset": 23777792 |
| }, |
| { |
| "name": "transformer.h.37.mlp.c_fc.q_scale", |
| "shape": [ |
| 24576, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 23826944 |
| }, |
| { |
| "name": "transformer.h.37.mlp.c_proj.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 33264128 |
| } |
| ], |
| "md5sum": "913f3518d60b3fe2473348030eb4e5f3" |
| }, |
| { |
| "dataPath": "params_shard_189.bin", |
| "format": "raw-shard", |
| "nbytes": 31592960, |
| "records": [ |
| { |
| "name": "transformer.h.37.mlp.c_proj.q_scale", |
| "shape": [ |
| 6144, |
| 768 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 9437184, |
| "byteOffset": 0 |
| }, |
| { |
| "name": "transformer.h.38.attn.c_attn.bias", |
| "shape": [ |
| 6400 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12800, |
| "byteOffset": 9437184 |
| }, |
| { |
| "name": "transformer.h.38.attn.c_attn.q_weight", |
| "shape": [ |
| 6400, |
| 768 |
| ], |
| "dtype": "uint32", |
| "format": "f32-to-bf16", |
| "nbytes": 19660800, |
| "byteOffset": 9449984 |
| }, |
| { |
| "name": "transformer.h.38.attn.c_attn.q_scale", |
| "shape": [ |
| 6400, |
| 192 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 2457600, |
| "byteOffset": 29110784 |
| }, |
| { |
| "name": "transformer.h.38.ln_1.bias", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 31568384 |
| }, |
| { |
| "name": "transformer.h.38.ln_1.weight", |
| "shape": [ |
| 6144 |
| ], |
| "dtype": "float16", |
| "format": "f32-to-bf16", |
| "nbytes": 12288, |
| "byteOffset": 31580672 |
| } |
| ], |
| "md5sum": "903d85cf3e729cd1dc5a7ab56c7dcfcc" |
| } |
| ] |
| } |