starcoder-q4f16_1-MLC / ndarray-cache.json
junrushao's picture
Initial commit
bc0a3d8
raw
history blame
279 kB
{
"metadata": {
"ParamSize": 648,
"ParamBytes": 8902356992.0,
"BitsPerParam": 4.5019815935059295
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
49152,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "babcefb413e4f053531159532b1603c5"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.38.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "5ae40837c56fc53a1e6867cc93ee441f"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.38.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "2397855968fbee540027008677ddf020"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.38.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "dea2f1f44cd50fc82df9cbeb7ca29dd1"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 30769152,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
49152,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
},
{
"name": "transformer.h.38.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "transformer.h.38.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "transformer.h.38.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 21245952
},
{
"name": "transformer.h.38.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 21258240
},
{
"name": "transformer.h.38.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 21270528
},
{
"name": "transformer.h.38.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 21319680
},
{
"name": "transformer.h.38.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30756864
}
],
"md5sum": "bc501ee66f72f3f28eea2a0dda3e9872"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.39.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "0886e966a07a80c3e1a4170221d333ab"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 31580672,
"records": [
{
"name": "transformer.h.38.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.39.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 9437184
},
{
"name": "transformer.h.39.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 9449984
},
{
"name": "transformer.h.39.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 29110784
},
{
"name": "transformer.h.39.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31568384
}
],
"md5sum": "e49fa3e84a78e3a2e53b084165a1c521"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.39.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "0c8ee868d1e77ca9833a15d68538d538"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.39.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "562d7e70fd8ebe1e00887ffe19e1972c"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.0.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "b3a6a1ec978b9ed8f6262c9a12da107a"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.0.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "20ffc69b60fb065cac8601db108435f4"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.0.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "a8ff096b5c3ea5e18273979807e9e68e"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 26309120,
"records": [
{
"name": "transformer.h.39.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "transformer.h.39.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2359296
},
{
"name": "transformer.h.39.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2371584
},
{
"name": "transformer.h.39.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2383872
},
{
"name": "transformer.h.39.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2396160
},
{
"name": "transformer.h.39.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 2408448
},
{
"name": "transformer.h.39.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 2457600
},
{
"name": "transformer.h.39.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 11894784
},
{
"name": "transformer.h.39.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 11907072
},
{
"name": "transformer.ln_f.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 21344256
},
{
"name": "transformer.ln_f.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 21356544
},
{
"name": "transformer.h.0.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 21368832
},
{
"name": "transformer.h.0.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21381632
},
{
"name": "transformer.h.0.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23839232
},
{
"name": "transformer.h.0.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23851520
},
{
"name": "transformer.h.0.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26210816
},
{
"name": "transformer.h.0.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26223104
},
{
"name": "transformer.h.0.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26235392
},
{
"name": "transformer.h.0.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26247680
},
{
"name": "transformer.h.0.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 26259968
}
],
"md5sum": "856d05c256b46f8c834824faeb3ff628"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.0.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "a6bbaca2578b4639dbd2d54510beddbf"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.1.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "80cf30393e14efab29306e340d0ce595"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.1.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "cffaac7091ff103b4fa07c4d4561caa3"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.1.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "3d845f06b6924fb5da9d9e39a7da3894"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.1.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "0752298d33b77e2a5abfff59e0002246"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33276416,
"records": [
{
"name": "transformer.h.0.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.0.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9437184
},
{
"name": "transformer.h.0.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 9449472
},
{
"name": "transformer.h.1.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 18886656
},
{
"name": "transformer.h.1.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 18899456
},
{
"name": "transformer.h.1.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 21357056
},
{
"name": "transformer.h.1.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21369344
},
{
"name": "transformer.h.1.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23728640
},
{
"name": "transformer.h.1.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23740928
},
{
"name": "transformer.h.1.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23753216
},
{
"name": "transformer.h.1.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23765504
},
{
"name": "transformer.h.1.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 23777792
},
{
"name": "transformer.h.1.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 23826944
},
{
"name": "transformer.h.1.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 33264128
}
],
"md5sum": "00388c94dedf014b0205e210d3d24a82"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.2.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "7563282d813947f7c4011a0e6765ed46"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 31580672,
"records": [
{
"name": "transformer.h.1.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.2.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 9437184
},
{
"name": "transformer.h.2.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 9449984
},
{
"name": "transformer.h.2.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 29110784
},
{
"name": "transformer.h.2.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31568384
}
],
"md5sum": "383a9bfbf85a08c9822f4479134e08c8"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.2.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "a6093d6d592c71b32c331d92015de46a"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.2.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "2be994479a3e9299ebd19ad19b468a5d"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.3.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "4d532ef18e39e73a53712d836f5572b7"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.3.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "2e4b432961cd77e0dbf1c3966adada95"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.3.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "8a5b7422608712fc1c85e7052939b372"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 26284544,
"records": [
{
"name": "transformer.h.2.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "transformer.h.2.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2359296
},
{
"name": "transformer.h.2.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2371584
},
{
"name": "transformer.h.2.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2383872
},
{
"name": "transformer.h.2.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2396160
},
{
"name": "transformer.h.2.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 2408448
},
{
"name": "transformer.h.2.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 2457600
},
{
"name": "transformer.h.2.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 11894784
},
{
"name": "transformer.h.2.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 11907072
},
{
"name": "transformer.h.3.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 21344256
},
{
"name": "transformer.h.3.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21357056
},
{
"name": "transformer.h.3.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23814656
},
{
"name": "transformer.h.3.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23826944
},
{
"name": "transformer.h.3.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26186240
},
{
"name": "transformer.h.3.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26198528
},
{
"name": "transformer.h.3.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26210816
},
{
"name": "transformer.h.3.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26223104
},
{
"name": "transformer.h.3.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 26235392
}
],
"md5sum": "dd48cc2df11d4cc8e5a0744d58e53434"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.3.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "4e758a47111ac878b0d1004a320f77d6"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.4.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "6b2e75ee6fbadd41d4c05c769408f166"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.4.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "107cf68ffb72b7d2038f48ac7d70fc60"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.4.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "8e143ab7f80422fc54459c4a1138c79b"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.4.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "cf409852ac66d6c0f1a9f68ea9bce9e9"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 33276416,
"records": [
{
"name": "transformer.h.3.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.3.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9437184
},
{
"name": "transformer.h.3.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 9449472
},
{
"name": "transformer.h.4.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 18886656
},
{
"name": "transformer.h.4.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 18899456
},
{
"name": "transformer.h.4.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 21357056
},
{
"name": "transformer.h.4.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21369344
},
{
"name": "transformer.h.4.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23728640
},
{
"name": "transformer.h.4.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23740928
},
{
"name": "transformer.h.4.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23753216
},
{
"name": "transformer.h.4.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23765504
},
{
"name": "transformer.h.4.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 23777792
},
{
"name": "transformer.h.4.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 23826944
},
{
"name": "transformer.h.4.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 33264128
}
],
"md5sum": "9b3380925f6348de1275f416981565e4"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.5.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "507aa7fbea7f5ccc11d7f47bd7dccded"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 31580672,
"records": [
{
"name": "transformer.h.4.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.5.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 9437184
},
{
"name": "transformer.h.5.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 9449984
},
{
"name": "transformer.h.5.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 29110784
},
{
"name": "transformer.h.5.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31568384
}
],
"md5sum": "49bfbd5c820118a6bb95ba5981d7b28d"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.5.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "94304452ad1b1ddba89430224cf5e2bb"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "transformer.wpe.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d222cfc4fbec8e71e417524cacb7bcfa"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 150994944,
"records": [
{
"name": "transformer.wte.q_weight",
"shape": [
49152,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 150994944,
"byteOffset": 0
}
],
"md5sum": "babcefb413e4f053531159532b1603c5"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.wte.q_scale",
"shape": [
49152,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "2264d9cdd42deb032041fc1f527c7768"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.10.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "997171f727125282a44a75bd8e72af1e"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.10.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "411ad9d5c020f21d73b2c9859d15ccd8"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.10.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "69eaef092310789038670bc4e1d9d164"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.10.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "445a4ee984f81d497db02108e3422c2c"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 29430272,
"records": [
{
"name": "transformer.h.5.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "transformer.h.5.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2359296
},
{
"name": "transformer.h.5.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2371584
},
{
"name": "transformer.h.5.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2383872
},
{
"name": "transformer.h.5.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2396160
},
{
"name": "transformer.h.5.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 2408448
},
{
"name": "transformer.h.5.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 2457600
},
{
"name": "transformer.wpe.q_scale",
"shape": [
8192,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 11894784
},
{
"name": "transformer.h.10.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 15040512
},
{
"name": "transformer.h.10.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 15053312
},
{
"name": "transformer.h.10.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 17510912
},
{
"name": "transformer.h.10.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 17523200
},
{
"name": "transformer.h.10.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19882496
},
{
"name": "transformer.h.10.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19894784
},
{
"name": "transformer.h.10.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19907072
},
{
"name": "transformer.h.10.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 19919360
},
{
"name": "transformer.h.10.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 19931648
},
{
"name": "transformer.h.10.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 19980800
},
{
"name": "transformer.h.10.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 29417984
}
],
"md5sum": "7dd4f3533ddef5fff3f934212d92ce7c"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.11.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "49587dab77a1a737f36e88db54281879"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 31580672,
"records": [
{
"name": "transformer.h.10.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.11.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 9437184
},
{
"name": "transformer.h.11.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 9449984
},
{
"name": "transformer.h.11.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 29110784
},
{
"name": "transformer.h.11.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31568384
}
],
"md5sum": "5d4d02f5632affbb85c67f79b1f9a2a4"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.11.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "c852bea71100fcab68c49cf37ca98ddc"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.11.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "10fe275e31c5e312d15468acf3f6b94c"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.12.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "869d51a3fd6f40e08fdf3babf5ed92ed"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.5.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "4444b8eb4ab2d35d7dab5faca96a6493"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.6.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "ee0ba99d566136756b211382c1ab1b1d"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 33301504,
"records": [
{
"name": "transformer.h.11.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "transformer.h.11.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2359296
},
{
"name": "transformer.h.11.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2371584
},
{
"name": "transformer.h.11.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2383872
},
{
"name": "transformer.h.11.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2396160
},
{
"name": "transformer.h.11.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 2408448
},
{
"name": "transformer.h.11.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 2457600
},
{
"name": "transformer.h.11.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 11894784
},
{
"name": "transformer.h.11.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 11907072
},
{
"name": "transformer.h.12.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 21344256
},
{
"name": "transformer.h.12.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21357056
},
{
"name": "transformer.h.12.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23814656
},
{
"name": "transformer.h.12.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23826944
},
{
"name": "transformer.h.5.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23839232
},
{
"name": "transformer.h.5.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 23851520
},
{
"name": "transformer.h.6.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 33288704
}
],
"md5sum": "e426935f44ffb1f68e4d0fedd3fbe18c"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.6.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "91c4f854c771d0cf03fbde9d20c7928e"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.6.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "32ee966573e0643ec25ffe1f36bedc3c"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 33251328,
"records": [
{
"name": "transformer.h.6.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 0
},
{
"name": "transformer.h.6.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2457600
},
{
"name": "transformer.h.6.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 2469888
},
{
"name": "transformer.h.6.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21344256
},
{
"name": "transformer.h.6.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23703552
},
{
"name": "transformer.h.6.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23715840
},
{
"name": "transformer.h.6.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23728128
},
{
"name": "transformer.h.6.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23740416
},
{
"name": "transformer.h.6.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 23752704
},
{
"name": "transformer.h.6.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 23801856
},
{
"name": "transformer.h.6.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 33239040
}
],
"md5sum": "fcb61b52ac35392dce73b5274e3a4f6d"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.7.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "09ea0c114d334279668813cd4bde6ea6"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 31580672,
"records": [
{
"name": "transformer.h.6.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.7.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 9437184
},
{
"name": "transformer.h.7.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 9449984
},
{
"name": "transformer.h.7.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 29110784
},
{
"name": "transformer.h.7.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31568384
}
],
"md5sum": "f267f9afaee2414c60114d460c323b31"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.7.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "7e2ebc903f1f7f37d1901b6b9ac38cf3"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.7.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "b45f6894e1dc84f40d9390374f7dc290"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.8.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "6a474854afce52990d4a395bd2ae12f0"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.8.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "afbbd7c4b89f87d390cf2405d8ee8523"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.8.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "866348524c61711d88f824a1db5d5d6e"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 26284544,
"records": [
{
"name": "transformer.h.7.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "transformer.h.7.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2359296
},
{
"name": "transformer.h.7.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2371584
},
{
"name": "transformer.h.7.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2383872
},
{
"name": "transformer.h.7.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2396160
},
{
"name": "transformer.h.7.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 2408448
},
{
"name": "transformer.h.7.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 2457600
},
{
"name": "transformer.h.7.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 11894784
},
{
"name": "transformer.h.7.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 11907072
},
{
"name": "transformer.h.8.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 21344256
},
{
"name": "transformer.h.8.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21357056
},
{
"name": "transformer.h.8.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23814656
},
{
"name": "transformer.h.8.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23826944
},
{
"name": "transformer.h.8.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26186240
},
{
"name": "transformer.h.8.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26198528
},
{
"name": "transformer.h.8.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26210816
},
{
"name": "transformer.h.8.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26223104
},
{
"name": "transformer.h.8.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 26235392
}
],
"md5sum": "5dea17d4a41e64683a66498748dfc66e"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.8.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "2c71df6d0ca289385dde0430939c7dd0"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.9.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "fdb3806ea16c6a5f9b7137022aa692ae"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.9.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "9a72fc2e5b14dda8a84221c35c2be2d8"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.9.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "26291d69203dffdb4b623b4d075acb7a"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.9.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "639567378ea8a0b71c20489f94930e82"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 33276416,
"records": [
{
"name": "transformer.h.8.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.8.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9437184
},
{
"name": "transformer.h.8.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 9449472
},
{
"name": "transformer.h.9.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 18886656
},
{
"name": "transformer.h.9.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 18899456
},
{
"name": "transformer.h.9.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 21357056
},
{
"name": "transformer.h.9.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21369344
},
{
"name": "transformer.h.9.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23728640
},
{
"name": "transformer.h.9.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23740928
},
{
"name": "transformer.h.9.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23753216
},
{
"name": "transformer.h.9.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23765504
},
{
"name": "transformer.h.9.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 23777792
},
{
"name": "transformer.h.9.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 23826944
},
{
"name": "transformer.h.9.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 33264128
}
],
"md5sum": "dfa35ae56731bc34e0a4416c6a802933"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.12.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "d6c540589f42dbd5cc46255108b45e6e"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 30756864,
"records": [
{
"name": "transformer.h.9.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.12.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9437184
},
{
"name": "transformer.h.12.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 9449472
},
{
"name": "transformer.h.12.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 28323840
},
{
"name": "transformer.h.12.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30683136
},
{
"name": "transformer.h.12.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 30695424
},
{
"name": "transformer.h.12.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 30707712
}
],
"md5sum": "c50334c9be1c7b7165b44413a4229d2e"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.12.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "e861e81980615136c9b6182a4381947f"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.13.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "bbd415368c2c5dc748f4cfbc7bf89956"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.13.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "0374aba60b137e3fea7968fecd3f9b64"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.13.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "8dbb90af792131bac5df5b8a738a4467"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.13.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "d8866a59d5aed528a1889eb2d11c0b91"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 33276416,
"records": [
{
"name": "transformer.h.12.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.12.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9437184
},
{
"name": "transformer.h.12.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 9449472
},
{
"name": "transformer.h.13.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 18886656
},
{
"name": "transformer.h.13.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 18899456
},
{
"name": "transformer.h.13.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 21357056
},
{
"name": "transformer.h.13.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21369344
},
{
"name": "transformer.h.13.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23728640
},
{
"name": "transformer.h.13.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23740928
},
{
"name": "transformer.h.13.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23753216
},
{
"name": "transformer.h.13.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23765504
},
{
"name": "transformer.h.13.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 23777792
},
{
"name": "transformer.h.13.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 23826944
},
{
"name": "transformer.h.13.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 33264128
}
],
"md5sum": "f2ca060c4d6c99d620813b126b607255"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.14.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "584d95cca7023d6ebb1aed5751ba47c5"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 31580672,
"records": [
{
"name": "transformer.h.13.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.14.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 9437184
},
{
"name": "transformer.h.14.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 9449984
},
{
"name": "transformer.h.14.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 29110784
},
{
"name": "transformer.h.14.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31568384
}
],
"md5sum": "70443ea582b2dfe15a0dcad53e30af2d"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.14.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "0a436b5781b6425e631ea1da821f2b71"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.14.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "3f81ea6401fa1355b1fb6179c61f6fd3"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.15.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "52894e59d1012a18b7ee108af5f1241c"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.15.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "867fbc93f990ac267aaf08d8206a950b"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.15.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "68ba6c14f0ed7a4890400377da8fa5d9"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 26284544,
"records": [
{
"name": "transformer.h.14.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "transformer.h.14.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2359296
},
{
"name": "transformer.h.14.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2371584
},
{
"name": "transformer.h.14.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2383872
},
{
"name": "transformer.h.14.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2396160
},
{
"name": "transformer.h.14.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 2408448
},
{
"name": "transformer.h.14.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 2457600
},
{
"name": "transformer.h.14.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 11894784
},
{
"name": "transformer.h.14.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 11907072
},
{
"name": "transformer.h.15.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 21344256
},
{
"name": "transformer.h.15.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21357056
},
{
"name": "transformer.h.15.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23814656
},
{
"name": "transformer.h.15.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23826944
},
{
"name": "transformer.h.15.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26186240
},
{
"name": "transformer.h.15.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26198528
},
{
"name": "transformer.h.15.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26210816
},
{
"name": "transformer.h.15.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26223104
},
{
"name": "transformer.h.15.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 26235392
}
],
"md5sum": "f188961bef546fc4ce9816e6ed395155"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.15.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "a36c755b2ebcfd9a9eabc96b8420e18e"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.16.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "62840090d164e0e7a9dacefb0c4c63da"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.16.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "37c77cd53e7071bc63459a36c15fd08b"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.16.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "fe7f24e364f1ad2d04260ffaa05a537c"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.16.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "04b7c1b29a28f73273a23469eb8ed983"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 33276416,
"records": [
{
"name": "transformer.h.15.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.15.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9437184
},
{
"name": "transformer.h.15.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 9449472
},
{
"name": "transformer.h.16.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 18886656
},
{
"name": "transformer.h.16.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 18899456
},
{
"name": "transformer.h.16.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 21357056
},
{
"name": "transformer.h.16.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21369344
},
{
"name": "transformer.h.16.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23728640
},
{
"name": "transformer.h.16.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23740928
},
{
"name": "transformer.h.16.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23753216
},
{
"name": "transformer.h.16.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23765504
},
{
"name": "transformer.h.16.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 23777792
},
{
"name": "transformer.h.16.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 23826944
},
{
"name": "transformer.h.16.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 33264128
}
],
"md5sum": "7e4dfbab41e6509eb9b8d2773b28fed5"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.17.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "ad85f1072ceba1420d5a48d7b0e24097"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 31580672,
"records": [
{
"name": "transformer.h.16.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.17.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 9437184
},
{
"name": "transformer.h.17.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 9449984
},
{
"name": "transformer.h.17.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 29110784
},
{
"name": "transformer.h.17.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31568384
}
],
"md5sum": "925535ab1de95a11bd0f8bf3eccd1827"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.17.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "260801454421f59c0f21adc8b4c8e75a"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.17.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "16b0cd04ec30fab0ee415f2996bed0fe"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.18.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "1c33c6ec81846fda7ec5049f24bfc6ff"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.18.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "e43c9b17ecf4048039d23e4963a65826"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.18.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "fb58b5ecddf6d5e645e5c2f56b9ce4d9"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 26284544,
"records": [
{
"name": "transformer.h.17.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "transformer.h.17.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2359296
},
{
"name": "transformer.h.17.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2371584
},
{
"name": "transformer.h.17.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2383872
},
{
"name": "transformer.h.17.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2396160
},
{
"name": "transformer.h.17.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 2408448
},
{
"name": "transformer.h.17.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 2457600
},
{
"name": "transformer.h.17.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 11894784
},
{
"name": "transformer.h.17.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 11907072
},
{
"name": "transformer.h.18.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 21344256
},
{
"name": "transformer.h.18.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21357056
},
{
"name": "transformer.h.18.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23814656
},
{
"name": "transformer.h.18.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23826944
},
{
"name": "transformer.h.18.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26186240
},
{
"name": "transformer.h.18.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26198528
},
{
"name": "transformer.h.18.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26210816
},
{
"name": "transformer.h.18.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26223104
},
{
"name": "transformer.h.18.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 26235392
}
],
"md5sum": "44eedba83e95cb0fc371e85dad061a72"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.18.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "23776761b7d6d5dcbf8d0bd3daf77e04"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.19.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "d1f584fee39c9bf061fe8d5956fa1c4a"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.19.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "bf04e079e21848e26d10f6727e2eb9cd"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.19.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "00e543d447c27cc2aeede2f5a2a71e08"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.19.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "0a81513dbbece2c86c72a6438a3f7648"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 33276416,
"records": [
{
"name": "transformer.h.18.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.18.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9437184
},
{
"name": "transformer.h.18.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 9449472
},
{
"name": "transformer.h.19.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 18886656
},
{
"name": "transformer.h.19.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 18899456
},
{
"name": "transformer.h.19.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 21357056
},
{
"name": "transformer.h.19.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21369344
},
{
"name": "transformer.h.19.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23728640
},
{
"name": "transformer.h.19.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23740928
},
{
"name": "transformer.h.19.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23753216
},
{
"name": "transformer.h.19.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23765504
},
{
"name": "transformer.h.19.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 23777792
},
{
"name": "transformer.h.19.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 23826944
},
{
"name": "transformer.h.19.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 33264128
}
],
"md5sum": "519d1c4d5b4ae091da308478ec77e8a7"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.20.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "48bd1b290ea35af99dd8f2ed381f1c36"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 31580672,
"records": [
{
"name": "transformer.h.19.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.20.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 9437184
},
{
"name": "transformer.h.20.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 9449984
},
{
"name": "transformer.h.20.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 29110784
},
{
"name": "transformer.h.20.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31568384
}
],
"md5sum": "d2762dfe4809e876d8c1d17162466002"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.20.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "b31e63bb23b201f5ab4e8bc752786586"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.20.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "71531c3ae56ed1ae7e682fc1cd637fb0"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.21.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "eaf9bc6e47a7fd9db726dc1694da998a"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.21.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "0edfd2ce951dfbf9aa89fb14d7e04a71"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.21.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "dc92d34ac3d42336e10d49ad40e9c04a"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 26284544,
"records": [
{
"name": "transformer.h.20.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "transformer.h.20.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2359296
},
{
"name": "transformer.h.20.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2371584
},
{
"name": "transformer.h.20.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2383872
},
{
"name": "transformer.h.20.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2396160
},
{
"name": "transformer.h.20.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 2408448
},
{
"name": "transformer.h.20.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 2457600
},
{
"name": "transformer.h.20.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 11894784
},
{
"name": "transformer.h.20.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 11907072
},
{
"name": "transformer.h.21.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 21344256
},
{
"name": "transformer.h.21.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21357056
},
{
"name": "transformer.h.21.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23814656
},
{
"name": "transformer.h.21.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23826944
},
{
"name": "transformer.h.21.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26186240
},
{
"name": "transformer.h.21.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26198528
},
{
"name": "transformer.h.21.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26210816
},
{
"name": "transformer.h.21.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26223104
},
{
"name": "transformer.h.21.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 26235392
}
],
"md5sum": "59bd9da2feef21831a5b25aad02ce3b6"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.21.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "189f0e21a7b2e5c8e4ed43b5647d68c2"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.22.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "0a535002c5bc73ad1d27229599f45ba4"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.22.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "2968a09e19563a0cce55be9217026b7a"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.22.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "0cb3c352666aec53547619c43dd697a2"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.22.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "e4a40f4630aef4ef9faa8734e4c047d5"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 33276416,
"records": [
{
"name": "transformer.h.21.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.21.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9437184
},
{
"name": "transformer.h.21.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 9449472
},
{
"name": "transformer.h.22.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 18886656
},
{
"name": "transformer.h.22.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 18899456
},
{
"name": "transformer.h.22.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 21357056
},
{
"name": "transformer.h.22.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21369344
},
{
"name": "transformer.h.22.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23728640
},
{
"name": "transformer.h.22.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23740928
},
{
"name": "transformer.h.22.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23753216
},
{
"name": "transformer.h.22.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23765504
},
{
"name": "transformer.h.22.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 23777792
},
{
"name": "transformer.h.22.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 23826944
},
{
"name": "transformer.h.22.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 33264128
}
],
"md5sum": "f06a2aacc4166332127b94769ba09d4e"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.23.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "c4eefcafeb07017bd657c6240e5b2952"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 31580672,
"records": [
{
"name": "transformer.h.22.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.23.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 9437184
},
{
"name": "transformer.h.23.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 9449984
},
{
"name": "transformer.h.23.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 29110784
},
{
"name": "transformer.h.23.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31568384
}
],
"md5sum": "4be6e6792787aee972a1fbe47c15fadc"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.23.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "dbbf773a1072e3ee04571a100956a326"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.23.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "717e293bff506ab7670c3e0cc52d2056"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.24.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "9faa15643751e9937f6c311284fe4365"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.24.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "9591babcff75f8437390e968c4f306b9"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.24.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "779b08ee797726a942e2964c59f70f6b"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 26284544,
"records": [
{
"name": "transformer.h.23.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "transformer.h.23.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2359296
},
{
"name": "transformer.h.23.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2371584
},
{
"name": "transformer.h.23.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2383872
},
{
"name": "transformer.h.23.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2396160
},
{
"name": "transformer.h.23.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 2408448
},
{
"name": "transformer.h.23.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 2457600
},
{
"name": "transformer.h.23.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 11894784
},
{
"name": "transformer.h.23.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 11907072
},
{
"name": "transformer.h.24.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 21344256
},
{
"name": "transformer.h.24.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21357056
},
{
"name": "transformer.h.24.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23814656
},
{
"name": "transformer.h.24.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23826944
},
{
"name": "transformer.h.24.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26186240
},
{
"name": "transformer.h.24.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26198528
},
{
"name": "transformer.h.24.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26210816
},
{
"name": "transformer.h.24.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26223104
},
{
"name": "transformer.h.24.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 26235392
}
],
"md5sum": "ef100068ffe87f883e3324f5a7c02064"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.24.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "6210a54f0e211c91ed13ba3554dfaa6c"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.25.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "f2991d1b393db018a2798ba35a7e66d6"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.25.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "60981b9625473f545b68f51fc831174f"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.25.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "53e5c68c1878a3060f2ae2e9d050656b"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.25.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "0597e5b704ad33df151191ff1f2ffe94"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 33276416,
"records": [
{
"name": "transformer.h.24.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.24.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9437184
},
{
"name": "transformer.h.24.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 9449472
},
{
"name": "transformer.h.25.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 18886656
},
{
"name": "transformer.h.25.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 18899456
},
{
"name": "transformer.h.25.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 21357056
},
{
"name": "transformer.h.25.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 21369344
},
{
"name": "transformer.h.25.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 21381632
},
{
"name": "transformer.h.25.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21393920
},
{
"name": "transformer.h.25.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23753216
},
{
"name": "transformer.h.25.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23765504
},
{
"name": "transformer.h.25.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 23777792
},
{
"name": "transformer.h.25.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 23826944
},
{
"name": "transformer.h.25.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 33264128
}
],
"md5sum": "d10ddc848165558fe15d1aad65f4bde3"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.26.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "59f2a44947767d9743d1c3c4c7a787eb"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 31580672,
"records": [
{
"name": "transformer.h.25.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.26.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 9437184
},
{
"name": "transformer.h.26.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 9449984
},
{
"name": "transformer.h.26.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 29110784
},
{
"name": "transformer.h.26.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31568384
}
],
"md5sum": "a5fa7bd89d39b2466de2a09b048d0c20"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.26.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "ed69da31d0bb6d97e3fcd2f004fe9767"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.26.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "7e12dcdd1b19edfb77ba9bd595f9e9c6"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.27.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "8a2f8f92a35b47201829ac4f43969314"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.27.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "848a9bcdaa4503454ec12f212530ccb6"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.27.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "6bf09aa8a402fb07423d2bf14adf72ec"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 26284544,
"records": [
{
"name": "transformer.h.26.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "transformer.h.26.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2359296
},
{
"name": "transformer.h.26.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2371584
},
{
"name": "transformer.h.26.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2383872
},
{
"name": "transformer.h.26.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2396160
},
{
"name": "transformer.h.26.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 2408448
},
{
"name": "transformer.h.26.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 2457600
},
{
"name": "transformer.h.26.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 11894784
},
{
"name": "transformer.h.26.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 11907072
},
{
"name": "transformer.h.27.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 21344256
},
{
"name": "transformer.h.27.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21357056
},
{
"name": "transformer.h.27.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23814656
},
{
"name": "transformer.h.27.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23826944
},
{
"name": "transformer.h.27.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26186240
},
{
"name": "transformer.h.27.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26198528
},
{
"name": "transformer.h.27.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26210816
},
{
"name": "transformer.h.27.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26223104
},
{
"name": "transformer.h.27.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 26235392
}
],
"md5sum": "d8a22020a4fbbd4994c68f51e13625fb"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.27.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "c8f74c523dcb8bd314733c76a5099bf1"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.28.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "56ed96c6c1eb3d8c19ed516b85a0de7a"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.28.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "063fd180bd3fc2be91501d946895813f"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.28.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "ee6226a3decf7f4ed812982e86354928"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.28.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "259cba18c37e1179d0fc0adbe5ad20ea"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 33276416,
"records": [
{
"name": "transformer.h.27.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.27.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9437184
},
{
"name": "transformer.h.27.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 9449472
},
{
"name": "transformer.h.28.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 18886656
},
{
"name": "transformer.h.28.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 18899456
},
{
"name": "transformer.h.28.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 21357056
},
{
"name": "transformer.h.28.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21369344
},
{
"name": "transformer.h.28.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23728640
},
{
"name": "transformer.h.28.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23740928
},
{
"name": "transformer.h.28.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23753216
},
{
"name": "transformer.h.28.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23765504
},
{
"name": "transformer.h.28.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 23777792
},
{
"name": "transformer.h.28.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 23826944
},
{
"name": "transformer.h.28.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 33264128
}
],
"md5sum": "5def0e37882ed405e93cb15824797932"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.29.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "bab2b0db03aa2f9594d5d1b6e25f7c08"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 31580672,
"records": [
{
"name": "transformer.h.28.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.29.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 9437184
},
{
"name": "transformer.h.29.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 9449984
},
{
"name": "transformer.h.29.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 29110784
},
{
"name": "transformer.h.29.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31568384
}
],
"md5sum": "93152b73d70a5cd5a70a207f615f57c1"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.29.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "bc6bbfdc2bb96882a519e4700584c901"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.29.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "3acfe7797101f908cc98a7a50136ac35"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.30.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "8e200083e56277fd0cf98517df2f0b7d"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.30.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "bd9bf98cb088230c1cb5e4b65b19b6d7"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.30.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "46bdd5a9f7aae703931bed7bd67c0b6e"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 26284544,
"records": [
{
"name": "transformer.h.29.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "transformer.h.29.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2359296
},
{
"name": "transformer.h.29.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2371584
},
{
"name": "transformer.h.29.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2383872
},
{
"name": "transformer.h.29.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2396160
},
{
"name": "transformer.h.29.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 2408448
},
{
"name": "transformer.h.29.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 2457600
},
{
"name": "transformer.h.29.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 11894784
},
{
"name": "transformer.h.29.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 11907072
},
{
"name": "transformer.h.30.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 21344256
},
{
"name": "transformer.h.30.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21357056
},
{
"name": "transformer.h.30.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23814656
},
{
"name": "transformer.h.30.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23826944
},
{
"name": "transformer.h.30.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26186240
},
{
"name": "transformer.h.30.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26198528
},
{
"name": "transformer.h.30.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26210816
},
{
"name": "transformer.h.30.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26223104
},
{
"name": "transformer.h.30.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 26235392
}
],
"md5sum": "246b45bc71ad5f8e53ac00ee1bffe5f8"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.30.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "582aa8a87bc78236d4fbd477eb8d5a71"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.31.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "dd7ba8d91260f6f9f427d3a2d5aa307e"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.31.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "9ca002e848cba0e5620783d7b859b886"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.31.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "d12f6ad5521046affacf2800ed678593"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.31.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "108b2da251b9ccd5a3468747ae821e40"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 33276416,
"records": [
{
"name": "transformer.h.30.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.30.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9437184
},
{
"name": "transformer.h.30.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 9449472
},
{
"name": "transformer.h.31.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 18886656
},
{
"name": "transformer.h.31.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 18899456
},
{
"name": "transformer.h.31.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 21357056
},
{
"name": "transformer.h.31.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21369344
},
{
"name": "transformer.h.31.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23728640
},
{
"name": "transformer.h.31.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23740928
},
{
"name": "transformer.h.31.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23753216
},
{
"name": "transformer.h.31.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23765504
},
{
"name": "transformer.h.31.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 23777792
},
{
"name": "transformer.h.31.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 23826944
},
{
"name": "transformer.h.31.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 33264128
}
],
"md5sum": "e3f20fbdf0a039cd51c5dcc160a7c48d"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.32.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "ec2181c190e6991fb8a6d43b57aec4b9"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 31580672,
"records": [
{
"name": "transformer.h.31.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.32.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 9437184
},
{
"name": "transformer.h.32.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 9449984
},
{
"name": "transformer.h.32.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 29110784
},
{
"name": "transformer.h.32.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31568384
}
],
"md5sum": "23ec1a5c30b3c2ed0544b096ab4d36fe"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.32.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "7e9da361914dfce9df09676ac45bdce7"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.32.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "cab1cf77a4c6702480a96dc55a5b2c29"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.33.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "00ae4b7fc6e870fe17c9b88738c80e30"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.33.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "bc26bf7cdcee9ea400b1c5827556eeb1"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.33.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "57b2ff92feb583bfc61e1cb90d7c2a35"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 26284544,
"records": [
{
"name": "transformer.h.32.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "transformer.h.32.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2359296
},
{
"name": "transformer.h.32.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2371584
},
{
"name": "transformer.h.32.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2383872
},
{
"name": "transformer.h.32.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2396160
},
{
"name": "transformer.h.32.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 2408448
},
{
"name": "transformer.h.32.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 2457600
},
{
"name": "transformer.h.32.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 11894784
},
{
"name": "transformer.h.32.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 11907072
},
{
"name": "transformer.h.33.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 21344256
},
{
"name": "transformer.h.33.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21357056
},
{
"name": "transformer.h.33.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23814656
},
{
"name": "transformer.h.33.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23826944
},
{
"name": "transformer.h.33.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26186240
},
{
"name": "transformer.h.33.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26198528
},
{
"name": "transformer.h.33.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26210816
},
{
"name": "transformer.h.33.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26223104
},
{
"name": "transformer.h.33.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 26235392
}
],
"md5sum": "c0f0eda081711fcaca7f33a50bba8227"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.33.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "b68979fb6ccde5ac89089f712415a814"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.34.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "b797ea40687985202fe7e41ecbd7dad2"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.34.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "8304550c686860c00065e3f392b8b93d"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.34.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "2d5b2e72c7e012d68cc951c4d3a7f49d"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.34.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "fa45f9fb41e4a46acdea87a68acff9e0"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 33276416,
"records": [
{
"name": "transformer.h.33.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.33.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9437184
},
{
"name": "transformer.h.33.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 9449472
},
{
"name": "transformer.h.34.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 18886656
},
{
"name": "transformer.h.34.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 18899456
},
{
"name": "transformer.h.34.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 21357056
},
{
"name": "transformer.h.34.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21369344
},
{
"name": "transformer.h.34.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23728640
},
{
"name": "transformer.h.34.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23740928
},
{
"name": "transformer.h.34.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23753216
},
{
"name": "transformer.h.34.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23765504
},
{
"name": "transformer.h.34.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 23777792
},
{
"name": "transformer.h.34.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 23826944
},
{
"name": "transformer.h.34.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 33264128
}
],
"md5sum": "bb2fea1605ac8df7fdc731da13bb71bc"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.35.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "1efabdeae2ae9fd4e83a7f9ac5253726"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 31580672,
"records": [
{
"name": "transformer.h.34.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.35.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 9437184
},
{
"name": "transformer.h.35.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 9449984
},
{
"name": "transformer.h.35.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 29110784
},
{
"name": "transformer.h.35.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31568384
}
],
"md5sum": "61ef8ebd9a50e3bfcfa1d8eb7b66d1aa"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.35.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "4c82015285fb52e0e4b23a023ffef027"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.35.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "1a9c4da81a1c623a45d69a4b7c1b6449"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.36.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "f3b2fb19fe71b291dd529243f417bddf"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.36.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "552c819d816545a1df8b6f240ccaf520"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.36.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "705c5e2b2225b9c534a36a0e3c288c4c"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 26284544,
"records": [
{
"name": "transformer.h.35.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "transformer.h.35.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2359296
},
{
"name": "transformer.h.35.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2371584
},
{
"name": "transformer.h.35.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2383872
},
{
"name": "transformer.h.35.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 2396160
},
{
"name": "transformer.h.35.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 2408448
},
{
"name": "transformer.h.35.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 2457600
},
{
"name": "transformer.h.35.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 11894784
},
{
"name": "transformer.h.35.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 11907072
},
{
"name": "transformer.h.36.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 21344256
},
{
"name": "transformer.h.36.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 21357056
},
{
"name": "transformer.h.36.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23814656
},
{
"name": "transformer.h.36.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23826944
},
{
"name": "transformer.h.36.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26186240
},
{
"name": "transformer.h.36.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26198528
},
{
"name": "transformer.h.36.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26210816
},
{
"name": "transformer.h.36.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 26223104
},
{
"name": "transformer.h.36.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 26235392
}
],
"md5sum": "6c89b81049a768bb00a2edc703096068"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.36.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "b711e552379d561d798a3f7b9eaad35f"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 19660800,
"records": [
{
"name": "transformer.h.37.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 0
}
],
"md5sum": "45946233a139ee5eb0d1bd1286ba9a83"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "transformer.h.37.attn.c_proj.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "4aefa39dcbdac601e387adbdf9af3e35"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.37.mlp.c_fc.q_weight",
"shape": [
24576,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "009466fa17ed38a4982e073fb5c45ed6"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 75497472,
"records": [
{
"name": "transformer.h.37.mlp.c_proj.q_weight",
"shape": [
6144,
3072
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 75497472,
"byteOffset": 0
}
],
"md5sum": "dd8a3c07257ae8438e9e5efb69370338"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 33276416,
"records": [
{
"name": "transformer.h.36.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.36.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 9437184
},
{
"name": "transformer.h.36.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 9449472
},
{
"name": "transformer.h.37.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 18886656
},
{
"name": "transformer.h.37.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 18899456
},
{
"name": "transformer.h.37.attn.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 21357056
},
{
"name": "transformer.h.37.attn.c_proj.q_scale",
"shape": [
6144,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 21369344
},
{
"name": "transformer.h.37.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23728640
},
{
"name": "transformer.h.37.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23740928
},
{
"name": "transformer.h.37.ln_2.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23753216
},
{
"name": "transformer.h.37.ln_2.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 23765504
},
{
"name": "transformer.h.37.mlp.c_fc.bias",
"shape": [
24576
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49152,
"byteOffset": 23777792
},
{
"name": "transformer.h.37.mlp.c_fc.q_scale",
"shape": [
24576,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 23826944
},
{
"name": "transformer.h.37.mlp.c_proj.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 33264128
}
],
"md5sum": "913f3518d60b3fe2473348030eb4e5f3"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 31592960,
"records": [
{
"name": "transformer.h.37.mlp.c_proj.q_scale",
"shape": [
6144,
768
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9437184,
"byteOffset": 0
},
{
"name": "transformer.h.38.attn.c_attn.bias",
"shape": [
6400
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12800,
"byteOffset": 9437184
},
{
"name": "transformer.h.38.attn.c_attn.q_weight",
"shape": [
6400,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 19660800,
"byteOffset": 9449984
},
{
"name": "transformer.h.38.attn.c_attn.q_scale",
"shape": [
6400,
192
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2457600,
"byteOffset": 29110784
},
{
"name": "transformer.h.38.ln_1.bias",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31568384
},
{
"name": "transformer.h.38.ln_1.weight",
"shape": [
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 31580672
}
],
"md5sum": "903d85cf3e729cd1dc5a7ab56c7dcfcc"
}
]
}