{ "metadata": { "ParamSize": 313, "ParamBytes": 4760885248.0, "BitsPerParam": 5.0011817065612245 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 272498688, "records": [ { "name": "lm_head.q_weight", "shape": [ 152064, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 272498688, "byteOffset": 0 } ], "md5sum": "dffaf3fd11a93aa911159c9ae45d9777" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 34062336, "records": [ { "name": "lm_head.q_scale", "shape": [ 152064, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 34062336, "byteOffset": 0 } ], "md5sum": "26dcf1e305f09cf2172649370821ca07" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 272498688, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 152064, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 272498688, "byteOffset": 0 } ], "md5sum": "cfe1ab57a348e301a2440e87effb917e" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 34062336, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 152064, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 34062336, "byteOffset": 0 } ], "md5sum": "69ae1a8dc4aacfa8a78f5374a735fa9f" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "aea2aec974a175ad1830259744d825f5" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "b8c25f5e8d8b5d800899630a542fd896" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "4dd81801a115cf5dcb06baf7fff1c74e" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "39a13234ecfd611253a33166546ad7c2" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 33519616, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 7168 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 4250624 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 12737536 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 12744704 }, { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 12753920 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 21011456 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 22043648 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 28466176 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 29268992 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 29276160 } ], "md5sum": "e08dde1535f911b39ff7e03e0ff5df8b" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "bd1ea77e9e84fdb5973e82c46710c00e" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "db2c3f54fc91eb50b5431c75180de32f" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 0 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 8486912 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 8494080 }, { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 8503296 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 16760832 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 17793024 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 24215552 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25018368 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 25025536 } ], "md5sum": "c7a03ca7aa2676c1fc3bac7694a98a33" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "271e21777ba31c64c922c5edc2169fac" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "9e2cb8c8a1321a1e03607d1406fa468e" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 8486912 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 8494080 }, { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 8503296 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 16760832 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 17793024 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 24215552 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25018368 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 25025536 } ], "md5sum": "619f36bce76247efa2138b1411bc4db3" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "87a2f3c145844fed6a5bd4bcfab5f09a" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "d199d481795fc77bef169625516b5af5" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 8486912 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 8494080 }, { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 8503296 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 16760832 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 17793024 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 24215552 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25018368 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 25025536 } ], "md5sum": "921c984a0f6256ad672e541ad7652856" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "f3922e7e02a3748ec55bf1519e469a8d" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "5b5cc1eee4ef2638852380c2603e93a3" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 8486912 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 8494080 }, { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 8503296 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 16760832 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 17793024 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 24215552 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25018368 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 25025536 } ], "md5sum": "14f848d918ce8124d3d64de9770ba833" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "09e7846fdc0647eafa976101e9835fba" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "c902f5acb041a476a8e094d4bb2f9f35" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 8486912 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 8494080 }, { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 8503296 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 16760832 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 17793024 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 24215552 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25018368 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 25025536 } ], "md5sum": "29dc6ec2c173572ca1023bae41df9b73" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "1a1d784167d64d8e1264c6b944797fcc" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "1168367ce72db319cded418f53611692" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 8486912 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 8494080 }, { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 8503296 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 16760832 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 17793024 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 24215552 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25018368 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 25025536 } ], "md5sum": "9e86f79d608d29858c3b5de064660e92" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 33285120, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 8486912 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 8494080 }, { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 8503296 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 16760832 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 17793024 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 24215552 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 25018368 }, { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 25027584 } ], "md5sum": "ddb057637ffc874137957f2b6cc395f6" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "cc75bced5158b6ae08e0e04792cedd78" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "482e64b8ded6f0c9d26ec1d74b0b1ea5" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 30301184, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 1032192 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 7454720 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 8257536 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 8264704 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 12508160 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 20995072 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 21002240 }, { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 21011456 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 29268992 } ], "md5sum": "9e0b1766d0da633d520c385e2319b6a0" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "03c81b3409287a94049f6f4e79f3e05a" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "ee014d3844382b94e5703230f4cb4a28" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 6422528 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 7225344 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 7232512 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 11475968 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 19962880 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 19970048 }, { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 19979264 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 28236800 } ], "md5sum": "1f3c697179b92a2878c41c5d3c4e7797" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "7be3586a4bf42eb274d9ed4ec7c9224d" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "a9e18616eccf2d97af6314df40598ca3" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 6422528 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 7225344 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 7232512 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 11475968 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 19962880 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 19970048 }, { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 19979264 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 28236800 } ], "md5sum": "c9cf6550e0baeb980f6136b11df5e29b" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "e486d3fefdbd0995c3a36bb07c8da287" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "dccc1497ae8eb4f14498db3746e919aa" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 6422528 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 7225344 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 7232512 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 11475968 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 19962880 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 19970048 }, { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 19979264 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 28236800 } ], "md5sum": "4bfb410c9016c339c96cb1e6e2619a53" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "d48f8321a8a6a4d4008e34525ee170d3" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "05d9b90200f8882fe46b0a9fc7540394" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 6422528 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 7225344 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 7232512 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 11475968 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 19962880 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 19970048 }, { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 19979264 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 28236800 } ], "md5sum": "20f2140e5561bbf9a3ea68f8430a8cbe" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "c72b6daee4231e6c267e921319178c67" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "00443a0f35ab7d3833b883b0dbd68d1e" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 6422528 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 7225344 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 7232512 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 11475968 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 19962880 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 19970048 }, { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 19979264 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 28236800 } ], "md5sum": "76331afbce73603c3bc25266516aacee" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "f74c085c4952ca41a78a8afc8ac2e4c8" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "e53dd74c9dbe480d1f3f2ecd3d70c9d7" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 6422528 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 7225344 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 7232512 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 11475968 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 19962880 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 19970048 }, { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 19979264 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 28236800 } ], "md5sum": "72f7c8124d95cb27d37c1d7b5ad01511" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "81fa4643946489863c19f416fa616857" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "f1d3d479c5e3105faba72971941e055b" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 6422528 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 7225344 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 7232512 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 11475968 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 19962880 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 19970048 }, { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 19979264 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 28236800 } ], "md5sum": "7bf0e3b1023d4d4c841043ad33418756" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "ffef8a3bd805c2385faff52c596f97f3" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "955cf3e3b97d1ba39c6db198d832015d" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 32243712, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 6422528 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 7225344 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 15712256 }, { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 15721472 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 23979008 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 25011200 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 31433728 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 32236544 } ], "md5sum": "34395b779f1a3df8cc499d2f34ca5250" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "e82dd67bd893e228a330f0d7388939a4" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "40f4cacf6fa02fe7480231766b59a047" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "991538619c45044f954118572a6c2c7c" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 25491456, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 4243456 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 12730368 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 12737536 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 12744704 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 16988160 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 25475072 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 25482240 } ], "md5sum": "ea3a1fe5c89d645011146b4069910b92" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "97375c97bc069dcec9c8cb13bd67c851" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "8ca34e29af917e9ee929fabf8634b173" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "3fea02d2f06789b8dfa02fede3f2ef99" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 33526784, "records": [ { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 8257536 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 9289728 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 15712256 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 16515072 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 16522240 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 20765696 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 20772864 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 20780032 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 25023488 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 33510400 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 33517568 } ], "md5sum": "54a7c0c0252d51b81ca079d68a185127" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "b5cfe50de42c4b54a08dc3af6668095b" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "81afb8ea08e6114d8f13345c737c41e8" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 8257536 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 9289728 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 15712256 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 16515072 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 16522240 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 20765696 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 29252608 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 29259776 } ], "md5sum": "eb73224db7afbfb6dc88bfaadd5fa85b" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "9144212842ee815bdb8234ef18beb4ac" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "81028a6d534ee2399447d40e4527cb5d" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 8257536 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 9289728 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 15712256 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 16515072 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 16522240 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 20765696 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 29252608 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 29259776 } ], "md5sum": "d01749df7c51614c9f5165f18bdf616b" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "b5f9dba1e12c4c5168b23c9a9ca2ed47" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "248c2bae200df3b5d9fff85212330068" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 8257536 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 9289728 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 15712256 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 16515072 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 16522240 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 20765696 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 29252608 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 29259776 } ], "md5sum": "bb5cfb854151ab9ac84481a296c3e461" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "f4a2c8f44b7d8e4fdd468013fbb7b8a5" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "70c692a227924a34ac0e393ff38f5021" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 8257536 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 9289728 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 15712256 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 16515072 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 16522240 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 20765696 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 29252608 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 29259776 } ], "md5sum": "12ff7635e93e0e370fe23781e574ad00" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "1a1a56d656c59f2c70aafe3d0a710ed9" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "5de9bf64b2531c78a3de4bc6dfccf509" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 8257536 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 9289728 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 15712256 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 16515072 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 16522240 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 20765696 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 29252608 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 29259776 } ], "md5sum": "53599996ee29df90597aecc29297b0a2" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "949a5da354ebb60ce43f5d06da6e78b3" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "b00af3981041a8eb58b516db46d7a2c7" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 8257536 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 9289728 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 15712256 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 16515072 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 16522240 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 20765696 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 29252608 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 29259776 } ], "md5sum": "5d4e3058810767dfbed9e8bf9f16286e" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "286e803188b75381d95803662288e7a4" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "f5194d036b4d525659a5844907a0bcd3" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 8257536 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 9289728 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 15712256 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 16515072 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 16522240 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 20765696 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 29252608 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 29259776 } ], "md5sum": "b11486901439aeea156cbffa9a5313b4" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 33947648, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 3584, 2368 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33947648, "byteOffset": 0 } ], "md5sum": "12e555037da2ed76674854a90bbd3870" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 67895296, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 37888, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 67895296, "byteOffset": 0 } ], "md5sum": "568443a00bbc3d876d77caa8d6c0b502" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 29268992, "records": [ { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 8257536 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 9289728 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 15712256 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 16515072 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 3584, 592 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4243456, "byteOffset": 16522240 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 37888, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8486912, "byteOffset": 20765696 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 29252608 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "bfloat16", "format": "raw", "nbytes": 9216, "byteOffset": 29259776 } ], "md5sum": "5f35e77b6759e132b69f3caa89fd264a" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 16522240, "records": [ { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 4608, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8257536, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 4608, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1032192, "byteOffset": 8257536 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 3584, 448 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6422528, "byteOffset": 9289728 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 3584, 112 ], "dtype": "bfloat16", "format": "raw", "nbytes": 802816, "byteOffset": 15712256 }, { "name": "model.norm.weight", "shape": [ 3584 ], "dtype": "bfloat16", "format": "raw", "nbytes": 7168, "byteOffset": 16515072 } ], "md5sum": "476e351b747cc0a5f3acaf9d39483d47" } ] }