diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/mlc-chat-config.json b/mlc-chat-config.json new file mode 100644 index 0000000000000000000000000000000000000000..590e65e99b4a1bbb69ae78996d6897cc8f15482f --- /dev/null +++ b/mlc-chat-config.json @@ -0,0 +1,83 @@ +{ + "version": "0.1.0", + "model_type": "llama", + "quantization": "q4f32_1", + "model_config": { + "hidden_size": 4096, + "intermediate_size": 14336, + "num_attention_heads": 32, + "num_hidden_layers": 32, + "rms_norm_eps": 1e-05, + "vocab_size": 128259, + "tie_word_embeddings": false, + "position_embedding_base": 8000000.0, + "rope_scaling": null, + "context_window_size": 32768, + "prefill_chunk_size": 8192, + "num_key_value_heads": 8, + "head_dim": 128, + "tensor_parallel_shards": 1, + "pipeline_parallel_stages": 1, + "max_batch_size": 128, + "disaggregation": false + }, + "vocab_size": 128259, + "context_window_size": 32768, + "sliding_window_size": -1, + "prefill_chunk_size": 8192, + "attention_sink_size": -1, + "tensor_parallel_shards": 1, + "pipeline_parallel_stages": 1, + "active_vocab_size": 128259, + "temperature": 1.0, + "presence_penalty": 0.0, + "frequency_penalty": 0.0, + "repetition_penalty": 1.0, + "top_p": 1.0, + "tokenizer_files": [ + "tokenizer.json", + "tokenizer_config.json" + ], + "tokenizer_info": { + "token_postproc_method": "byte_level", + "prepend_space_in_encode": false, + "strip_space_in_decode": false + }, + "conv_template": { + "name": "llama-3", + "system_template": "<|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>", + "system_message": "You are a helpful, respectful and honest assistant.", + "system_prefix_token_ids": [ + 128000 + ], + "add_role_after_system_message": true, + "roles": { + "user": "<|start_header_id|>user", + "assistant": "<|start_header_id|>assistant" + }, + "role_templates": { + "user": "{user_message}", + "assistant": "{assistant_message}", + "tool": "{tool_message}" + }, + "messages": [], + "seps": [ + "<|eot_id|>" + ], + "role_content_sep": "<|end_header_id|>\n\n", + "role_empty_sep": "<|end_header_id|>\n\n", + "stop_str": [ + "<|end_of_text|>", + "<|eot_id|>" + ], + "stop_token_ids": [ + 128001, + 128009 + ], + "function_string": "", + "use_function_calling": false + }, + "pad_token_id": 128001, + "bos_token_id": 128000, + "eos_token_id": 128009 +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..29aa13c71350992f900689434f34cd0793103943 --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78d37e9d338442879e38d0a5a93c2e6c5d0702c2564b819a7d1f31e87d8f6d98 +size 262674432 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..f73d7b66178236e7e86a5a42d85e6927b69f2bc2 --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:318196e67ded49b0ef68442fe073c8bb8d1f613260ebff97b33c0a791e825023 +size 58720256 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..a3956e4f451f244c78f69569b7ad3d1c70e30e98 --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d067826521451875e22f38766b2034527d520549b1954b1ef4ad65959e549760 +size 27271168 diff --git a/params_shard_100.bin b/params_shard_100.bin new file mode 100644 index 0000000000000000000000000000000000000000..ddfa357c139653856e25558ca3977eb74ef0c533 --- /dev/null +++ b/params_shard_100.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:418186dc523ca7b868e00ac747c9dae6884c1f95c19afe1eb3ce975b53e579b2 +size 29360128 diff --git a/params_shard_101.bin b/params_shard_101.bin new file mode 100644 index 0000000000000000000000000000000000000000..c71c66efbbef4b57d77ec84f3708c4a6ebfe136c --- /dev/null +++ b/params_shard_101.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b558d4e84bf7b7b00f52ab9f9d1733ecdb7864817a6c5ffc55be4a98c4a6212a +size 33079296 diff --git a/params_shard_102.bin b/params_shard_102.bin new file mode 100644 index 0000000000000000000000000000000000000000..84f167d98a3b141db9a49fac5f23316262f52a4e --- /dev/null +++ b/params_shard_102.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da591c28edf9ad347528b28d9743f810cf341332945849aea18a0e4f91220d07 +size 29360128 diff --git a/params_shard_103.bin b/params_shard_103.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dbafebc4624ac1c80c675c156e1b12ee5dfe509 --- /dev/null +++ b/params_shard_103.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96293734335bb73239edb057da74e506c5743a775370c6d99d5d90c2b19385d2 +size 58720256 diff --git a/params_shard_104.bin b/params_shard_104.bin new file mode 100644 index 0000000000000000000000000000000000000000..2dfe1c9ea7595492536d2a9bc24701c8c9ebf6ff --- /dev/null +++ b/params_shard_104.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46ec2ca7a9c1545fbf49a792284a1990cb121d9f551b7994dd4a1a2a8e11a93c +size 26214400 diff --git a/params_shard_105.bin b/params_shard_105.bin new file mode 100644 index 0000000000000000000000000000000000000000..6039a24989e3acc962da840d159e714512923a3d --- /dev/null +++ b/params_shard_105.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:216047103de55920924409558c3639897a82ed4e0f64774798d357af05f1166c +size 29360128 diff --git a/params_shard_106.bin b/params_shard_106.bin new file mode 100644 index 0000000000000000000000000000000000000000..8143331351860f745680794d2303cb66a9e82dba --- /dev/null +++ b/params_shard_106.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0485a3e57814cd3ea4ff17b4b18babb5cf1af571cc5f3daf579b81abcef90ce +size 29900800 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..80c2aea62edd2fcdb57ec7cfdae523f63279516d --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:031a783475aa77ead79baeeb6a939678ed4e402b1a29e8545b10c5dabe24a966 +size 29360128 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..8695d2081652e4df0f810f18b22be7f2a1556771 --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b96217573ffbc46e25ac2d43e2aab0506acd00f6b3c8552d807758326bc02c8a +size 58720256 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..0127af87730009aa406f47576cce8b98a1913327 --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b53c43ef414d77da0808db5074f6910be66f7b7a1923c438cd976edcffff3eab +size 32505856 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..87a9635b3303c91efc9b8ddb8c724d045b520bb6 --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82f7897f2cfab38ecc05961aaf2041daf28b82ef4b3e0742189013512f69c354 +size 58720256 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..10a62d7ef67af3f63aee14d725cb365700cbd560 --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c3bc440719f970701057532a40bf7d94113af88f2bc5cc983f0580087a8335 +size 21495808 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..f52e993514085d6c834876d26afd5fba010dd637 --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71d70c5b78b11508241026ba299ae73e3298f38d4c613d498d816683e58d03ee +size 29360128 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..55db3c5238965a9e30d5170b1300d483b2ed9e69 --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5b40195350072e3fa1233b3f2aee6b430ea0b32a86f0482087700d8becc78f2 +size 29360128 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..26da16bcc1a03e8dc5970a8fe60bc078e3e8ccdb --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67609965ae013156a2b124567b331218e9a294eec475b73752f477b818a9a30a +size 58720256 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..0a241d4a64bd889a75d2547fe863c291f2252a24 --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e661bcbcac4d0dbd082b7546fc9bf604d37766a83de1f80b27373830a281f3d5 +size 30949376 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..5b1735cf1343de3ae4553827098a1ec2937d84e6 --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c8ea8db0eede4c2854c86f365cd9cbec2f95ff16f284712af605768f5e7fcb7 +size 32842496 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..15c81a2e53bb86e2bde8df139d0ade3101d8ce31 --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f9cf11e5457f59d65aae84fca91dbef276d9a43369cccf681634a9edc63b1f0 +size 58720256 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..349a99d4e17ed376a06ca945c08603be5725b714 --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d344bdd269e548e9d507fb2e046d9340f7f1cdc80db7c6c73cc8b2561a7a67d +size 29360128 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..4958c4352052f163f79d9063876b67ba9da820d2 --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6577624f7028d2aa2f06e0ea22a17a6564f0b2c94ac2e857fcd7fb844dd37261 +size 32522240 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..6c55672b48362f78a400da505a559b9370902ee3 --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e699910757c9a0a92aac556793d4e6977402956a083205300cdf0e6f70649f0 +size 23592960 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..feebe11b8e3373adb4939cf00464141612629d7d --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:291a85428557fd71a37e5195f51bd204d14c50f0ebae9325167569377d465e65 +size 29360128 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..d44945c0ecf15380ab8ae76d76f214fba545ec9e --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30154072613300f7430d546ed6d4692aabe90f55ec4fecdf1b9d2c734514a79a +size 58720256 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..1b4b9335d4e5e7ac9e11b4249f45e5251fc19987 --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adcf0dc5c0b9fb3058e28039dd179a22f99b77a20d14778173d0309463c91691 +size 31997952 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..eadefb85d043fa954f3ad2b686967eea09a33ffb --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3df87c2162421d5283f5bb74dc78151d91cf96f12f48adccde6e51ef6f6ba984 +size 21504000 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..39d217f6f95c0a9a0990d017d29d3de53e4761f3 --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc42def184109d2c5401a89eed97695703e04cc012d5f811dbc56ecf3ede32de +size 29360128 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..e193712d3678a79e741b61596b9142777c6944ed --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82815d9427960171b7777017e03531a9e42fc2075a5c6b7157d24a662680ffa0 +size 58720256 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..c399e606a765673a168733bd8bf80fec63e64086 --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47969276516a83392392d3eeed6532dffe1b3cc89662893c8f9a17ceeaf4048c +size 29360128 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..b61eb70b567ea89fb8e9cb2a56eb502286e17154 --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:785261bbda3be6ec31319a01cc42cb63e4e7ce7747e50eb65754515fe37dbd59 +size 31989760 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..83697ebe39f66395f6b532f421f64b0e7892a6f8 --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb01af8811e59013595316bce9c831b7ca8442a67b9ea0cc0d2293954d082ea8 +size 58720256 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..c59ee567ffa08ef54ffe753ef7a2b43bc70c3684 --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c446397bc17fa447bf173ed2d41329c48ae568ac9e0de5a5c491e794caff1eb4 +size 58720256 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..5f20abde37efcacc2fb244fd737d37ebed20fc81 --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8d4623e493c10ceba16ed95d3c3eb62ee5c5bb68022665f0258c255defae2dc +size 28852224 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..bb02fecf09af5cc89d90c9dad4688b43234f067b --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15640bf6bc444b4494dd5a5fe7e93c8dbd55793ae3cf976d77091ba24f156464 +size 29360128 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..d7b5f10cca50ed7ff8b3f0d1876946e0b92456d6 --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b79fbefe8f6fa75dc5d60953f19df25e2ffa644283f8a19a437ba702ac297348 +size 58720256 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..c2cfc095e68b9e751af3f106d3507b21255a6f18 --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd0d2ff14974057fbc85fa1deabb5df07a292d3eb0bc995de6a10cf4cc023719 +size 32530432 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..88e8b85376ba3c9c11c58fc14348a48148a60ce4 --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f641f296ac44cff3d82dc7dca05bdfeb2ed5268b5e3d8f61d1428a64dfcd8e9 +size 58720256 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..bcd12cf7ad5f74c0fa59d2d4144256fb661fdc5e --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bf35222fd1e9d2d764bd45b3e0c153c1981aa2f2647bb756dc09731a590995d +size 58720256 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..897bfa5cac4511f19cd54575de508649b52f9920 --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9d4af193c7af4f3ea933792d73af204748237ffad85298bca77a57f642a6b80 +size 28844032 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..c099cd4951b65c7ebf666094382fe3c3ac5c858f --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc785246f4dc87fc05661b3d695b8bafe5fc3e7da455f7cffb3a2cf6f360fd1e +size 25165824 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..5659d418dc9aced4ed4d8592ec6e0c822d1796f6 --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a83b4b4907e4bf2f6e2a55121a0c74e924a6b84134e65bf3b7032027bd3f8d +size 58720256 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..d053536ed4846d0326da4938876b83947a6a4428 --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4844797b7b016d0bfab0edfbc23739c6415962c1626f22ce8979fded92217766 +size 21495808 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..a77054d34a36da2efb5fa6de857e698637a5abd6 --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c8c004b56b5555ed7b6d2be48bc0a7b4fcade5eaa603b5e5abed54454706580 +size 58720256 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..55207dbbb7f2fcb8308671f91ff59c0c360f80fb --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa970e3378f7523567b14ddf20e82307cbb8f285ede0f60b418291136a8f121 +size 262674432 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..89f3887f1ced1f5d307d45af74c4fd987d7abfd2 --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b424be4a44869eeddafba940c7d033dddff30c157f2c4375d98d67ae700097a7 +size 32834304 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..abd81e0f3d353826db064218aa8f28b04eb7eb15 --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0df96feb88b7974467d45fecac08c6e08dce26c595c965db21a142cf54807c07 +size 58720256 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..d8b76584cb7a5b8e6f829c11b5e9ad92bc933f71 --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce5066c6bf3403117f593cb9ba38fc94c573d4413270f82e7c82599f59620fe7 +size 30957568 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..4536a47512a012345afc2cc4095ce286cd9b2a85 --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf23bda797e622640d21cc23a3a3be8a46129471575face1665d3bfe53974045 +size 29360128 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..5cd86cd6efa1ea5287d46e2ae9de0d3e05260d0a --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23fc4dfaf0f081f396ba0774afa4df36f60709fccd52ec6a3058c0e4c86eecbb +size 29360128 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..5a8c7ff5e8a082e7343828c6f9043cbeef128b0a --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fb7b48daa0c80ed2266f997035a31cc82742c1e0b620f337bc83045f1314e4d +size 28860416 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..67d4d815fb126ecb70fb3bc32cb8ef2c884956a8 --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c139e66a7309dbf041fee16266097c0de5d74fdf8d328dc69fed426ee0976560 +size 58720256 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..0dd6590fd6105b4616e717cd5417548ca290b812 --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9963b218a9022fe324d3e67d36d61f32252d2076f88b0fb67cd2ad9ce59a2ed +size 29360128 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..ee6f3e1bee72a58515b5f774926aa409a2eba63d --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6782e5b6ba0a91a4cbd17e137d868dd11b18e4a11b5b6bf5f046ce5c7d81b51e +size 27262976 diff --git a/params_shard_52.bin b/params_shard_52.bin new file mode 100644 index 0000000000000000000000000000000000000000..7ff1d49b4988c848953952aefaf923fa21e9ca2c --- /dev/null +++ b/params_shard_52.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60549b7faa5299acd26591cbe75a4c6502cee872d063c48c21283dd366f8cf5a +size 58720256 diff --git a/params_shard_53.bin b/params_shard_53.bin new file mode 100644 index 0000000000000000000000000000000000000000..7a9f3b1216054668435905ed610b14492b8ef97d --- /dev/null +++ b/params_shard_53.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b887cd0581b8abb93211b29d999eb48425edfbdeaa8f5b4c577b5f4355d613a +size 30941184 diff --git a/params_shard_54.bin b/params_shard_54.bin new file mode 100644 index 0000000000000000000000000000000000000000..5bbe657e9243e49001511b7f52a5bff37408eb16 --- /dev/null +++ b/params_shard_54.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2549c5645ff58998c9b5b5ccad1ac23db939028a26b686e8bb41be2a5f4016e9 +size 29360128 diff --git a/params_shard_55.bin b/params_shard_55.bin new file mode 100644 index 0000000000000000000000000000000000000000..1fa13cb672fe57a6b866931c1aab259379e0f233 --- /dev/null +++ b/params_shard_55.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48e0df1be51bdcf01b4b756b3f7eca3d6a1d56aad8bfa8f01063c57131a63110 +size 27279360 diff --git a/params_shard_56.bin b/params_shard_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..e800dbea4421cc0a4ea3ad092717d412ae13539b --- /dev/null +++ b/params_shard_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b45d6485d50b99a8629febf8ed605bb0cb006c7ececdcf1b9e417c9223639e3d +size 29360128 diff --git a/params_shard_57.bin b/params_shard_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..9689082b7639f30e66e99fc13f522bddda61be95 --- /dev/null +++ b/params_shard_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e00881f75b193b8e3fa9bbef235056d586c40f855ed2151dd0e8743e883662d +size 58720256 diff --git a/params_shard_58.bin b/params_shard_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..5e1dc54783011bb8aa9908008a41d10468d2bf15 --- /dev/null +++ b/params_shard_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddbdc18d0c5f90350e5adae42d19c0c3c265071d08f444b959b9eff20e3ffa95 +size 58720256 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..0635176c879a6a42fe3901b09ee43ea15669f045 --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfa7ebf4ae4e90bc0e84a8b3efb55a6f0d7faf532188a04d899d94529d359ece +size 27795456 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..e26f0d32758a930db5de7a5a052b87f38e366eb0 --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc5eb0b5b01db08305df889e706dac5b0da5892a4da436e7bead77e9c7c5fe9a +size 21495808 diff --git a/params_shard_60.bin b/params_shard_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..86cc6f437b42fcfdaf8cee2357a7adb10cdd8596 --- /dev/null +++ b/params_shard_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f07e83e58cc34250f21b7d42957623dd94508e4301df674eee5689cb3e368a8 +size 29360128 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..d70a97890775e0658eb17ee1008ba11673bb6f7c --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31cba0f279c8d638526998c7fef545652fba366e1313744e8d682b5bd927a749 +size 58720256 diff --git a/params_shard_62.bin b/params_shard_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..ba368b90af61407b706efa65824456449bcf7457 --- /dev/null +++ b/params_shard_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e7028cd475013a8c253a2116ccf2709af7b049ca71de87f4312ee27a68c48b7 +size 27271168 diff --git a/params_shard_63.bin b/params_shard_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..1ac756cc066401c692ebf43e24c1c33f5e53b5c7 --- /dev/null +++ b/params_shard_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a45402742d0737367e944f6b564ebf2a62b2a35a0782922d2c75801f4a1df3f9 +size 58720256 diff --git a/params_shard_64.bin b/params_shard_64.bin new file mode 100644 index 0000000000000000000000000000000000000000..baa85be68b9c00acc35241c822748477affe8792 --- /dev/null +++ b/params_shard_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7430dbddfd775ac13e0c7f620cb072b7b40210d6b5d3393be700735896dc0f08 +size 30957568 diff --git a/params_shard_65.bin b/params_shard_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..08517892f59598bbac6b7bf1b29a572cd6f91420 --- /dev/null +++ b/params_shard_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edf020e805ef8b5d14543fc1b439cb71cb908dc55c8d7fcdc1137abf655ecc70 +size 29360128 diff --git a/params_shard_66.bin b/params_shard_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..c2d6e5fe89daf470d1b4f41ed64164b00d983123 --- /dev/null +++ b/params_shard_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e39d89cdbb8bcd3fe1b8dbc3b10e51debbdab0f088a711508eb3d1a363b3c78 +size 33038336 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..727702494779cdd2c82f62841e904f01241ea530 --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd9d9f0e7a5222b6bae487341b46667083319a593a3397b8ada48aa87cabc954 +size 25174016 diff --git a/params_shard_68.bin b/params_shard_68.bin new file mode 100644 index 0000000000000000000000000000000000000000..dbcee07f456b5bde290078c47585f17e406bbecf --- /dev/null +++ b/params_shard_68.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edacb9bda70eb1d39b7a42971e4cf68f791f596e036d96e8d9f5d2f946b0fe88 +size 29360128 diff --git a/params_shard_69.bin b/params_shard_69.bin new file mode 100644 index 0000000000000000000000000000000000000000..d97d8c8d1a5350bd68a9c4857e4d0cfaa4f93d93 --- /dev/null +++ b/params_shard_69.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c14c7887d5773a4d3264344a01f1332ac1c97b40019b38a6a7f83926d5190758 +size 58720256 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..f42e29e5feaebb239dda9d29b28c5ffcb2605aaa --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02b2d78101d12607127d76429813dc4d10d3cce0deec1baa42c8349405c016cf +size 23592960 diff --git a/params_shard_70.bin b/params_shard_70.bin new file mode 100644 index 0000000000000000000000000000000000000000..14d7cbe38eb2c1beb41884592bb63db4aaef1a0f --- /dev/null +++ b/params_shard_70.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e0d89c77ae0b7a5362bd1a13f8c1dd8d5956dd37f9e324ae82afe5cf8ab13e4 +size 27262976 diff --git a/params_shard_71.bin b/params_shard_71.bin new file mode 100644 index 0000000000000000000000000000000000000000..b1ae3d426521cae3b84645c994f63b2a55aba86d --- /dev/null +++ b/params_shard_71.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:067f82c945a9c4acc88d4ac00d3c36886764b4312885ef017d5af802daee5544 +size 29360128 diff --git a/params_shard_72.bin b/params_shard_72.bin new file mode 100644 index 0000000000000000000000000000000000000000..ff7c734d78e59250c20f3dd5ae17c48e6a6c909c --- /dev/null +++ b/params_shard_72.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:327b51be399b2da94c4fae9380c7c0ef23fd5f9ec2d129b83d0ba1c889c5e39c +size 58720256 diff --git a/params_shard_73.bin b/params_shard_73.bin new file mode 100644 index 0000000000000000000000000000000000000000..a884ffce20432286f26397d2f0f16cc5fa6d93ad --- /dev/null +++ b/params_shard_73.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b4f5ed83bf504377d3d8216c2d128847e79fbc2a90e34cd4603470c7134d596 +size 29360128 diff --git a/params_shard_74.bin b/params_shard_74.bin new file mode 100644 index 0000000000000000000000000000000000000000..1425fa4a8ebc13c06c20bdc8ac19f899c371c849 --- /dev/null +++ b/params_shard_74.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdf17601da53c3a37cb5deaf7bd1886638a91030f4c2bc5f5be25e34422748ef +size 32522240 diff --git a/params_shard_75.bin b/params_shard_75.bin new file mode 100644 index 0000000000000000000000000000000000000000..910ebf8467e8a5b48460831ddbdffa84b915558b --- /dev/null +++ b/params_shard_75.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cde34a8b0b8636f1b69105d49568174db8e16b79e499284918a2a723983271f6 +size 58720256 diff --git a/params_shard_76.bin b/params_shard_76.bin new file mode 100644 index 0000000000000000000000000000000000000000..ddc5a1c6c9b800ade0184c846b0d0ad9f4ac7a71 --- /dev/null +++ b/params_shard_76.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42d17a2bd25ea1a50d841913316f934194d041038129aeb88d33de546831e9d7 +size 33030144 diff --git a/params_shard_77.bin b/params_shard_77.bin new file mode 100644 index 0000000000000000000000000000000000000000..da35017460264c5e54ea39f2f5e13bfa65aa7e85 --- /dev/null +++ b/params_shard_77.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6eb48bb2ad39a68e2fe24d5c309328d3d6ccb93118e0ca30572636cc1daf397e +size 30941184 diff --git a/params_shard_78.bin b/params_shard_78.bin new file mode 100644 index 0000000000000000000000000000000000000000..64e3067601b2c62306397609f58ae160af28798d --- /dev/null +++ b/params_shard_78.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdb0decdf1b04e7cd76667c150bbcbbe28411b7bcf198d9bc98ab0bda4597da9 +size 29360128 diff --git a/params_shard_79.bin b/params_shard_79.bin new file mode 100644 index 0000000000000000000000000000000000000000..a5e5c4577e5a88fdb062aac05f51db9a8f5b1200 --- /dev/null +++ b/params_shard_79.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7af130e00bfeb3b823ff49fe64ccc7fef31118f678abd38b078d0c7ac1f5455d +size 58720256 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..51fd0d27d6d90e06ca16082a62d6b357fa8bb4cf --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef6c4af01fa55f6b35962ac230c2e6712eca9238cce75bb79ffb3409fa589bac +size 29360128 diff --git a/params_shard_80.bin b/params_shard_80.bin new file mode 100644 index 0000000000000000000000000000000000000000..2ed6f73c746143a3261b4d5d1fa26138e56be3c0 --- /dev/null +++ b/params_shard_80.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ceb09f3a6408497fda50124311f6dc4833f1b92dbcdf939787bff7036b35db0b +size 32530432 diff --git a/params_shard_81.bin b/params_shard_81.bin new file mode 100644 index 0000000000000000000000000000000000000000..4bedd009a302ba0cc69cbdc55267b0f0c1287b61 --- /dev/null +++ b/params_shard_81.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:334f4c478c999bc06b3f9d1f404cb6796720f81486589741f13cbb702e5eac99 +size 58720256 diff --git a/params_shard_82.bin b/params_shard_82.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7da96318b83a2e65980cb056aab4d26671dc721 --- /dev/null +++ b/params_shard_82.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1fb6bf4d8c4de699962cf4bc921d411a748e8655280ecc75dbad76f8e566b16 +size 58720256 diff --git a/params_shard_83.bin b/params_shard_83.bin new file mode 100644 index 0000000000000000000000000000000000000000..7c9a4e5d7564b3118d33e5f372f8b91a89a2eb89 --- /dev/null +++ b/params_shard_83.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f68f691b398f3df13d51251984433498a94da97f26d23df22f2911f1235fa4a +size 27287552 diff --git a/params_shard_84.bin b/params_shard_84.bin new file mode 100644 index 0000000000000000000000000000000000000000..45d6293b6eb443c683a637c81537ae91a8ac8a19 --- /dev/null +++ b/params_shard_84.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61f5b9381f6144cf75f9d50d58f169b82a0a48390cea18b993dbf450feb40753 +size 58720256 diff --git a/params_shard_85.bin b/params_shard_85.bin new file mode 100644 index 0000000000000000000000000000000000000000..9e56abf0e54aea3ce5c55cda032ef523cada9e2a --- /dev/null +++ b/params_shard_85.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:060e44471c9fcec943c8a81c0d104847026b8f74c3184e7fdb420a82cd0eacf1 +size 29360128 diff --git a/params_shard_86.bin b/params_shard_86.bin new file mode 100644 index 0000000000000000000000000000000000000000..c1b0a3d53b860e3f2b749eb1e1ef7d0ceb9ca074 --- /dev/null +++ b/params_shard_86.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93c1f971720daa7477a13ddc54e1599c7cec4bf4d15a5a1b8c610d744b21fb9c +size 58720256 diff --git a/params_shard_87.bin b/params_shard_87.bin new file mode 100644 index 0000000000000000000000000000000000000000..15afd9c3d5e023d6d29cba7e9aa7dafe42a3c213 --- /dev/null +++ b/params_shard_87.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d6d8fbace6534e9d3f225460c6ab5a479fbf08c87b33227a6296841205227d8 +size 27811840 diff --git a/params_shard_88.bin b/params_shard_88.bin new file mode 100644 index 0000000000000000000000000000000000000000..812c3e6f89e855ba7f0e93ed5f1da2eb5ae506c8 --- /dev/null +++ b/params_shard_88.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:812efeec56b5bb8fc366bdca6809d9128d394879de0c16eb3ade7e4c4f8ee5be +size 29360128 diff --git a/params_shard_89.bin b/params_shard_89.bin new file mode 100644 index 0000000000000000000000000000000000000000..df99876ec7f027c0d8133ce92cab94438ef8f393 --- /dev/null +++ b/params_shard_89.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e7deb3272d2f1c0f05f74dd886e1e9ebe77698e1b5afbcf385d1d71f9e8b61a +size 29360128 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..43715c85abf06b2c8c022361f41f84deb1fb93cb --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4aa88e22c26f045c39f6d5420ceced4722d9355d6633044583a9b6730a47189 +size 58720256 diff --git a/params_shard_90.bin b/params_shard_90.bin new file mode 100644 index 0000000000000000000000000000000000000000..2a3902bc70b81ab0158924ea355df8efdce3fdbf --- /dev/null +++ b/params_shard_90.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fad559bf4ac787ff62265505b3e26329a572d8d644d1bc4895c432adc9dd300 +size 24158208 diff --git a/params_shard_91.bin b/params_shard_91.bin new file mode 100644 index 0000000000000000000000000000000000000000..f08160c46bd17bb042a0ae02856d8ee0d0b2a6b8 --- /dev/null +++ b/params_shard_91.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e96b29ce3c195e989437daddb91388f493f12c7d4dc708146c3feb016d8ac68a +size 29360128 diff --git a/params_shard_92.bin b/params_shard_92.bin new file mode 100644 index 0000000000000000000000000000000000000000..ce6f790904e7c824b66821b02d51b2ebec9828a0 --- /dev/null +++ b/params_shard_92.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea542c75781cf6be4cc7f1a600fa4c04c1964f263e2912eb5f3becd88d627ee9 +size 58720256 diff --git a/params_shard_93.bin b/params_shard_93.bin new file mode 100644 index 0000000000000000000000000000000000000000..4fb3024950f4bf9d4394f5fe0655ccfdd9297f43 --- /dev/null +++ b/params_shard_93.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd782a8d4d8b49862bdf0feb079642520b4f5f6bd6013357333a4407aa034a5f +size 25182208 diff --git a/params_shard_94.bin b/params_shard_94.bin new file mode 100644 index 0000000000000000000000000000000000000000..dfc2dd7308a7e7b38c0aee6bcba4ddd6f4005b18 --- /dev/null +++ b/params_shard_94.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6189f53e03b26c17ca6ff4503ef94a023e472eb38e2cbe6bc4d88b87e433a798 +size 29360128 diff --git a/params_shard_95.bin b/params_shard_95.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd31708a8dca60f73fb1aed141ff7f084fd1c57f --- /dev/null +++ b/params_shard_95.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db8d10dbcc5e5e8b5b3a625e81e1bd0c00c9309eb61925cddc85fbcec04f0bc5 +size 31997952 diff --git a/params_shard_96.bin b/params_shard_96.bin new file mode 100644 index 0000000000000000000000000000000000000000..82a825f9ab70b93b003144271bfd5bc073900514 --- /dev/null +++ b/params_shard_96.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef99330e08755148a206e87da0dbf2e6bbf607990266c8a733060f3068c194a2 +size 29360128 diff --git a/params_shard_97.bin b/params_shard_97.bin new file mode 100644 index 0000000000000000000000000000000000000000..b1888428b1b0dca1a8278733af65a71802225e7f --- /dev/null +++ b/params_shard_97.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80d8df0a0536e0d13631b643f5a2c17464aed3183254b18f3047c01d57f1f355 +size 29360128 diff --git a/params_shard_98.bin b/params_shard_98.bin new file mode 100644 index 0000000000000000000000000000000000000000..f2dcdcdff9615eb68edaa4f6ffd7a35e6a1ff4c1 --- /dev/null +++ b/params_shard_98.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61e83f1d661cad1f93bffff1a668d134ece6ad913c883699089cc838eb59c726 +size 29360128 diff --git a/params_shard_99.bin b/params_shard_99.bin new file mode 100644 index 0000000000000000000000000000000000000000..7e785508e1b8d706f9d920846c11da01a43d33f1 --- /dev/null +++ b/params_shard_99.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f03d1797bbb512f7ef7af0ed23d75a3d6fa49021453de680873076c119e810d0 +size 29917184 diff --git a/server.py b/server.py new file mode 100644 index 0000000000000000000000000000000000000000..c44a1e6bacacea2950ba14cace7962eb4f6bfbad --- /dev/null +++ b/server.py @@ -0,0 +1,11 @@ +from http.server import HTTPServer, SimpleHTTPRequestHandler + +class CORSRequestHandler(SimpleHTTPRequestHandler): + def end_headers(self): + self.send_header('Access-Control-Allow-Origin', '*') + self.send_header('Access-Control-Allow-Methods', 'GET') + self.send_header('Cache-Control', 'no-store, no-cache, must-revalidate') + return super().end_headers() + +print("Serving MLC model at http://localhost:8000") +HTTPServer(('localhost', 8000), CORSRequestHandler).serve_forever() diff --git a/tensor-cache-b16.json b/tensor-cache-b16.json new file mode 100644 index 0000000000000000000000000000000000000000..bae64f4cded99e52a27c3dbdc6111316d099d038 --- /dev/null +++ b/tensor-cache-b16.json @@ -0,0 +1,4375 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 5019827200.0, + "BitsPerParam": 1.5790556154328332 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 262674432, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 128259, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 262674432, + "byteOffset": 0 + } + ], + "md5sum": "1a8db284cf568c04eb50e0a61670b435" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "3ba101a32a60e4957d4f341ed2474d54" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 32842496, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 128259, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 32834304, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32834304 + } + ], + "md5sum": "25b9648a2f7f4bd916a635505f8ed165" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "bde72072557ec421c0ff4ae82ce96784" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7340032 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 19922944 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 21495808 + } + ], + "md5sum": "0bc04e510210d12f4a773ddfcf2012ae" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "3b7c7544320febb20cc7b3a3310e4415" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 21495808, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 14155776 + } + ], + "md5sum": "e5bbdd5d1d201a4ae1bb0828a8a52e60" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 23592960, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 22544384 + } + ], + "md5sum": "a8fd0fb8a5c610ce1c66b6c10d82b030" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ea30f6795d29feeae83af89e7e8fa85c" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "746157e8b6c2189cc1f5a8d460a445d3" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "edcb64768413917f8fdb66fd7c632851" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0247610b68f5900228a65256358f0120" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "4f7e7f8bed5b1ee976f4b7dfe143223d" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 32505856, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7340032 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 19922944 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 21495808 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 25165824 + } + ], + "md5sum": "9b95ca4090dbe045baa76e5c0d444fcf" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "083256552395630b71e265052c56b44f" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 21495808, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 14155776 + } + ], + "md5sum": "0fb7eba5c12daa5c6c559d3351c3ec89" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "06e904967db15497efbedf6df26d70d2" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "83e68fa78e4048b638bf27f4ceccc00a" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "1f83fe0c80b8e8e2b8741f4b1792206c" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 14155776 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 14163968 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 17833984 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "512034d4872f6b17183d1c51b93caa7d" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "e1dccd0df2c5d770645275ea79ded18a" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "cdd79ac3215afba2d0a8f249fe99bd48" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 32522240, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 7348224 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 14688256 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 27271168 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 28844032 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 32514048 + } + ], + "md5sum": "e45b09ae4128a2291092f533ff8af6b2" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 23592960, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 9437184 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 22020096 + } + ], + "md5sum": "ee6e8e015b80bb375568dbd0a9385600" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "05228c3e13478e4248b37d8d9305b779" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "75588b4068058bea27bf5beb45573db1" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 31997952, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 14155776 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 26738688 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 28311552 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 28319744 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 28327936 + } + ], + "md5sum": "d78184ef263ba7c0cd48bf8992a8763a" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 21504000, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 19931136 + } + ], + "md5sum": "a7e024f53dcbb8c403dc5846471e1230" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "2f555714241ccfbc775ed5dd148c141a" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "5ed3a430e7bab4e5f4b57a5c6de411a5" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 14155776 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 14163968 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 26746880 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 28319744 + } + ], + "md5sum": "6a0b5ed94f61522b1d685e08d79fb8b3" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "1d431ad2068ece3fe4d5c79a3299d003" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "104c92ece5860e60e5ddf41b21f33260" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 28852224, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21504000 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 21512192 + } + ], + "md5sum": "5e15cd3eef6c8fa6b29bacfc72085941" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "42d39b7c0a48c578d45490617652f94a" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "2f52a6a6b635f360945cd50c6f77d450" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 32530432, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 7348224 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 11018240 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18358272 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18366464 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 18374656 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 30957568 + } + ], + "md5sum": "f090e66edd23dcf73b05d64accde1148" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "52c607eada0544fd8b300e3bafe85b37" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "71459776ec3fe17d8912f3f45d1633e5" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 28844032, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 14155776 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21495808 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 21504000 + } + ], + "md5sum": "98cad2c978ee84e32aea8232f5d9ca2e" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "eff52722d4f6bbb88925a458be7d22d6" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 21495808, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 14155776 + } + ], + "md5sum": "8be78c79bcbc7dd59723a564cfbc72f6" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "99167fb62218d3e26034c197f77a8aa3" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 262674432, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 128259, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 262674432, + "byteOffset": 0 + } + ], + "md5sum": "be6213096f7954a8c08120cb6960f78e" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 32834304, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 128259, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 32834304, + "byteOffset": 0 + } + ], + "md5sum": "1c9142af090a3c0ad04a40a28500fd99" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "c7d1f88d42573d7bba4ab5c88acee7b7" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 30957568, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 14155776 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 14163968 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21504000 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21512192 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21520384 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 29908992 + } + ], + "md5sum": "05aaab3081fcc13134141848e282aa12" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9ade4887e2074741f712d3cab5823322" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "00688a4fd29532f4caec6b5b509dcbee" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 7348224 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11018240 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 11026432 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 11034624 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 14704640 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 27287552 + } + ], + "md5sum": "11087a54cf33aefa7161e775dce54f98" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4121441eb5031984ac17c004c3bc8942" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 27262976, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 9437184 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 13107200 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 25690112 + } + ], + "md5sum": "8969128f561da2d8b4e77e8d1ed878f1" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "cf2d41cca20a7323d0bf3668f3544747" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 30941184, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 9445376 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 16785408 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 29368320 + } + ], + "md5sum": "6b824c284774312b0d1ba3b179ce3423" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "545ffeb92d1a3a9fed54475373707bd8" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 27279360, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 23592960 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27262976 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 27271168 + } + ], + "md5sum": "16adcd1bcd0d080330eca9b1570f7f71" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "431821dd1c8fe6b69b7384162f928dac" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "8d4f5a09c2c6eca9bbd67de816f7ac6a" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "791906fa74ff6201182223773d8f52ba" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 27795456, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 20455424 + } + ], + "md5sum": "e58836bbb970d6e5a867c49aa72ab1a3" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "221e435ae0e0ba172d3fe40b371366d2" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "d411fb0e5f70728fb1d078a1b5f453f3" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 14155776 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14163968 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 22552576 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "af3ad918225372ce0a49a92916160672" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "e7ab23102cbf9b825fd4908c42a7d271" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 30957568, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 21504000 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21512192 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 29900800 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 30949376 + } + ], + "md5sum": "d16feaf8957f72286374c7c7dea778ee" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4ec8dff5d06870bff3d4f8044cdef2a5" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 33038336, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 7340032 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 15728640 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 16777216 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 20447232 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20455424 + } + ], + "md5sum": "8bf88524e873c3c15f8ce9d1e4ab4384" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 1572864 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 14155776 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 15728640 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 24117248 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 25165824 + } + ], + "md5sum": "9af87b8a9dc8e8319a311dccf596b747" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d99f164f744f5d26fadac05bb4b9d7a9" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "10d0645f08b1772197615081aeae671c" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 27262976, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 23592960 + } + ], + "md5sum": "72adbac5687d46d5b21e01c4ea6ebca2" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "221bba05efcce9895bf3a1fd65a7dea7" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "4a1e7a56eb466989e9b999c648ecee32" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "6ee645cad39ca412f5399a4c8a8f773b" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 32522240, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 7340032 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 11010048 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18350080 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18358272 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 18366464 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 30949376 + } + ], + "md5sum": "e13680d69bfd480a0047f78e2d663fcd" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "f2578d375b31a2b3c288760c71b91325" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3670016 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 12058624 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 13107200 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20447232 + } + ], + "md5sum": "a5f785ce02bedfed686c5569d1750f0a" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 30941184, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 1572864 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 1581056 + } + ], + "md5sum": "ef72a7b797e70980344ca361f597b75a" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "fb56a3b0f0fe0c1bc523b2e06672f0b8" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "cb9d334bd1ee278ed3b95f9632f3a8b0" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 32530432, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3678208 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 12066816 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 13115392 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 13123584 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 16801792 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24141824 + } + ], + "md5sum": "44a141488093aef939ff54f598ab335d" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "691c093af629bba4e5e92a37b380aecb" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "bfb0485c993d15439860f8c302f320ac" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 27287552, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 1048576 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8388608 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 16777216 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 17825792 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 17833984 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 17842176 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 17850368 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 26238976 + } + ], + "md5sum": "77b213e7f2e06932be4748e74b48c463" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "a650420cd77953d673572db9272e3671" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "07461f2dd52e7b9036a8bee83dae63f6" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "c671ded30b1d8ec17407dbc9f9036d69" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 27811840, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 7348224 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 7356416 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 15745024 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 16793600 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 24133632 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 24141824 + } + ], + "md5sum": "15ce52123d37a9a2429cbed81d1da8af" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4db3fff85cfa59cba995c4ddccde5105" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "11e8e8796d457a6ec871e4cd3e116852" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 24158208, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 7348224 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 7356416 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 15745024 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16801792 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 16809984 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 20480000 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 24150016 + } + ], + "md5sum": "c60669008ff41dc21607053d1eccaebc" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d9dd04ed0cce96e976e69e453ce3251b" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "468e0375e3a6f57e36dd82ee512576bc" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 25182208, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 14155776 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 14163968 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 17833984 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 25174016 + } + ], + "md5sum": "308be9ecf39c67b265f6e34184e88a38" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "734041d34a5b53e3ff858a8917397737" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 31997952, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 18882560 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 22552576 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22560768 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 30949376 + } + ], + "md5sum": "3bc9a61d309ad791ebffda4a3dd705b2" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "fb55bea1231a08cf2199b01a1d0bb993" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "23ce068b8d009d834336d558dcbe0474" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "725c24e682090873b736262152f643d3" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 29917184, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 13115392 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 13123584 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 25190400 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 26238976 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 26247168 + } + ], + "md5sum": "e62f0ad863b9ba04638fee26473a3eb4" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4b661f8f6854044bd00c54135afbb728" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 33079296, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 9437184 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1572864, + "byteOffset": 22020096 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 23592960 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 31981568 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 33030144 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 33038336 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 33046528 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 33054720 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 33062912 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 33071104 + } + ], + "md5sum": "8a877a1c53c0c585a003594762344a03" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "27b02096251b20d61801b2c18d38884c" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "d125e0787255ba412bba0582ec4113ba" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3670016 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 12058624 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 13107200 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16777216 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 25165824 + } + ], + "md5sum": "0d3b524da5435b78d348b7cbf6702906" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "720114b139ff44092755d63d2a50512d" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 29900800, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 7340032 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 15728640 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 16777216 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16785408 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 1048576, + "byteOffset": 25174016 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 8192, + "byteOffset": 26222592 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "bfloat16", + "format": "raw", + "nbytes": 3670016, + "byteOffset": 26230784 + } + ], + "md5sum": "48860c8bc90660d3ba529db23e3df4ad" + } + ] +} \ No newline at end of file diff --git a/tensor-cache.json b/tensor-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..8109164fd0cf2cd70dfd815f4d3edd768c75e0a7 --- /dev/null +++ b/tensor-cache.json @@ -0,0 +1,4375 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 5019827200.0, + "BitsPerParam": 1.5790556154328332 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 262674432, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 128259, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 262674432, + "byteOffset": 0 + } + ], + "md5sum": "1a8db284cf568c04eb50e0a61670b435" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "3ba101a32a60e4957d4f341ed2474d54" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 32842496, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 128259, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 32834304, + "byteOffset": 0 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32834304 + } + ], + "md5sum": "25b9648a2f7f4bd916a635505f8ed165" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "bde72072557ec421c0ff4ae82ce96784" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7340032 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19922944 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 21495808 + } + ], + "md5sum": "0bc04e510210d12f4a773ddfcf2012ae" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "3b7c7544320febb20cc7b3a3310e4415" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 21495808, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14155776 + } + ], + "md5sum": "e5bbdd5d1d201a4ae1bb0828a8a52e60" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 23592960, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + } + ], + "md5sum": "a8fd0fb8a5c610ce1c66b6c10d82b030" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "ea30f6795d29feeae83af89e7e8fa85c" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "746157e8b6c2189cc1f5a8d460a445d3" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 23592960 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "edcb64768413917f8fdb66fd7c632851" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "0247610b68f5900228a65256358f0120" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "4f7e7f8bed5b1ee976f4b7dfe143223d" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 32505856, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7340032 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19922944 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 21495808 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 25165824 + } + ], + "md5sum": "9b95ca4090dbe045baa76e5c0d444fcf" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "083256552395630b71e265052c56b44f" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 21495808, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14155776 + } + ], + "md5sum": "0fb7eba5c12daa5c6c559d3351c3ec89" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "06e904967db15497efbedf6df26d70d2" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "83e68fa78e4048b638bf27f4ceccc00a" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "1f83fe0c80b8e8e2b8741f4b1792206c" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 30949376, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 14155776 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 14163968 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 17833984 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21504000 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29892608 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30941184 + } + ], + "md5sum": "512034d4872f6b17183d1c51b93caa7d" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "e1dccd0df2c5d770645275ea79ded18a" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "cdd79ac3215afba2d0a8f249fe99bd48" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 32522240, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 7348224 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 14688256 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 27271168 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 28844032 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32514048 + } + ], + "md5sum": "e45b09ae4128a2291092f533ff8af6b2" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 23592960, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 9437184 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 22020096 + } + ], + "md5sum": "ee6e8e015b80bb375568dbd0a9385600" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "05228c3e13478e4248b37d8d9305b779" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "75588b4068058bea27bf5beb45573db1" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 31997952, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 14155776 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 26738688 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 28311552 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 28319744 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 28327936 + } + ], + "md5sum": "d78184ef263ba7c0cd48bf8992a8763a" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 21504000, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + } + ], + "md5sum": "a7e024f53dcbb8c403dc5846471e1230" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "2f555714241ccfbc775ed5dd148c141a" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "5ed3a430e7bab4e5f4b57a5c6de411a5" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 14155776 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 14163968 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 26746880 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 28319744 + } + ], + "md5sum": "6a0b5ed94f61522b1d685e08d79fb8b3" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "1d431ad2068ece3fe4d5c79a3299d003" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "104c92ece5860e60e5ddf41b21f33260" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 28852224, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21504000 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 21512192 + } + ], + "md5sum": "5e15cd3eef6c8fa6b29bacfc72085941" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "42d39b7c0a48c578d45490617652f94a" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "2f52a6a6b635f360945cd50c6f77d450" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 32530432, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 7348224 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 11018240 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18358272 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18366464 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 18374656 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 30957568 + } + ], + "md5sum": "f090e66edd23dcf73b05d64accde1148" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "52c607eada0544fd8b300e3bafe85b37" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "71459776ec3fe17d8912f3f45d1633e5" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 28844032, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14155776 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21495808 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 21504000 + } + ], + "md5sum": "98cad2c978ee84e32aea8232f5d9ca2e" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "eff52722d4f6bbb88925a458be7d22d6" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 21495808, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14155776 + } + ], + "md5sum": "8be78c79bcbc7dd59723a564cfbc72f6" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "99167fb62218d3e26034c197f77a8aa3" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 262674432, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 128259, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 262674432, + "byteOffset": 0 + } + ], + "md5sum": "be6213096f7954a8c08120cb6960f78e" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 32834304, + "records": [ + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 128259, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 32834304, + "byteOffset": 0 + } + ], + "md5sum": "1c9142af090a3c0ad04a40a28500fd99" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "c7d1f88d42573d7bba4ab5c88acee7b7" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 30957568, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 14155776 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 14163968 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21504000 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21512192 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21520384 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29908992 + } + ], + "md5sum": "05aaab3081fcc13134141848e282aa12" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "9ade4887e2074741f712d3cab5823322" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "00688a4fd29532f4caec6b5b509dcbee" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 28860416, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 7348224 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11018240 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 11026432 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 11034624 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 14704640 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 27287552 + } + ], + "md5sum": "11087a54cf33aefa7161e775dce54f98" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4121441eb5031984ac17c004c3bc8942" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 27262976, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9437184 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 13107200 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 25690112 + } + ], + "md5sum": "8969128f561da2d8b4e77e8d1ed878f1" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "cf2d41cca20a7323d0bf3668f3544747" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 30941184, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 9445376 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 16785408 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 29368320 + } + ], + "md5sum": "6b824c284774312b0d1ba3b179ce3423" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "545ffeb92d1a3a9fed54475373707bd8" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 27279360, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23592960 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27262976 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27271168 + } + ], + "md5sum": "16adcd1bcd0d080330eca9b1570f7f71" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "431821dd1c8fe6b69b7384162f928dac" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "8d4f5a09c2c6eca9bbd67de816f7ac6a" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "791906fa74ff6201182223773d8f52ba" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 27795456, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13115392 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 20455424 + } + ], + "md5sum": "e58836bbb970d6e5a867c49aa72ab1a3" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "221e435ae0e0ba172d3fe40b371366d2" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "d411fb0e5f70728fb1d078a1b5f453f3" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 27271168, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 14155776 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14163968 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22552576 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23601152 + } + ], + "md5sum": "af3ad918225372ce0a49a92916160672" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "e7ab23102cbf9b825fd4908c42a7d271" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 30957568, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 7348224 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 19931136 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21504000 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 21512192 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 29900800 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30949376 + } + ], + "md5sum": "d16feaf8957f72286374c7c7dea778ee" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4ec8dff5d06870bff3d4f8044cdef2a5" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 33038336, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 7340032 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 15728640 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 16777216 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20447232 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20455424 + } + ], + "md5sum": "8bf88524e873c3c15f8ce9d1e4ab4384" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 1572864 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 14155776 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 15728640 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 24117248 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25165824 + } + ], + "md5sum": "9af87b8a9dc8e8319a311dccf596b747" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d99f164f744f5d26fadac05bb4b9d7a9" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "10d0645f08b1772197615081aeae671c" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 27262976, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 14155776 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 22544384 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 23592960 + } + ], + "md5sum": "72adbac5687d46d5b21e01c4ea6ebca2" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "221bba05efcce9895bf3a1fd65a7dea7" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "4a1e7a56eb466989e9b999c648ecee32" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "6ee645cad39ca412f5399a4c8a8f773b" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 32522240, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 7340032 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 11010048 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18350080 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18358272 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 18366464 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 30949376 + } + ], + "md5sum": "e13680d69bfd480a0047f78e2d663fcd" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "f2578d375b31a2b3c288760c71b91325" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 33030144, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3670016 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12058624 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 13107200 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 20447232 + } + ], + "md5sum": "a5f785ce02bedfed686c5569d1750f0a" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 30941184, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 0 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1572864 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 1581056 + } + ], + "md5sum": "ef72a7b797e70980344ca361f597b75a" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "fb56a3b0f0fe0c1bc523b2e06672f0b8" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "cb9d334bd1ee278ed3b95f9632f3a8b0" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 32530432, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 3670016 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3678208 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12066816 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 13115392 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 13123584 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 16801792 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24141824 + } + ], + "md5sum": "44a141488093aef939ff54f598ab335d" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "691c093af629bba4e5e92a37b380aecb" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "bfb0485c993d15439860f8c302f320ac" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 27287552, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 1048576 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8388608 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 16777216 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 17825792 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 17833984 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 17842176 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 17850368 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 26238976 + } + ], + "md5sum": "77b213e7f2e06932be4748e74b48c463" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "a650420cd77953d673572db9272e3671" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "07461f2dd52e7b9036a8bee83dae63f6" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "c671ded30b1d8ec17407dbc9f9036d69" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 27811840, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7348224 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 7356416 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 15745024 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 16793600 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24133632 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 24141824 + } + ], + "md5sum": "15ce52123d37a9a2429cbed81d1da8af" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4db3fff85cfa59cba995c4ddccde5105" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "11e8e8796d457a6ec871e4cd3e116852" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 24158208, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7340032 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 7348224 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 7356416 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 15745024 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16801792 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 16809984 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 20480000 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 24150016 + } + ], + "md5sum": "c60669008ff41dc21607053d1eccaebc" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "d9dd04ed0cce96e976e69e453ce3251b" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "468e0375e3a6f57e36dd82ee512576bc" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 25182208, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 12582912 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 14155776 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 14163968 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 17833984 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25174016 + } + ], + "md5sum": "308be9ecf39c67b265f6e34184e88a38" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "734041d34a5b53e3ff858a8917397737" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 31997952, + "records": [ + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 9437184 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 17825792 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18874368 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 18882560 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22552576 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 22560768 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 30949376 + } + ], + "md5sum": "3bc9a61d309ad791ebffda4a3dd705b2" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "fb55bea1231a08cf2199b01a1d0bb993" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "23ce068b8d009d834336d558dcbe0474" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "725c24e682090873b736262152f643d3" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 29917184, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 9445376 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 13115392 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 13123584 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16793600 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16801792 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 25190400 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26238976 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 26247168 + } + ], + "md5sum": "e62f0ad863b9ba04638fee26473a3eb4" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "4b661f8f6854044bd00c54135afbb728" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 33079296, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 6144, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 12582912, + "byteOffset": 9437184 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 6144, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1572864, + "byteOffset": 22020096 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 23592960 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 31981568 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 33030144 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 33038336 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 33046528 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 33054720 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 33062912 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 33071104 + } + ], + "md5sum": "8a877a1c53c0c585a003594762344a03" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "27b02096251b20d61801b2c18d38884c" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 58720256, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 28672, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 58720256, + "byteOffset": 0 + } + ], + "md5sum": "d125e0787255ba412bba0582ec4113ba" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 26214400, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3670016 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 12058624 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 13107200 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16777216 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 25165824 + } + ], + "md5sum": "0d3b524da5435b78d348b7cbf6702906" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 29360128, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1792 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 29360128, + "byteOffset": 0 + } + ], + "md5sum": "720114b139ff44092755d63d2a50512d" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 29900800, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 28672, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 7340032, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 7340032 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 15728640 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 16777216 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16785408 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 25174016 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26222592 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4096, + 448 + ], + "dtype": "float32", + "format": "f32-to-bf16", + "nbytes": 3670016, + "byteOffset": 26230784 + } + ], + "md5sum": "48860c8bc90660d3ba529db23e3df4ad" + } + ] +} \ No newline at end of file diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..de3c72f94f3d8f74ac290d0f405f16c7fb48637b --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31d17e500c82ff116fc14d4dd0106a08e49bd3512d8a81473047121ef8ea09f9 +size 17210524 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..36c19e04d1d497adb217b6481528bdae7cc61151 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2088 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128256": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128257": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128258": { + "content": "<|NONE|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{# version=v3-llama3.1 #}{%- macro append_new_param_info(param_declaration, comment_info, examples_info, depth) -%}\n {%- set offset = \"\" -%}\n {%- if depth >= 1 -%}\n {%- set offset = \" \" * depth -%}\n {%- endif -%}\n {%- if comment_info != \"<|NONE|>\" -%}\n {{ \"\\n\" + offset + comment_info }}\n {%- if examples_info | length > 0 -%}\n {# Append each example info #}\n {%- for example in examples_info -%}\n {{ \"\\n\" + offset + \"// \" + example|string|replace(\"'\", '\"') }}\n {%- endfor -%}\n {%- endif -%}\n {%- endif -%}\n {{ \"\\n\" + offset + param_declaration }}\n{%- endmacro -%}\n\n{%- macro convert_data_type(param_type) -%}\n {%- if param_type == \"integer\" or param_type == \"float\" -%}\n {{ \"number\" }}\n {%- else -%}\n {{ param_type }}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro get_param_type(param) -%}\n {%- set param_type = \"any\" -%}\n\n {%- if \"type\" in param -%}\n {%- set raw_param_type = param[\"type\"] -%}\n {%- if raw_param_type is iterable and raw_param_type is not string -%}\n {%- set param_type = raw_param_type | join(\" | \") -%}\n {%- else -%}\n {%- set param_type = raw_param_type -%}\n {%- endif -%}\n {{ convert_data_type(param_type) }}\n {%- elif \"oneOf\" in param -%}\n {%- set one_of_types = param[\"oneOf\"]|selectattr(\"type\", \"defined\")|list -%}\n {%- set one_of_types = one_of_types|map(attribute=\"type\")|unique|list -%}\n {{ convert_data_type(one_of_types | join(\" | \")) }}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro get_format_param(param) -%}\n {%- if \"format\" in param -%}\n {{ param[\"format\"] }}\n {%- elif \"oneOf\" in param -%}\n {%- set formats = [] -%}\n {%- for item in param[\"oneOf\"] -%}\n {%- if \"format\" in item -%}\n {%- if item[\"format\"] == param[\"oneOf\"][-1][\"format\"] -%}\n {{ item[\"format\"] }}\n {%- else -%}\n {{ item[\"format\"] + \" or \"}}\n {%- endif -%}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ \"<|NONE|>\" }}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro get_param_info(param) -%}\n {%- set param_type = param.get(\"type\", \"any\") -%}\n {%- set format_param = get_format_param(param) -%}\n\n {%- if \"description\" in param or \"default\" in param or format_param != \"<|NONE|>\" or param[\"maximum\"] or param[\"minimum\"] or param[\"maxLength\"] or param[\"minLength\"] -%}\n {{ \"//\" }}\n {%- if \"description\" in param -%}\n {%- set desc = param[\"description\"] -%}\n {%- if not desc.endswith(\".\") -%}\n {%- set desc = desc + \".\" -%}\n {%- endif -%}\n {{ \" \" + desc }}\n {%- endif -%}\n\n {%- if \"default\" in param -%}\n {%- set default_value = param[\"default\"] -%}\n {%- if param_type == \"string\" -%}\n {%- set default_value = '\"' ~ default_value ~ '\"' -%}\n {%- endif -%}\n {{ \" Default=\" ~ default_value ~ \".\" }}\n {%- endif -%}\n\n {%- set format_param = get_format_param(param) -%}\n {%- if format_param != \"<|NONE|>\" -%}\n {{ \" Format=\" ~ format_param }}\n {%- endif -%}\n\n {%- for field, field_name in [(\"maximum\", \"Maximum\"), (\"minimum\", \"Minimum\"), (\"maxLength\", \"Maximum length\"), (\"minLength\", \"Minimum length\")] -%}\n {%- if field in param -%}\n {{ \" \" + field_name ~ \"=\" ~ param[field] }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ \"<|NONE|>\"}}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro get_enum_option_str(enum_options) -%}\n {%- for v in enum_options -%}\n {%- if v is string -%}\n {{ '\"' + v + '\"' }}\n {%- else -%}\n {{ v }}\n {%- endif -%}\n {%- if enum_options|length > 0 and v != enum_options[-1] -%}\n {{ \" | \" }}\n {%- endif -%}\n {%- endfor -%}\n{%- endmacro -%}\n\n{%- macro get_array_typescript(param_name, param_dic, depth) -%}\n {%- set offset = '' -%}\n {%- if depth >= 1 -%}\n {%- set offset = \" \" * depth -%}\n {%- endif -%}\n {%- set items_info = param_dic.get('items', {}) -%}\n\n {%- if items_info|length == 0 -%}\n {%- if param_name -%}\n {{ \"\\n\" + offset + param_name + \": []\" }}\n {%- else -%}\n {{ \"\\n\" + offset + \"[]\" }}\n {%- endif -%}\n {%- else -%}\n {%- set array_type = get_param_type(items_info) -%}\n {%- if array_type == 'object' -%}\n {%- if param_name -%}\n {{ \"\\n\" + offset + param_name + \": {\" }}\n {%- else -%}\n {{ \"\\n\" + offset + \"{\" }}\n {%- endif -%}\n {{ get_parameter_typescript(items_info.get('properties', {}), items_info.get('required', []), depth + 1) -}}\n {{- \"\\n\" + offset + \"}[]\" }}\n {%- elif array_type == 'array' -%}\n {%- set item_info = get_array_typescript(None, items_info, depth + 1) -%}\n {%- if not param_name -%}\n {{ \"\\n\" + item_info + \"[]\" }}\n {%- else -%}\n {{ \"\\n\" + offset + param_name + \": \" + item_info|trim + \"[]\" }}\n {%- endif -%}\n {%- else -%}\n {%- if 'enum' in items_info -%}\n {%- set item_type = get_enum_option_str(items_info['enum']) -%}\n {%- if param_name is none -%}\n {{ \"(\" + item_type + \")[]\"}}\n {%- else -%}\n {{ \"\\n\" + offset + param_name + \": (\" + item_type + \")[]\" }}\n {%- endif -%}\n {%- else -%}\n {%- if param_name is none -%}\n {{ \"\\n\" + array_type + \"[]\" }}\n {%- else -%}\n {{ \"\\n\" + offset + param_name + \": \" + array_type + \"[],\" }}\n {%- endif -%}\n {%- endif -%}\n {%- endif -%}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro get_parameter_typescript(properties, required_params, depth=0) -%}\n {%- set res = \"\" -%}\n {%- for param_name, param in properties.items() -%}\n {%- if param is mapping -%}\n {%- set comment_info = get_param_info(param) -%}\n {# Param Examples #}\n {%- set examples_info = [] -%}\n {%- if \"examples\" in param -%}\n {%- set examples_info = [\"Example \" + param_name + \":\"] -%}\n {%- set examples_info = examples_info + param[\"examples\"] -%}\n {%- endif -%}\n\n {# Param Name declaration #}\n {%- set param_declaration = param_name -%}\n {%- if required_params is iterable and param_name not in required_params -%}\n {%- set param_declaration = param_declaration + \"?\" -%}\n {%- endif -%}\n\n {%- set param_type = get_param_type(param) -%}\n\n {# Handle indentation based on depth #}\n {%- set offset = \"\" -%}\n {%- if depth >= 1 -%}\n {%- set offset = \" \" * depth -%}\n {%- endif -%}\n\n {%- if param_type == \"object\" -%}\n {%- if comment_info != \"<|NONE|>\" -%}\n {{ \"\\n\" + offset + comment_info }}\n {%- endif -%}\n {%- if examples_info|length > 0 -%}\n {%- for example in examples_info -%}\n {{ \"\\n\" + offset + \"// \" + example|string|replace(\"'\", '\"') }}\n {%- endfor -%}\n {%- endif -%}\n {%- set param_declaration = param_declaration + \": {\" -%}\n {{ \"\\n\" + offset + param_declaration -}}\n {{- get_parameter_typescript(param.get(\"properties\", {}), param.get(\"required\", []), depth + 1) -}}\n {{- \"\\n\" + offset + \"},\" }}\n {%- elif param_type == \"array\" -%}\n {%- set item_info = param.get(\"items\", {}) -%}\n {%- if \"type\" not in item_info -%}\n {%- set param_declaration = param_declaration + \": [],\" -%}\n {{ append_new_param_info(param_declaration, comment_info, examples_info, depth) }}\n {%- else -%}\n {%- if comment_info != \"<|NONE|>\" -%}\n {{ \"\\n\" + offset + comment_info }}\n {%- endif -%}\n {%- if examples_info|length > 0 -%}\n {%- for example in examples_info -%}\n {{ \"\\n\" + offset + \"// \" + example|string|replace(\"'\", '\"') }}\n {%- endfor -%}\n {%- endif -%}\n {%- set array_declaration = get_array_typescript(param_declaration, param, depth) -%}\n {%- if not array_declaration.endswith(\",\") -%}\n {%- set array_declaration = array_declaration + \",\" -%}\n {%- endif -%}\n {{ array_declaration}}\n {%- endif -%}\n {%- else -%}\n {%- if \"enum\" in param -%}\n {%- set param_type = get_enum_option_str(param[\"enum\"]) -%}\n {%- endif -%}\n {%- if \"nullable\" in param and param[\"nullable\"] -%}\n {%- set param_type = param_type + \" | null\" -%}\n {%- endif -%}\n {%- set param_declaration = param_declaration + \": \" + param_type + \",\" -%}\n {{ append_new_param_info(param_declaration, comment_info, examples_info, depth) }}\n {%- endif -%}\n {%- endif -%}\n {%- endfor -%}\n{%- endmacro -%}\n\n{%- macro generate_schema_from_functions(functions, namespace='functions') -%}\n {{ \"// Supported function definitions that should be called when necessary.\\n\" -}}\n {{- \"namespace \" + namespace + \" {\\n\\n\" -}}\n\n {%- for function in functions -%}\n {%- if function.get(\"function\") -%}\n {%- set function = function.get(\"function\") -%}\n {%- endif -%}\n\n {%- set function_name = function.get(\"name\") -%}\n {%- if function_name -%}\n {%- set description = function.get('description', '') -%}\n {%- set parameters = function.get('parameters', {}) -%}\n {{- \"// \" + description + \"\\n\" -}}\n {{- \"type \" + function_name -}}\n {%- if parameters and parameters.get(\"properties\") -%}\n {{- \" = (_: {\" -}}\n {%- set required_params = parameters.get(\"required\", []) -%}\n {{ get_parameter_typescript(parameters.get(\"properties\"), required_params, 0) -}}\n {{- \"\\n}) => any;\\n\\n\" }}\n {%- else -%}\n {{ \" = () => any;\\n\\n\" }}\n {%- endif -%}\n {%- endif -%}\n {%- endfor -%}\n {{ \"} // namespace \" + namespace }}\n{%- endmacro -%}\n{%- if not tools is defined -%}\n {%- set tools = none -%}\n{%- endif -%}\n\n{%- set has_code_interpreter = tools | selectattr(\"type\", \"equalto\", \"code_interpreter\") | list | length > 0 -%}\n{%- if has_code_interpreter -%}\n {%- set tools = tools | rejectattr(\"type\", \"equalto\", \"code_interpreter\") | list -%}\n{%- endif -%}\n\n{#- System message + builtin tools #}\n{{- bos_token + \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if has_code_interpreter %}\n {{- \"Environment: ipython\\n\\n\" }}\n{%- else -%}\n {{ \"\\n\"}}\n{%- endif %}\n{%- if tools %}\n {{- \"\\nYou have access to the following functions:\\n\\n\" }}\n {%- for t in tools %}\n {%- if \"type\" in t -%}\n {{ \"Use the function '\" + t[\"function\"][\"name\"] + \"' to '\" + t[\"function\"][\"description\"] + \"'\\n\" + t[\"function\"] | tojson() }}\n {%- else -%}\n {{ \"Use the function '\" + t[\"name\"] + \"' to '\" + t[\"description\"] + \"'\\n\" + t | tojson }}\n {%- endif -%}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- '\\nThink very carefully before calling functions.\\nIf a you choose to call a function ONLY reply in the following format:\\n<{start_tag}={function_name}>{parameters}{end_tag}\\nwhere\\n\\nstart_tag => ` a JSON dict with the function argument name as key and function argument value as value.\\nend_tag => ``\\n\\nHere is an example,\\n{\"example_name\": \"example_value\"}\\n\\nReminder:\\n- If looking for real time information use relevant functions before falling back to brave_search\\n- Function calls MUST follow the specified format, start with \\n- Required parameters MUST be specified\\n- Only call one function at a time\\n- Put the entire function call reply on one line\\n\\n' -}}\n{%- endif %}\n{{- \"<|eot_id|>\" -}}\n\n{%- for message in messages -%}\n {%- if message['role'] == 'user' or message['role'] == 'system' -%}\n {{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n' + message['content'] + '<|eot_id|>' }}\n {%- elif message['role'] == 'tool' -%}\n {{ '<|start_header_id|>ipython<|end_header_id|>\\n\\n' + message['content'] + '<|eot_id|>' }}\n {%- else -%}\n {%- if (message['content'] and message['content']|length > 0) or ('tool_calls' in message and message['tool_calls'] and message['tool_calls']|length > 0) -%}\n {{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'}}\n {%- endif -%}\n {%- if message['content'] and message['content']|length > 0 -%}\n {{ message['content'] }}\n {%- endif -%}\n {%- if 'tool_calls' in message and message['tool_calls'] and message['tool_calls']|length > 0 -%}\n {%- for tool_call in message['tool_calls'] -%}\n {%- if tool_call[\"function\"][\"name\"] == \"python\" -%}\n {{ '<|python_tag|>' + tool_call['function']['arguments'] }}\n {%- else -%}\n {{ '' + tool_call['function']['arguments'] + '' }}\n {%- endif -%}\n {%- endfor -%}\n {{ '<|eom_id|>' }}\n {%- elif message['content'] and message['content']|length > 0 -%}\n {{ '<|eot_id|>' }}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{ '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif -%}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +}