diff --git a/.gitattributes b/.gitattributes
index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/mlc-chat-config.json b/mlc-chat-config.json
new file mode 100644
index 0000000000000000000000000000000000000000..590e65e99b4a1bbb69ae78996d6897cc8f15482f
--- /dev/null
+++ b/mlc-chat-config.json
@@ -0,0 +1,83 @@
+{
+ "version": "0.1.0",
+ "model_type": "llama",
+ "quantization": "q4f32_1",
+ "model_config": {
+ "hidden_size": 4096,
+ "intermediate_size": 14336,
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "rms_norm_eps": 1e-05,
+ "vocab_size": 128259,
+ "tie_word_embeddings": false,
+ "position_embedding_base": 8000000.0,
+ "rope_scaling": null,
+ "context_window_size": 32768,
+ "prefill_chunk_size": 8192,
+ "num_key_value_heads": 8,
+ "head_dim": 128,
+ "tensor_parallel_shards": 1,
+ "pipeline_parallel_stages": 1,
+ "max_batch_size": 128,
+ "disaggregation": false
+ },
+ "vocab_size": 128259,
+ "context_window_size": 32768,
+ "sliding_window_size": -1,
+ "prefill_chunk_size": 8192,
+ "attention_sink_size": -1,
+ "tensor_parallel_shards": 1,
+ "pipeline_parallel_stages": 1,
+ "active_vocab_size": 128259,
+ "temperature": 1.0,
+ "presence_penalty": 0.0,
+ "frequency_penalty": 0.0,
+ "repetition_penalty": 1.0,
+ "top_p": 1.0,
+ "tokenizer_files": [
+ "tokenizer.json",
+ "tokenizer_config.json"
+ ],
+ "tokenizer_info": {
+ "token_postproc_method": "byte_level",
+ "prepend_space_in_encode": false,
+ "strip_space_in_decode": false
+ },
+ "conv_template": {
+ "name": "llama-3",
+ "system_template": "<|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>",
+ "system_message": "You are a helpful, respectful and honest assistant.",
+ "system_prefix_token_ids": [
+ 128000
+ ],
+ "add_role_after_system_message": true,
+ "roles": {
+ "user": "<|start_header_id|>user",
+ "assistant": "<|start_header_id|>assistant"
+ },
+ "role_templates": {
+ "user": "{user_message}",
+ "assistant": "{assistant_message}",
+ "tool": "{tool_message}"
+ },
+ "messages": [],
+ "seps": [
+ "<|eot_id|>"
+ ],
+ "role_content_sep": "<|end_header_id|>\n\n",
+ "role_empty_sep": "<|end_header_id|>\n\n",
+ "stop_str": [
+ "<|end_of_text|>",
+ "<|eot_id|>"
+ ],
+ "stop_token_ids": [
+ 128001,
+ 128009
+ ],
+ "function_string": "",
+ "use_function_calling": false
+ },
+ "pad_token_id": 128001,
+ "bos_token_id": 128000,
+ "eos_token_id": 128009
+}
\ No newline at end of file
diff --git a/params_shard_0.bin b/params_shard_0.bin
new file mode 100644
index 0000000000000000000000000000000000000000..29aa13c71350992f900689434f34cd0793103943
--- /dev/null
+++ b/params_shard_0.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78d37e9d338442879e38d0a5a93c2e6c5d0702c2564b819a7d1f31e87d8f6d98
+size 262674432
diff --git a/params_shard_1.bin b/params_shard_1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f73d7b66178236e7e86a5a42d85e6927b69f2bc2
--- /dev/null
+++ b/params_shard_1.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:318196e67ded49b0ef68442fe073c8bb8d1f613260ebff97b33c0a791e825023
+size 58720256
diff --git a/params_shard_10.bin b/params_shard_10.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a3956e4f451f244c78f69569b7ad3d1c70e30e98
--- /dev/null
+++ b/params_shard_10.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d067826521451875e22f38766b2034527d520549b1954b1ef4ad65959e549760
+size 27271168
diff --git a/params_shard_100.bin b/params_shard_100.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ddfa357c139653856e25558ca3977eb74ef0c533
--- /dev/null
+++ b/params_shard_100.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:418186dc523ca7b868e00ac747c9dae6884c1f95c19afe1eb3ce975b53e579b2
+size 29360128
diff --git a/params_shard_101.bin b/params_shard_101.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c71c66efbbef4b57d77ec84f3708c4a6ebfe136c
--- /dev/null
+++ b/params_shard_101.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b558d4e84bf7b7b00f52ab9f9d1733ecdb7864817a6c5ffc55be4a98c4a6212a
+size 33079296
diff --git a/params_shard_102.bin b/params_shard_102.bin
new file mode 100644
index 0000000000000000000000000000000000000000..84f167d98a3b141db9a49fac5f23316262f52a4e
--- /dev/null
+++ b/params_shard_102.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da591c28edf9ad347528b28d9743f810cf341332945849aea18a0e4f91220d07
+size 29360128
diff --git a/params_shard_103.bin b/params_shard_103.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0dbafebc4624ac1c80c675c156e1b12ee5dfe509
--- /dev/null
+++ b/params_shard_103.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96293734335bb73239edb057da74e506c5743a775370c6d99d5d90c2b19385d2
+size 58720256
diff --git a/params_shard_104.bin b/params_shard_104.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2dfe1c9ea7595492536d2a9bc24701c8c9ebf6ff
--- /dev/null
+++ b/params_shard_104.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46ec2ca7a9c1545fbf49a792284a1990cb121d9f551b7994dd4a1a2a8e11a93c
+size 26214400
diff --git a/params_shard_105.bin b/params_shard_105.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6039a24989e3acc962da840d159e714512923a3d
--- /dev/null
+++ b/params_shard_105.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:216047103de55920924409558c3639897a82ed4e0f64774798d357af05f1166c
+size 29360128
diff --git a/params_shard_106.bin b/params_shard_106.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8143331351860f745680794d2303cb66a9e82dba
--- /dev/null
+++ b/params_shard_106.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0485a3e57814cd3ea4ff17b4b18babb5cf1af571cc5f3daf579b81abcef90ce
+size 29900800
diff --git a/params_shard_11.bin b/params_shard_11.bin
new file mode 100644
index 0000000000000000000000000000000000000000..80c2aea62edd2fcdb57ec7cfdae523f63279516d
--- /dev/null
+++ b/params_shard_11.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:031a783475aa77ead79baeeb6a939678ed4e402b1a29e8545b10c5dabe24a966
+size 29360128
diff --git a/params_shard_12.bin b/params_shard_12.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8695d2081652e4df0f810f18b22be7f2a1556771
--- /dev/null
+++ b/params_shard_12.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b96217573ffbc46e25ac2d43e2aab0506acd00f6b3c8552d807758326bc02c8a
+size 58720256
diff --git a/params_shard_13.bin b/params_shard_13.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0127af87730009aa406f47576cce8b98a1913327
--- /dev/null
+++ b/params_shard_13.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b53c43ef414d77da0808db5074f6910be66f7b7a1923c438cd976edcffff3eab
+size 32505856
diff --git a/params_shard_14.bin b/params_shard_14.bin
new file mode 100644
index 0000000000000000000000000000000000000000..87a9635b3303c91efc9b8ddb8c724d045b520bb6
--- /dev/null
+++ b/params_shard_14.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82f7897f2cfab38ecc05961aaf2041daf28b82ef4b3e0742189013512f69c354
+size 58720256
diff --git a/params_shard_15.bin b/params_shard_15.bin
new file mode 100644
index 0000000000000000000000000000000000000000..10a62d7ef67af3f63aee14d725cb365700cbd560
--- /dev/null
+++ b/params_shard_15.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7c3bc440719f970701057532a40bf7d94113af88f2bc5cc983f0580087a8335
+size 21495808
diff --git a/params_shard_16.bin b/params_shard_16.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f52e993514085d6c834876d26afd5fba010dd637
--- /dev/null
+++ b/params_shard_16.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71d70c5b78b11508241026ba299ae73e3298f38d4c613d498d816683e58d03ee
+size 29360128
diff --git a/params_shard_17.bin b/params_shard_17.bin
new file mode 100644
index 0000000000000000000000000000000000000000..55db3c5238965a9e30d5170b1300d483b2ed9e69
--- /dev/null
+++ b/params_shard_17.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5b40195350072e3fa1233b3f2aee6b430ea0b32a86f0482087700d8becc78f2
+size 29360128
diff --git a/params_shard_18.bin b/params_shard_18.bin
new file mode 100644
index 0000000000000000000000000000000000000000..26da16bcc1a03e8dc5970a8fe60bc078e3e8ccdb
--- /dev/null
+++ b/params_shard_18.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67609965ae013156a2b124567b331218e9a294eec475b73752f477b818a9a30a
+size 58720256
diff --git a/params_shard_19.bin b/params_shard_19.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0a241d4a64bd889a75d2547fe863c291f2252a24
--- /dev/null
+++ b/params_shard_19.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e661bcbcac4d0dbd082b7546fc9bf604d37766a83de1f80b27373830a281f3d5
+size 30949376
diff --git a/params_shard_2.bin b/params_shard_2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5b1735cf1343de3ae4553827098a1ec2937d84e6
--- /dev/null
+++ b/params_shard_2.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c8ea8db0eede4c2854c86f365cd9cbec2f95ff16f284712af605768f5e7fcb7
+size 32842496
diff --git a/params_shard_20.bin b/params_shard_20.bin
new file mode 100644
index 0000000000000000000000000000000000000000..15c81a2e53bb86e2bde8df139d0ade3101d8ce31
--- /dev/null
+++ b/params_shard_20.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f9cf11e5457f59d65aae84fca91dbef276d9a43369cccf681634a9edc63b1f0
+size 58720256
diff --git a/params_shard_21.bin b/params_shard_21.bin
new file mode 100644
index 0000000000000000000000000000000000000000..349a99d4e17ed376a06ca945c08603be5725b714
--- /dev/null
+++ b/params_shard_21.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d344bdd269e548e9d507fb2e046d9340f7f1cdc80db7c6c73cc8b2561a7a67d
+size 29360128
diff --git a/params_shard_22.bin b/params_shard_22.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4958c4352052f163f79d9063876b67ba9da820d2
--- /dev/null
+++ b/params_shard_22.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6577624f7028d2aa2f06e0ea22a17a6564f0b2c94ac2e857fcd7fb844dd37261
+size 32522240
diff --git a/params_shard_23.bin b/params_shard_23.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6c55672b48362f78a400da505a559b9370902ee3
--- /dev/null
+++ b/params_shard_23.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e699910757c9a0a92aac556793d4e6977402956a083205300cdf0e6f70649f0
+size 23592960
diff --git a/params_shard_24.bin b/params_shard_24.bin
new file mode 100644
index 0000000000000000000000000000000000000000..feebe11b8e3373adb4939cf00464141612629d7d
--- /dev/null
+++ b/params_shard_24.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:291a85428557fd71a37e5195f51bd204d14c50f0ebae9325167569377d465e65
+size 29360128
diff --git a/params_shard_25.bin b/params_shard_25.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d44945c0ecf15380ab8ae76d76f214fba545ec9e
--- /dev/null
+++ b/params_shard_25.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:30154072613300f7430d546ed6d4692aabe90f55ec4fecdf1b9d2c734514a79a
+size 58720256
diff --git a/params_shard_26.bin b/params_shard_26.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1b4b9335d4e5e7ac9e11b4249f45e5251fc19987
--- /dev/null
+++ b/params_shard_26.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adcf0dc5c0b9fb3058e28039dd179a22f99b77a20d14778173d0309463c91691
+size 31997952
diff --git a/params_shard_27.bin b/params_shard_27.bin
new file mode 100644
index 0000000000000000000000000000000000000000..eadefb85d043fa954f3ad2b686967eea09a33ffb
--- /dev/null
+++ b/params_shard_27.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3df87c2162421d5283f5bb74dc78151d91cf96f12f48adccde6e51ef6f6ba984
+size 21504000
diff --git a/params_shard_28.bin b/params_shard_28.bin
new file mode 100644
index 0000000000000000000000000000000000000000..39d217f6f95c0a9a0990d017d29d3de53e4761f3
--- /dev/null
+++ b/params_shard_28.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc42def184109d2c5401a89eed97695703e04cc012d5f811dbc56ecf3ede32de
+size 29360128
diff --git a/params_shard_29.bin b/params_shard_29.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e193712d3678a79e741b61596b9142777c6944ed
--- /dev/null
+++ b/params_shard_29.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82815d9427960171b7777017e03531a9e42fc2075a5c6b7157d24a662680ffa0
+size 58720256
diff --git a/params_shard_3.bin b/params_shard_3.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c399e606a765673a168733bd8bf80fec63e64086
--- /dev/null
+++ b/params_shard_3.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:47969276516a83392392d3eeed6532dffe1b3cc89662893c8f9a17ceeaf4048c
+size 29360128
diff --git a/params_shard_30.bin b/params_shard_30.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b61eb70b567ea89fb8e9cb2a56eb502286e17154
--- /dev/null
+++ b/params_shard_30.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:785261bbda3be6ec31319a01cc42cb63e4e7ce7747e50eb65754515fe37dbd59
+size 31989760
diff --git a/params_shard_31.bin b/params_shard_31.bin
new file mode 100644
index 0000000000000000000000000000000000000000..83697ebe39f66395f6b532f421f64b0e7892a6f8
--- /dev/null
+++ b/params_shard_31.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb01af8811e59013595316bce9c831b7ca8442a67b9ea0cc0d2293954d082ea8
+size 58720256
diff --git a/params_shard_32.bin b/params_shard_32.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c59ee567ffa08ef54ffe753ef7a2b43bc70c3684
--- /dev/null
+++ b/params_shard_32.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c446397bc17fa447bf173ed2d41329c48ae568ac9e0de5a5c491e794caff1eb4
+size 58720256
diff --git a/params_shard_33.bin b/params_shard_33.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5f20abde37efcacc2fb244fd737d37ebed20fc81
--- /dev/null
+++ b/params_shard_33.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8d4623e493c10ceba16ed95d3c3eb62ee5c5bb68022665f0258c255defae2dc
+size 28852224
diff --git a/params_shard_34.bin b/params_shard_34.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bb02fecf09af5cc89d90c9dad4688b43234f067b
--- /dev/null
+++ b/params_shard_34.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15640bf6bc444b4494dd5a5fe7e93c8dbd55793ae3cf976d77091ba24f156464
+size 29360128
diff --git a/params_shard_35.bin b/params_shard_35.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d7b5f10cca50ed7ff8b3f0d1876946e0b92456d6
--- /dev/null
+++ b/params_shard_35.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b79fbefe8f6fa75dc5d60953f19df25e2ffa644283f8a19a437ba702ac297348
+size 58720256
diff --git a/params_shard_36.bin b/params_shard_36.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c2cfc095e68b9e751af3f106d3507b21255a6f18
--- /dev/null
+++ b/params_shard_36.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd0d2ff14974057fbc85fa1deabb5df07a292d3eb0bc995de6a10cf4cc023719
+size 32530432
diff --git a/params_shard_37.bin b/params_shard_37.bin
new file mode 100644
index 0000000000000000000000000000000000000000..88e8b85376ba3c9c11c58fc14348a48148a60ce4
--- /dev/null
+++ b/params_shard_37.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f641f296ac44cff3d82dc7dca05bdfeb2ed5268b5e3d8f61d1428a64dfcd8e9
+size 58720256
diff --git a/params_shard_38.bin b/params_shard_38.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bcd12cf7ad5f74c0fa59d2d4144256fb661fdc5e
--- /dev/null
+++ b/params_shard_38.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8bf35222fd1e9d2d764bd45b3e0c153c1981aa2f2647bb756dc09731a590995d
+size 58720256
diff --git a/params_shard_39.bin b/params_shard_39.bin
new file mode 100644
index 0000000000000000000000000000000000000000..897bfa5cac4511f19cd54575de508649b52f9920
--- /dev/null
+++ b/params_shard_39.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9d4af193c7af4f3ea933792d73af204748237ffad85298bca77a57f642a6b80
+size 28844032
diff --git a/params_shard_4.bin b/params_shard_4.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c099cd4951b65c7ebf666094382fe3c3ac5c858f
--- /dev/null
+++ b/params_shard_4.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc785246f4dc87fc05661b3d695b8bafe5fc3e7da455f7cffb3a2cf6f360fd1e
+size 25165824
diff --git a/params_shard_40.bin b/params_shard_40.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5659d418dc9aced4ed4d8592ec6e0c822d1796f6
--- /dev/null
+++ b/params_shard_40.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:90a83b4b4907e4bf2f6e2a55121a0c74e924a6b84134e65bf3b7032027bd3f8d
+size 58720256
diff --git a/params_shard_41.bin b/params_shard_41.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d053536ed4846d0326da4938876b83947a6a4428
--- /dev/null
+++ b/params_shard_41.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4844797b7b016d0bfab0edfbc23739c6415962c1626f22ce8979fded92217766
+size 21495808
diff --git a/params_shard_42.bin b/params_shard_42.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a77054d34a36da2efb5fa6de857e698637a5abd6
--- /dev/null
+++ b/params_shard_42.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c8c004b56b5555ed7b6d2be48bc0a7b4fcade5eaa603b5e5abed54454706580
+size 58720256
diff --git a/params_shard_43.bin b/params_shard_43.bin
new file mode 100644
index 0000000000000000000000000000000000000000..55207dbbb7f2fcb8308671f91ff59c0c360f80fb
--- /dev/null
+++ b/params_shard_43.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4fa970e3378f7523567b14ddf20e82307cbb8f285ede0f60b418291136a8f121
+size 262674432
diff --git a/params_shard_44.bin b/params_shard_44.bin
new file mode 100644
index 0000000000000000000000000000000000000000..89f3887f1ced1f5d307d45af74c4fd987d7abfd2
--- /dev/null
+++ b/params_shard_44.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b424be4a44869eeddafba940c7d033dddff30c157f2c4375d98d67ae700097a7
+size 32834304
diff --git a/params_shard_45.bin b/params_shard_45.bin
new file mode 100644
index 0000000000000000000000000000000000000000..abd81e0f3d353826db064218aa8f28b04eb7eb15
--- /dev/null
+++ b/params_shard_45.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0df96feb88b7974467d45fecac08c6e08dce26c595c965db21a142cf54807c07
+size 58720256
diff --git a/params_shard_46.bin b/params_shard_46.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d8b76584cb7a5b8e6f829c11b5e9ad92bc933f71
--- /dev/null
+++ b/params_shard_46.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce5066c6bf3403117f593cb9ba38fc94c573d4413270f82e7c82599f59620fe7
+size 30957568
diff --git a/params_shard_47.bin b/params_shard_47.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4536a47512a012345afc2cc4095ce286cd9b2a85
--- /dev/null
+++ b/params_shard_47.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf23bda797e622640d21cc23a3a3be8a46129471575face1665d3bfe53974045
+size 29360128
diff --git a/params_shard_48.bin b/params_shard_48.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5cd86cd6efa1ea5287d46e2ae9de0d3e05260d0a
--- /dev/null
+++ b/params_shard_48.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:23fc4dfaf0f081f396ba0774afa4df36f60709fccd52ec6a3058c0e4c86eecbb
+size 29360128
diff --git a/params_shard_49.bin b/params_shard_49.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5a8c7ff5e8a082e7343828c6f9043cbeef128b0a
--- /dev/null
+++ b/params_shard_49.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fb7b48daa0c80ed2266f997035a31cc82742c1e0b620f337bc83045f1314e4d
+size 28860416
diff --git a/params_shard_5.bin b/params_shard_5.bin
new file mode 100644
index 0000000000000000000000000000000000000000..67d4d815fb126ecb70fb3bc32cb8ef2c884956a8
--- /dev/null
+++ b/params_shard_5.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c139e66a7309dbf041fee16266097c0de5d74fdf8d328dc69fed426ee0976560
+size 58720256
diff --git a/params_shard_50.bin b/params_shard_50.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0dd6590fd6105b4616e717cd5417548ca290b812
--- /dev/null
+++ b/params_shard_50.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9963b218a9022fe324d3e67d36d61f32252d2076f88b0fb67cd2ad9ce59a2ed
+size 29360128
diff --git a/params_shard_51.bin b/params_shard_51.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ee6f3e1bee72a58515b5f774926aa409a2eba63d
--- /dev/null
+++ b/params_shard_51.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6782e5b6ba0a91a4cbd17e137d868dd11b18e4a11b5b6bf5f046ce5c7d81b51e
+size 27262976
diff --git a/params_shard_52.bin b/params_shard_52.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7ff1d49b4988c848953952aefaf923fa21e9ca2c
--- /dev/null
+++ b/params_shard_52.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60549b7faa5299acd26591cbe75a4c6502cee872d063c48c21283dd366f8cf5a
+size 58720256
diff --git a/params_shard_53.bin b/params_shard_53.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7a9f3b1216054668435905ed610b14492b8ef97d
--- /dev/null
+++ b/params_shard_53.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b887cd0581b8abb93211b29d999eb48425edfbdeaa8f5b4c577b5f4355d613a
+size 30941184
diff --git a/params_shard_54.bin b/params_shard_54.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5bbe657e9243e49001511b7f52a5bff37408eb16
--- /dev/null
+++ b/params_shard_54.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2549c5645ff58998c9b5b5ccad1ac23db939028a26b686e8bb41be2a5f4016e9
+size 29360128
diff --git a/params_shard_55.bin b/params_shard_55.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1fa13cb672fe57a6b866931c1aab259379e0f233
--- /dev/null
+++ b/params_shard_55.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48e0df1be51bdcf01b4b756b3f7eca3d6a1d56aad8bfa8f01063c57131a63110
+size 27279360
diff --git a/params_shard_56.bin b/params_shard_56.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e800dbea4421cc0a4ea3ad092717d412ae13539b
--- /dev/null
+++ b/params_shard_56.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b45d6485d50b99a8629febf8ed605bb0cb006c7ececdcf1b9e417c9223639e3d
+size 29360128
diff --git a/params_shard_57.bin b/params_shard_57.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9689082b7639f30e66e99fc13f522bddda61be95
--- /dev/null
+++ b/params_shard_57.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e00881f75b193b8e3fa9bbef235056d586c40f855ed2151dd0e8743e883662d
+size 58720256
diff --git a/params_shard_58.bin b/params_shard_58.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5e1dc54783011bb8aa9908008a41d10468d2bf15
--- /dev/null
+++ b/params_shard_58.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddbdc18d0c5f90350e5adae42d19c0c3c265071d08f444b959b9eff20e3ffa95
+size 58720256
diff --git a/params_shard_59.bin b/params_shard_59.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0635176c879a6a42fe3901b09ee43ea15669f045
--- /dev/null
+++ b/params_shard_59.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfa7ebf4ae4e90bc0e84a8b3efb55a6f0d7faf532188a04d899d94529d359ece
+size 27795456
diff --git a/params_shard_6.bin b/params_shard_6.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e26f0d32758a930db5de7a5a052b87f38e366eb0
--- /dev/null
+++ b/params_shard_6.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc5eb0b5b01db08305df889e706dac5b0da5892a4da436e7bead77e9c7c5fe9a
+size 21495808
diff --git a/params_shard_60.bin b/params_shard_60.bin
new file mode 100644
index 0000000000000000000000000000000000000000..86cc6f437b42fcfdaf8cee2357a7adb10cdd8596
--- /dev/null
+++ b/params_shard_60.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f07e83e58cc34250f21b7d42957623dd94508e4301df674eee5689cb3e368a8
+size 29360128
diff --git a/params_shard_61.bin b/params_shard_61.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d70a97890775e0658eb17ee1008ba11673bb6f7c
--- /dev/null
+++ b/params_shard_61.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31cba0f279c8d638526998c7fef545652fba366e1313744e8d682b5bd927a749
+size 58720256
diff --git a/params_shard_62.bin b/params_shard_62.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ba368b90af61407b706efa65824456449bcf7457
--- /dev/null
+++ b/params_shard_62.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e7028cd475013a8c253a2116ccf2709af7b049ca71de87f4312ee27a68c48b7
+size 27271168
diff --git a/params_shard_63.bin b/params_shard_63.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1ac756cc066401c692ebf43e24c1c33f5e53b5c7
--- /dev/null
+++ b/params_shard_63.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a45402742d0737367e944f6b564ebf2a62b2a35a0782922d2c75801f4a1df3f9
+size 58720256
diff --git a/params_shard_64.bin b/params_shard_64.bin
new file mode 100644
index 0000000000000000000000000000000000000000..baa85be68b9c00acc35241c822748477affe8792
--- /dev/null
+++ b/params_shard_64.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7430dbddfd775ac13e0c7f620cb072b7b40210d6b5d3393be700735896dc0f08
+size 30957568
diff --git a/params_shard_65.bin b/params_shard_65.bin
new file mode 100644
index 0000000000000000000000000000000000000000..08517892f59598bbac6b7bf1b29a572cd6f91420
--- /dev/null
+++ b/params_shard_65.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:edf020e805ef8b5d14543fc1b439cb71cb908dc55c8d7fcdc1137abf655ecc70
+size 29360128
diff --git a/params_shard_66.bin b/params_shard_66.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c2d6e5fe89daf470d1b4f41ed64164b00d983123
--- /dev/null
+++ b/params_shard_66.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e39d89cdbb8bcd3fe1b8dbc3b10e51debbdab0f088a711508eb3d1a363b3c78
+size 33038336
diff --git a/params_shard_67.bin b/params_shard_67.bin
new file mode 100644
index 0000000000000000000000000000000000000000..727702494779cdd2c82f62841e904f01241ea530
--- /dev/null
+++ b/params_shard_67.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd9d9f0e7a5222b6bae487341b46667083319a593a3397b8ada48aa87cabc954
+size 25174016
diff --git a/params_shard_68.bin b/params_shard_68.bin
new file mode 100644
index 0000000000000000000000000000000000000000..dbcee07f456b5bde290078c47585f17e406bbecf
--- /dev/null
+++ b/params_shard_68.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:edacb9bda70eb1d39b7a42971e4cf68f791f596e036d96e8d9f5d2f946b0fe88
+size 29360128
diff --git a/params_shard_69.bin b/params_shard_69.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d97d8c8d1a5350bd68a9c4857e4d0cfaa4f93d93
--- /dev/null
+++ b/params_shard_69.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c14c7887d5773a4d3264344a01f1332ac1c97b40019b38a6a7f83926d5190758
+size 58720256
diff --git a/params_shard_7.bin b/params_shard_7.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f42e29e5feaebb239dda9d29b28c5ffcb2605aaa
--- /dev/null
+++ b/params_shard_7.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02b2d78101d12607127d76429813dc4d10d3cce0deec1baa42c8349405c016cf
+size 23592960
diff --git a/params_shard_70.bin b/params_shard_70.bin
new file mode 100644
index 0000000000000000000000000000000000000000..14d7cbe38eb2c1beb41884592bb63db4aaef1a0f
--- /dev/null
+++ b/params_shard_70.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e0d89c77ae0b7a5362bd1a13f8c1dd8d5956dd37f9e324ae82afe5cf8ab13e4
+size 27262976
diff --git a/params_shard_71.bin b/params_shard_71.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b1ae3d426521cae3b84645c994f63b2a55aba86d
--- /dev/null
+++ b/params_shard_71.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:067f82c945a9c4acc88d4ac00d3c36886764b4312885ef017d5af802daee5544
+size 29360128
diff --git a/params_shard_72.bin b/params_shard_72.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ff7c734d78e59250c20f3dd5ae17c48e6a6c909c
--- /dev/null
+++ b/params_shard_72.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:327b51be399b2da94c4fae9380c7c0ef23fd5f9ec2d129b83d0ba1c889c5e39c
+size 58720256
diff --git a/params_shard_73.bin b/params_shard_73.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a884ffce20432286f26397d2f0f16cc5fa6d93ad
--- /dev/null
+++ b/params_shard_73.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b4f5ed83bf504377d3d8216c2d128847e79fbc2a90e34cd4603470c7134d596
+size 29360128
diff --git a/params_shard_74.bin b/params_shard_74.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1425fa4a8ebc13c06c20bdc8ac19f899c371c849
--- /dev/null
+++ b/params_shard_74.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bdf17601da53c3a37cb5deaf7bd1886638a91030f4c2bc5f5be25e34422748ef
+size 32522240
diff --git a/params_shard_75.bin b/params_shard_75.bin
new file mode 100644
index 0000000000000000000000000000000000000000..910ebf8467e8a5b48460831ddbdffa84b915558b
--- /dev/null
+++ b/params_shard_75.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cde34a8b0b8636f1b69105d49568174db8e16b79e499284918a2a723983271f6
+size 58720256
diff --git a/params_shard_76.bin b/params_shard_76.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ddc5a1c6c9b800ade0184c846b0d0ad9f4ac7a71
--- /dev/null
+++ b/params_shard_76.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42d17a2bd25ea1a50d841913316f934194d041038129aeb88d33de546831e9d7
+size 33030144
diff --git a/params_shard_77.bin b/params_shard_77.bin
new file mode 100644
index 0000000000000000000000000000000000000000..da35017460264c5e54ea39f2f5e13bfa65aa7e85
--- /dev/null
+++ b/params_shard_77.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6eb48bb2ad39a68e2fe24d5c309328d3d6ccb93118e0ca30572636cc1daf397e
+size 30941184
diff --git a/params_shard_78.bin b/params_shard_78.bin
new file mode 100644
index 0000000000000000000000000000000000000000..64e3067601b2c62306397609f58ae160af28798d
--- /dev/null
+++ b/params_shard_78.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bdb0decdf1b04e7cd76667c150bbcbbe28411b7bcf198d9bc98ab0bda4597da9
+size 29360128
diff --git a/params_shard_79.bin b/params_shard_79.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a5e5c4577e5a88fdb062aac05f51db9a8f5b1200
--- /dev/null
+++ b/params_shard_79.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7af130e00bfeb3b823ff49fe64ccc7fef31118f678abd38b078d0c7ac1f5455d
+size 58720256
diff --git a/params_shard_8.bin b/params_shard_8.bin
new file mode 100644
index 0000000000000000000000000000000000000000..51fd0d27d6d90e06ca16082a62d6b357fa8bb4cf
--- /dev/null
+++ b/params_shard_8.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef6c4af01fa55f6b35962ac230c2e6712eca9238cce75bb79ffb3409fa589bac
+size 29360128
diff --git a/params_shard_80.bin b/params_shard_80.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2ed6f73c746143a3261b4d5d1fa26138e56be3c0
--- /dev/null
+++ b/params_shard_80.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ceb09f3a6408497fda50124311f6dc4833f1b92dbcdf939787bff7036b35db0b
+size 32530432
diff --git a/params_shard_81.bin b/params_shard_81.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4bedd009a302ba0cc69cbdc55267b0f0c1287b61
--- /dev/null
+++ b/params_shard_81.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:334f4c478c999bc06b3f9d1f404cb6796720f81486589741f13cbb702e5eac99
+size 58720256
diff --git a/params_shard_82.bin b/params_shard_82.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e7da96318b83a2e65980cb056aab4d26671dc721
--- /dev/null
+++ b/params_shard_82.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1fb6bf4d8c4de699962cf4bc921d411a748e8655280ecc75dbad76f8e566b16
+size 58720256
diff --git a/params_shard_83.bin b/params_shard_83.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7c9a4e5d7564b3118d33e5f372f8b91a89a2eb89
--- /dev/null
+++ b/params_shard_83.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f68f691b398f3df13d51251984433498a94da97f26d23df22f2911f1235fa4a
+size 27287552
diff --git a/params_shard_84.bin b/params_shard_84.bin
new file mode 100644
index 0000000000000000000000000000000000000000..45d6293b6eb443c683a637c81537ae91a8ac8a19
--- /dev/null
+++ b/params_shard_84.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61f5b9381f6144cf75f9d50d58f169b82a0a48390cea18b993dbf450feb40753
+size 58720256
diff --git a/params_shard_85.bin b/params_shard_85.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9e56abf0e54aea3ce5c55cda032ef523cada9e2a
--- /dev/null
+++ b/params_shard_85.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:060e44471c9fcec943c8a81c0d104847026b8f74c3184e7fdb420a82cd0eacf1
+size 29360128
diff --git a/params_shard_86.bin b/params_shard_86.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c1b0a3d53b860e3f2b749eb1e1ef7d0ceb9ca074
--- /dev/null
+++ b/params_shard_86.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93c1f971720daa7477a13ddc54e1599c7cec4bf4d15a5a1b8c610d744b21fb9c
+size 58720256
diff --git a/params_shard_87.bin b/params_shard_87.bin
new file mode 100644
index 0000000000000000000000000000000000000000..15afd9c3d5e023d6d29cba7e9aa7dafe42a3c213
--- /dev/null
+++ b/params_shard_87.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d6d8fbace6534e9d3f225460c6ab5a479fbf08c87b33227a6296841205227d8
+size 27811840
diff --git a/params_shard_88.bin b/params_shard_88.bin
new file mode 100644
index 0000000000000000000000000000000000000000..812c3e6f89e855ba7f0e93ed5f1da2eb5ae506c8
--- /dev/null
+++ b/params_shard_88.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:812efeec56b5bb8fc366bdca6809d9128d394879de0c16eb3ade7e4c4f8ee5be
+size 29360128
diff --git a/params_shard_89.bin b/params_shard_89.bin
new file mode 100644
index 0000000000000000000000000000000000000000..df99876ec7f027c0d8133ce92cab94438ef8f393
--- /dev/null
+++ b/params_shard_89.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e7deb3272d2f1c0f05f74dd886e1e9ebe77698e1b5afbcf385d1d71f9e8b61a
+size 29360128
diff --git a/params_shard_9.bin b/params_shard_9.bin
new file mode 100644
index 0000000000000000000000000000000000000000..43715c85abf06b2c8c022361f41f84deb1fb93cb
--- /dev/null
+++ b/params_shard_9.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4aa88e22c26f045c39f6d5420ceced4722d9355d6633044583a9b6730a47189
+size 58720256
diff --git a/params_shard_90.bin b/params_shard_90.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2a3902bc70b81ab0158924ea355df8efdce3fdbf
--- /dev/null
+++ b/params_shard_90.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fad559bf4ac787ff62265505b3e26329a572d8d644d1bc4895c432adc9dd300
+size 24158208
diff --git a/params_shard_91.bin b/params_shard_91.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f08160c46bd17bb042a0ae02856d8ee0d0b2a6b8
--- /dev/null
+++ b/params_shard_91.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e96b29ce3c195e989437daddb91388f493f12c7d4dc708146c3feb016d8ac68a
+size 29360128
diff --git a/params_shard_92.bin b/params_shard_92.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ce6f790904e7c824b66821b02d51b2ebec9828a0
--- /dev/null
+++ b/params_shard_92.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea542c75781cf6be4cc7f1a600fa4c04c1964f263e2912eb5f3becd88d627ee9
+size 58720256
diff --git a/params_shard_93.bin b/params_shard_93.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4fb3024950f4bf9d4394f5fe0655ccfdd9297f43
--- /dev/null
+++ b/params_shard_93.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd782a8d4d8b49862bdf0feb079642520b4f5f6bd6013357333a4407aa034a5f
+size 25182208
diff --git a/params_shard_94.bin b/params_shard_94.bin
new file mode 100644
index 0000000000000000000000000000000000000000..dfc2dd7308a7e7b38c0aee6bcba4ddd6f4005b18
--- /dev/null
+++ b/params_shard_94.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6189f53e03b26c17ca6ff4503ef94a023e472eb38e2cbe6bc4d88b87e433a798
+size 29360128
diff --git a/params_shard_95.bin b/params_shard_95.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bd31708a8dca60f73fb1aed141ff7f084fd1c57f
--- /dev/null
+++ b/params_shard_95.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db8d10dbcc5e5e8b5b3a625e81e1bd0c00c9309eb61925cddc85fbcec04f0bc5
+size 31997952
diff --git a/params_shard_96.bin b/params_shard_96.bin
new file mode 100644
index 0000000000000000000000000000000000000000..82a825f9ab70b93b003144271bfd5bc073900514
--- /dev/null
+++ b/params_shard_96.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef99330e08755148a206e87da0dbf2e6bbf607990266c8a733060f3068c194a2
+size 29360128
diff --git a/params_shard_97.bin b/params_shard_97.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b1888428b1b0dca1a8278733af65a71802225e7f
--- /dev/null
+++ b/params_shard_97.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80d8df0a0536e0d13631b643f5a2c17464aed3183254b18f3047c01d57f1f355
+size 29360128
diff --git a/params_shard_98.bin b/params_shard_98.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f2dcdcdff9615eb68edaa4f6ffd7a35e6a1ff4c1
--- /dev/null
+++ b/params_shard_98.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61e83f1d661cad1f93bffff1a668d134ece6ad913c883699089cc838eb59c726
+size 29360128
diff --git a/params_shard_99.bin b/params_shard_99.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7e785508e1b8d706f9d920846c11da01a43d33f1
--- /dev/null
+++ b/params_shard_99.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f03d1797bbb512f7ef7af0ed23d75a3d6fa49021453de680873076c119e810d0
+size 29917184
diff --git a/server.py b/server.py
new file mode 100644
index 0000000000000000000000000000000000000000..c44a1e6bacacea2950ba14cace7962eb4f6bfbad
--- /dev/null
+++ b/server.py
@@ -0,0 +1,11 @@
+from http.server import HTTPServer, SimpleHTTPRequestHandler
+
+class CORSRequestHandler(SimpleHTTPRequestHandler):
+ def end_headers(self):
+ self.send_header('Access-Control-Allow-Origin', '*')
+ self.send_header('Access-Control-Allow-Methods', 'GET')
+ self.send_header('Cache-Control', 'no-store, no-cache, must-revalidate')
+ return super().end_headers()
+
+print("Serving MLC model at http://localhost:8000")
+HTTPServer(('localhost', 8000), CORSRequestHandler).serve_forever()
diff --git a/tensor-cache-b16.json b/tensor-cache-b16.json
new file mode 100644
index 0000000000000000000000000000000000000000..bae64f4cded99e52a27c3dbdc6111316d099d038
--- /dev/null
+++ b/tensor-cache-b16.json
@@ -0,0 +1,4375 @@
+{
+ "metadata": {
+ "ParamSize": 325,
+ "ParamBytes": 5019827200.0,
+ "BitsPerParam": 1.5790556154328332
+ },
+ "records": [
+ {
+ "dataPath": "params_shard_0.bin",
+ "format": "raw-shard",
+ "nbytes": 262674432,
+ "records": [
+ {
+ "name": "lm_head.q_weight",
+ "shape": [
+ 128259,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 262674432,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "1a8db284cf568c04eb50e0a61670b435"
+ },
+ {
+ "dataPath": "params_shard_1.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.0.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "3ba101a32a60e4957d4f341ed2474d54"
+ },
+ {
+ "dataPath": "params_shard_2.bin",
+ "format": "raw-shard",
+ "nbytes": 32842496,
+ "records": [
+ {
+ "name": "lm_head.q_scale",
+ "shape": [
+ 128259,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 32834304,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.0.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 32834304
+ }
+ ],
+ "md5sum": "25b9648a2f7f4bd916a635505f8ed165"
+ },
+ {
+ "dataPath": "params_shard_3.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.1.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "bde72072557ec421c0ff4ae82ce96784"
+ },
+ {
+ "dataPath": "params_shard_4.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.0.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.0.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.0.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 19922944
+ },
+ {
+ "name": "model.layers.1.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 21495808
+ }
+ ],
+ "md5sum": "0bc04e510210d12f4a773ddfcf2012ae"
+ },
+ {
+ "dataPath": "params_shard_5.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.10.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "3b7c7544320febb20cc7b3a3310e4415"
+ },
+ {
+ "dataPath": "params_shard_6.bin",
+ "format": "raw-shard",
+ "nbytes": 21495808,
+ "records": [
+ {
+ "name": "model.layers.1.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.1.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.10.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 14155776
+ }
+ ],
+ "md5sum": "e5bbdd5d1d201a4ae1bb0828a8a52e60"
+ },
+ {
+ "dataPath": "params_shard_7.bin",
+ "format": "raw-shard",
+ "nbytes": 23592960,
+ "records": [
+ {
+ "name": "model.layers.10.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.10.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.10.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.10.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 22544384
+ }
+ ],
+ "md5sum": "a8fd0fb8a5c610ce1c66b6c10d82b030"
+ },
+ {
+ "dataPath": "params_shard_8.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.13.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "ea30f6795d29feeae83af89e7e8fa85c"
+ },
+ {
+ "dataPath": "params_shard_9.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.13.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "746157e8b6c2189cc1f5a8d460a445d3"
+ },
+ {
+ "dataPath": "params_shard_10.bin",
+ "format": "raw-shard",
+ "nbytes": 27271168,
+ "records": [
+ {
+ "name": "model.layers.11.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.11.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.12.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.12.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 22544384
+ },
+ {
+ "name": "model.layers.13.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 23592960
+ },
+ {
+ "name": "model.layers.13.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 23601152
+ }
+ ],
+ "md5sum": "edcb64768413917f8fdb66fd7c632851"
+ },
+ {
+ "dataPath": "params_shard_11.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.14.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "0247610b68f5900228a65256358f0120"
+ },
+ {
+ "dataPath": "params_shard_12.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.14.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "4f7e7f8bed5b1ee976f4b7dfe143223d"
+ },
+ {
+ "dataPath": "params_shard_13.bin",
+ "format": "raw-shard",
+ "nbytes": 32505856,
+ "records": [
+ {
+ "name": "model.layers.13.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.13.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.13.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 19922944
+ },
+ {
+ "name": "model.layers.14.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 21495808
+ },
+ {
+ "name": "model.layers.14.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 25165824
+ }
+ ],
+ "md5sum": "9b95ca4090dbe045baa76e5c0d444fcf"
+ },
+ {
+ "dataPath": "params_shard_14.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.15.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "083256552395630b71e265052c56b44f"
+ },
+ {
+ "dataPath": "params_shard_15.bin",
+ "format": "raw-shard",
+ "nbytes": 21495808,
+ "records": [
+ {
+ "name": "model.layers.14.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.14.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.15.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 14155776
+ }
+ ],
+ "md5sum": "0fb7eba5c12daa5c6c559d3351c3ec89"
+ },
+ {
+ "dataPath": "params_shard_16.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.16.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "06e904967db15497efbedf6df26d70d2"
+ },
+ {
+ "dataPath": "params_shard_17.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.17.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "83e68fa78e4048b638bf27f4ceccc00a"
+ },
+ {
+ "dataPath": "params_shard_18.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.18.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "1f83fe0c80b8e8e2b8741f4b1792206c"
+ },
+ {
+ "dataPath": "params_shard_19.bin",
+ "format": "raw-shard",
+ "nbytes": 30949376,
+ "records": [
+ {
+ "name": "model.layers.15.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.15.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.16.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.16.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 14163968
+ },
+ {
+ "name": "model.layers.17.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 17833984
+ },
+ {
+ "name": "model.layers.17.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 21504000
+ },
+ {
+ "name": "model.layers.17.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 29892608
+ },
+ {
+ "name": "model.layers.18.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 30941184
+ }
+ ],
+ "md5sum": "512034d4872f6b17183d1c51b93caa7d"
+ },
+ {
+ "dataPath": "params_shard_20.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.19.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "e1dccd0df2c5d770645275ea79ded18a"
+ },
+ {
+ "dataPath": "params_shard_21.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.2.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "cdd79ac3215afba2d0a8f249fe99bd48"
+ },
+ {
+ "dataPath": "params_shard_22.bin",
+ "format": "raw-shard",
+ "nbytes": 32522240,
+ "records": [
+ {
+ "name": "model.layers.18.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.19.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.19.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 7348224
+ },
+ {
+ "name": "model.layers.19.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 14688256
+ },
+ {
+ "name": "model.layers.19.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 27271168
+ },
+ {
+ "name": "model.layers.2.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 28844032
+ },
+ {
+ "name": "model.layers.2.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 32514048
+ }
+ ],
+ "md5sum": "e45b09ae4128a2291092f533ff8af6b2"
+ },
+ {
+ "dataPath": "params_shard_23.bin",
+ "format": "raw-shard",
+ "nbytes": 23592960,
+ "records": [
+ {
+ "name": "model.layers.2.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.2.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 8388608
+ },
+ {
+ "name": "model.layers.2.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 9437184
+ },
+ {
+ "name": "model.layers.2.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 22020096
+ }
+ ],
+ "md5sum": "ee6e8e015b80bb375568dbd0a9385600"
+ },
+ {
+ "dataPath": "params_shard_24.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.23.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "05228c3e13478e4248b37d8d9305b779"
+ },
+ {
+ "dataPath": "params_shard_25.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.23.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "75588b4068058bea27bf5beb45573db1"
+ },
+ {
+ "dataPath": "params_shard_26.bin",
+ "format": "raw-shard",
+ "nbytes": 31997952,
+ "records": [
+ {
+ "name": "model.layers.20.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.20.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.21.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.21.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 26738688
+ },
+ {
+ "name": "model.layers.22.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 28311552
+ },
+ {
+ "name": "model.layers.22.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 28319744
+ },
+ {
+ "name": "model.layers.23.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 28327936
+ }
+ ],
+ "md5sum": "d78184ef263ba7c0cd48bf8992a8763a"
+ },
+ {
+ "dataPath": "params_shard_27.bin",
+ "format": "raw-shard",
+ "nbytes": 21504000,
+ "records": [
+ {
+ "name": "model.layers.23.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.23.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.23.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 7348224
+ },
+ {
+ "name": "model.layers.23.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 19931136
+ }
+ ],
+ "md5sum": "a7e024f53dcbb8c403dc5846471e1230"
+ },
+ {
+ "dataPath": "params_shard_28.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.27.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "2f555714241ccfbc775ed5dd148c141a"
+ },
+ {
+ "dataPath": "params_shard_29.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.27.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "5ed3a430e7bab4e5f4b57a5c6de411a5"
+ },
+ {
+ "dataPath": "params_shard_30.bin",
+ "format": "raw-shard",
+ "nbytes": 31989760,
+ "records": [
+ {
+ "name": "model.layers.25.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.25.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.26.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.26.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 14163968
+ },
+ {
+ "name": "model.layers.26.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 26746880
+ },
+ {
+ "name": "model.layers.27.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 28319744
+ }
+ ],
+ "md5sum": "6a0b5ed94f61522b1d685e08d79fb8b3"
+ },
+ {
+ "dataPath": "params_shard_31.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.29.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "1d431ad2068ece3fe4d5c79a3299d003"
+ },
+ {
+ "dataPath": "params_shard_32.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.3.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "104c92ece5860e60e5ddf41b21f33260"
+ },
+ {
+ "dataPath": "params_shard_33.bin",
+ "format": "raw-shard",
+ "nbytes": 28852224,
+ "records": [
+ {
+ "name": "model.layers.27.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.27.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.28.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 7348224
+ },
+ {
+ "name": "model.layers.28.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 19931136
+ },
+ {
+ "name": "model.layers.29.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 21504000
+ },
+ {
+ "name": "model.layers.29.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 21512192
+ }
+ ],
+ "md5sum": "5e15cd3eef6c8fa6b29bacfc72085941"
+ },
+ {
+ "dataPath": "params_shard_34.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.30.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "42d39b7c0a48c578d45490617652f94a"
+ },
+ {
+ "dataPath": "params_shard_35.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.30.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "2f52a6a6b635f360945cd50c6f77d450"
+ },
+ {
+ "dataPath": "params_shard_36.bin",
+ "format": "raw-shard",
+ "nbytes": 32530432,
+ "records": [
+ {
+ "name": "model.layers.3.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.3.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.30.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 7348224
+ },
+ {
+ "name": "model.layers.30.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 11018240
+ },
+ {
+ "name": "model.layers.31.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 18358272
+ },
+ {
+ "name": "model.layers.31.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 18366464
+ },
+ {
+ "name": "model.layers.31.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 18374656
+ },
+ {
+ "name": "model.layers.31.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 30957568
+ }
+ ],
+ "md5sum": "f090e66edd23dcf73b05d64accde1148"
+ },
+ {
+ "dataPath": "params_shard_37.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.5.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "52c607eada0544fd8b300e3bafe85b37"
+ },
+ {
+ "dataPath": "params_shard_38.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.6.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "71459776ec3fe17d8912f3f45d1633e5"
+ },
+ {
+ "dataPath": "params_shard_39.bin",
+ "format": "raw-shard",
+ "nbytes": 28844032,
+ "records": [
+ {
+ "name": "model.layers.4.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.4.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.5.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.6.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 21495808
+ },
+ {
+ "name": "model.layers.6.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 21504000
+ }
+ ],
+ "md5sum": "98cad2c978ee84e32aea8232f5d9ca2e"
+ },
+ {
+ "dataPath": "params_shard_40.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.7.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "eff52722d4f6bbb88925a458be7d22d6"
+ },
+ {
+ "dataPath": "params_shard_41.bin",
+ "format": "raw-shard",
+ "nbytes": 21495808,
+ "records": [
+ {
+ "name": "model.layers.6.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.6.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.7.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 14155776
+ }
+ ],
+ "md5sum": "8be78c79bcbc7dd59723a564cfbc72f6"
+ },
+ {
+ "dataPath": "params_shard_42.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.9.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "99167fb62218d3e26034c197f77a8aa3"
+ },
+ {
+ "dataPath": "params_shard_43.bin",
+ "format": "raw-shard",
+ "nbytes": 262674432,
+ "records": [
+ {
+ "name": "model.embed_tokens.q_weight",
+ "shape": [
+ 128259,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 262674432,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "be6213096f7954a8c08120cb6960f78e"
+ },
+ {
+ "dataPath": "params_shard_44.bin",
+ "format": "raw-shard",
+ "nbytes": 32834304,
+ "records": [
+ {
+ "name": "model.embed_tokens.q_scale",
+ "shape": [
+ 128259,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 32834304,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "1c9142af090a3c0ad04a40a28500fd99"
+ },
+ {
+ "dataPath": "params_shard_45.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.1.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "c7d1f88d42573d7bba4ab5c88acee7b7"
+ },
+ {
+ "dataPath": "params_shard_46.bin",
+ "format": "raw-shard",
+ "nbytes": 30957568,
+ "records": [
+ {
+ "name": "model.layers.7.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.7.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.8.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.9.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 14163968
+ },
+ {
+ "name": "model.layers.9.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 21504000
+ },
+ {
+ "name": "model.layers.0.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 21512192
+ },
+ {
+ "name": "model.layers.0.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 21520384
+ },
+ {
+ "name": "model.layers.0.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 29908992
+ }
+ ],
+ "md5sum": "05aaab3081fcc13134141848e282aa12"
+ },
+ {
+ "dataPath": "params_shard_47.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.11.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "9ade4887e2074741f712d3cab5823322"
+ },
+ {
+ "dataPath": "params_shard_48.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.12.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "00688a4fd29532f4caec6b5b509dcbee"
+ },
+ {
+ "dataPath": "params_shard_49.bin",
+ "format": "raw-shard",
+ "nbytes": 28860416,
+ "records": [
+ {
+ "name": "model.layers.1.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.11.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.11.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 7348224
+ },
+ {
+ "name": "model.layers.11.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 11018240
+ },
+ {
+ "name": "model.layers.12.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 11026432
+ },
+ {
+ "name": "model.layers.12.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 11034624
+ },
+ {
+ "name": "model.layers.12.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 14704640
+ },
+ {
+ "name": "model.layers.12.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 27287552
+ }
+ ],
+ "md5sum": "11087a54cf33aefa7161e775dce54f98"
+ },
+ {
+ "dataPath": "params_shard_50.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.15.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "4121441eb5031984ac17c004c3bc8942"
+ },
+ {
+ "dataPath": "params_shard_51.bin",
+ "format": "raw-shard",
+ "nbytes": 27262976,
+ "records": [
+ {
+ "name": "model.layers.14.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.14.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 8388608
+ },
+ {
+ "name": "model.layers.15.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 9437184
+ },
+ {
+ "name": "model.layers.16.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 13107200
+ },
+ {
+ "name": "model.layers.16.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 25690112
+ }
+ ],
+ "md5sum": "8969128f561da2d8b4e77e8d1ed878f1"
+ },
+ {
+ "dataPath": "params_shard_52.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.17.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "cf2d41cca20a7323d0bf3668f3544747"
+ },
+ {
+ "dataPath": "params_shard_53.bin",
+ "format": "raw-shard",
+ "nbytes": 30941184,
+ "records": [
+ {
+ "name": "model.layers.16.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.16.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 8388608
+ },
+ {
+ "name": "model.layers.17.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 9437184
+ },
+ {
+ "name": "model.layers.17.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 9445376
+ },
+ {
+ "name": "model.layers.17.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 16785408
+ },
+ {
+ "name": "model.layers.17.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 29368320
+ }
+ ],
+ "md5sum": "6b824c284774312b0d1ba3b179ce3423"
+ },
+ {
+ "dataPath": "params_shard_54.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.19.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "545ffeb92d1a3a9fed54475373707bd8"
+ },
+ {
+ "dataPath": "params_shard_55.bin",
+ "format": "raw-shard",
+ "nbytes": 27279360,
+ "records": [
+ {
+ "name": "model.layers.18.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.18.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.18.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.18.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 22544384
+ },
+ {
+ "name": "model.layers.19.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 23592960
+ },
+ {
+ "name": "model.layers.19.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27262976
+ },
+ {
+ "name": "model.layers.2.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 27271168
+ }
+ ],
+ "md5sum": "16adcd1bcd0d080330eca9b1570f7f71"
+ },
+ {
+ "dataPath": "params_shard_56.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.21.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "431821dd1c8fe6b69b7384162f928dac"
+ },
+ {
+ "dataPath": "params_shard_57.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.21.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "8d4f5a09c2c6eca9bbd67de816f7ac6a"
+ },
+ {
+ "dataPath": "params_shard_58.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.22.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "791906fa74ff6201182223773d8f52ba"
+ },
+ {
+ "dataPath": "params_shard_59.bin",
+ "format": "raw-shard",
+ "nbytes": 27795456,
+ "records": [
+ {
+ "name": "model.layers.20.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.20.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 8388608
+ },
+ {
+ "name": "model.layers.21.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 9437184
+ },
+ {
+ "name": "model.layers.21.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 9445376
+ },
+ {
+ "name": "model.layers.21.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 13115392
+ },
+ {
+ "name": "model.layers.22.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 20455424
+ }
+ ],
+ "md5sum": "e58836bbb970d6e5a867c49aa72ab1a3"
+ },
+ {
+ "dataPath": "params_shard_60.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.24.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "221e435ae0e0ba172d3fe40b371366d2"
+ },
+ {
+ "dataPath": "params_shard_61.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.24.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "d411fb0e5f70728fb1d078a1b5f453f3"
+ },
+ {
+ "dataPath": "params_shard_62.bin",
+ "format": "raw-shard",
+ "nbytes": 27271168,
+ "records": [
+ {
+ "name": "model.layers.22.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.22.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.23.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.23.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 14163968
+ },
+ {
+ "name": "model.layers.23.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 22552576
+ },
+ {
+ "name": "model.layers.24.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 23601152
+ }
+ ],
+ "md5sum": "af3ad918225372ce0a49a92916160672"
+ },
+ {
+ "dataPath": "params_shard_63.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.26.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "e7ab23102cbf9b825fd4908c42a7d271"
+ },
+ {
+ "dataPath": "params_shard_64.bin",
+ "format": "raw-shard",
+ "nbytes": 30957568,
+ "records": [
+ {
+ "name": "model.layers.24.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.24.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.24.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 7348224
+ },
+ {
+ "name": "model.layers.24.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 19931136
+ },
+ {
+ "name": "model.layers.25.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 21504000
+ },
+ {
+ "name": "model.layers.25.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 21512192
+ },
+ {
+ "name": "model.layers.25.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 29900800
+ },
+ {
+ "name": "model.layers.26.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 30949376
+ }
+ ],
+ "md5sum": "d16feaf8957f72286374c7c7dea778ee"
+ },
+ {
+ "dataPath": "params_shard_65.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.29.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "4ec8dff5d06870bff3d4f8044cdef2a5"
+ },
+ {
+ "dataPath": "params_shard_66.bin",
+ "format": "raw-shard",
+ "nbytes": 33038336,
+ "records": [
+ {
+ "name": "model.layers.26.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.26.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.26.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 15728640
+ },
+ {
+ "name": "model.layers.29.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 16777216
+ },
+ {
+ "name": "model.layers.29.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 20447232
+ },
+ {
+ "name": "model.layers.29.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 20455424
+ }
+ ],
+ "md5sum": "8bf88524e873c3c15f8ce9d1e4ab4384"
+ },
+ {
+ "dataPath": "params_shard_67.bin",
+ "format": "raw-shard",
+ "nbytes": 25174016,
+ "records": [
+ {
+ "name": "model.layers.29.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.3.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 1572864
+ },
+ {
+ "name": "model.layers.3.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.3.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 15728640
+ },
+ {
+ "name": "model.layers.3.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "model.layers.30.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 25165824
+ }
+ ],
+ "md5sum": "9af87b8a9dc8e8319a311dccf596b747"
+ },
+ {
+ "dataPath": "params_shard_68.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.31.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "d99f164f744f5d26fadac05bb4b9d7a9"
+ },
+ {
+ "dataPath": "params_shard_69.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.31.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "10d0645f08b1772197615081aeae671c"
+ },
+ {
+ "dataPath": "params_shard_70.bin",
+ "format": "raw-shard",
+ "nbytes": 27262976,
+ "records": [
+ {
+ "name": "model.layers.30.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.30.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.30.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.30.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 22544384
+ },
+ {
+ "name": "model.layers.31.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 23592960
+ }
+ ],
+ "md5sum": "72adbac5687d46d5b21e01c4ea6ebca2"
+ },
+ {
+ "dataPath": "params_shard_71.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.4.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "221bba05efcce9895bf3a1fd65a7dea7"
+ },
+ {
+ "dataPath": "params_shard_72.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.4.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "4a1e7a56eb466989e9b999c648ecee32"
+ },
+ {
+ "dataPath": "params_shard_73.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.6.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "6ee645cad39ca412f5399a4c8a8f773b"
+ },
+ {
+ "dataPath": "params_shard_74.bin",
+ "format": "raw-shard",
+ "nbytes": 32522240,
+ "records": [
+ {
+ "name": "model.layers.31.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.4.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.4.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 11010048
+ },
+ {
+ "name": "model.layers.4.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 18350080
+ },
+ {
+ "name": "model.layers.5.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 18358272
+ },
+ {
+ "name": "model.layers.5.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 18366464
+ },
+ {
+ "name": "model.layers.5.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 30949376
+ }
+ ],
+ "md5sum": "e13680d69bfd480a0047f78e2d663fcd"
+ },
+ {
+ "dataPath": "params_shard_75.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.8.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "f2578d375b31a2b3c288760c71b91325"
+ },
+ {
+ "dataPath": "params_shard_76.bin",
+ "format": "raw-shard",
+ "nbytes": 33030144,
+ "records": [
+ {
+ "name": "model.layers.6.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.6.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 3670016
+ },
+ {
+ "name": "model.layers.6.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 12058624
+ },
+ {
+ "name": "model.layers.8.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 13107200
+ },
+ {
+ "name": "model.layers.8.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 20447232
+ }
+ ],
+ "md5sum": "a5f785ce02bedfed686c5569d1750f0a"
+ },
+ {
+ "dataPath": "params_shard_77.bin",
+ "format": "raw-shard",
+ "nbytes": 30941184,
+ "records": [
+ {
+ "name": "model.layers.8.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.9.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 1572864
+ },
+ {
+ "name": "model.layers.0.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 1581056
+ }
+ ],
+ "md5sum": "ef72a7b797e70980344ca361f597b75a"
+ },
+ {
+ "dataPath": "params_shard_78.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.10.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "fb56a3b0f0fe0c1bc523b2e06672f0b8"
+ },
+ {
+ "dataPath": "params_shard_79.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.11.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "cb9d334bd1ee278ed3b95f9632f3a8b0"
+ },
+ {
+ "dataPath": "params_shard_80.bin",
+ "format": "raw-shard",
+ "nbytes": 32530432,
+ "records": [
+ {
+ "name": "model.layers.0.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.1.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 3670016
+ },
+ {
+ "name": "model.layers.1.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 3678208
+ },
+ {
+ "name": "model.layers.1.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 12066816
+ },
+ {
+ "name": "model.layers.10.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 13115392
+ },
+ {
+ "name": "model.layers.10.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 13123584
+ },
+ {
+ "name": "model.layers.10.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 16793600
+ },
+ {
+ "name": "model.layers.11.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 16801792
+ },
+ {
+ "name": "model.layers.11.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 24141824
+ }
+ ],
+ "md5sum": "44a141488093aef939ff54f598ab335d"
+ },
+ {
+ "dataPath": "params_shard_81.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.12.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "691c093af629bba4e5e92a37b380aecb"
+ },
+ {
+ "dataPath": "params_shard_82.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.16.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "bfb0485c993d15439860f8c302f320ac"
+ },
+ {
+ "dataPath": "params_shard_83.bin",
+ "format": "raw-shard",
+ "nbytes": 27287552,
+ "records": [
+ {
+ "name": "model.layers.11.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.12.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 1048576
+ },
+ {
+ "name": "model.layers.13.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 8388608
+ },
+ {
+ "name": "model.layers.13.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 16777216
+ },
+ {
+ "name": "model.layers.14.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 17825792
+ },
+ {
+ "name": "model.layers.15.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 17833984
+ },
+ {
+ "name": "model.layers.15.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 17842176
+ },
+ {
+ "name": "model.layers.15.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 17850368
+ },
+ {
+ "name": "model.layers.15.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 26238976
+ }
+ ],
+ "md5sum": "77b213e7f2e06932be4748e74b48c463"
+ },
+ {
+ "dataPath": "params_shard_84.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.2.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "a650420cd77953d673572db9272e3671"
+ },
+ {
+ "dataPath": "params_shard_85.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.20.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "07461f2dd52e7b9036a8bee83dae63f6"
+ },
+ {
+ "dataPath": "params_shard_86.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.20.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "c671ded30b1d8ec17407dbc9f9036d69"
+ },
+ {
+ "dataPath": "params_shard_87.bin",
+ "format": "raw-shard",
+ "nbytes": 27811840,
+ "records": [
+ {
+ "name": "model.layers.16.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.16.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.18.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 7348224
+ },
+ {
+ "name": "model.layers.19.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 7356416
+ },
+ {
+ "name": "model.layers.19.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 15745024
+ },
+ {
+ "name": "model.layers.2.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 16793600
+ },
+ {
+ "name": "model.layers.20.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 24133632
+ },
+ {
+ "name": "model.layers.20.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 24141824
+ }
+ ],
+ "md5sum": "15ce52123d37a9a2429cbed81d1da8af"
+ },
+ {
+ "dataPath": "params_shard_88.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.25.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "4db3fff85cfa59cba995c4ddccde5105"
+ },
+ {
+ "dataPath": "params_shard_89.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.26.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "11e8e8796d457a6ec871e4cd3e116852"
+ },
+ {
+ "dataPath": "params_shard_90.bin",
+ "format": "raw-shard",
+ "nbytes": 24158208,
+ "records": [
+ {
+ "name": "model.layers.20.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.20.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.21.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 7348224
+ },
+ {
+ "name": "model.layers.22.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 7356416
+ },
+ {
+ "name": "model.layers.22.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 15745024
+ },
+ {
+ "name": "model.layers.24.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 16793600
+ },
+ {
+ "name": "model.layers.25.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 16801792
+ },
+ {
+ "name": "model.layers.25.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 16809984
+ },
+ {
+ "name": "model.layers.26.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 20480000
+ },
+ {
+ "name": "model.layers.27.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 24150016
+ }
+ ],
+ "md5sum": "c60669008ff41dc21607053d1eccaebc"
+ },
+ {
+ "dataPath": "params_shard_91.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.28.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "d9dd04ed0cce96e976e69e453ce3251b"
+ },
+ {
+ "dataPath": "params_shard_92.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.28.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "468e0375e3a6f57e36dd82ee512576bc"
+ },
+ {
+ "dataPath": "params_shard_93.bin",
+ "format": "raw-shard",
+ "nbytes": 25182208,
+ "records": [
+ {
+ "name": "model.layers.27.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.27.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.28.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.28.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 14163968
+ },
+ {
+ "name": "model.layers.28.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 17833984
+ },
+ {
+ "name": "model.layers.28.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 25174016
+ }
+ ],
+ "md5sum": "308be9ecf39c67b265f6e34184e88a38"
+ },
+ {
+ "dataPath": "params_shard_94.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.3.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "734041d34a5b53e3ff858a8917397737"
+ },
+ {
+ "dataPath": "params_shard_95.bin",
+ "format": "raw-shard",
+ "nbytes": 31997952,
+ "records": [
+ {
+ "name": "model.layers.28.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.28.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 8388608
+ },
+ {
+ "name": "model.layers.29.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 9437184
+ },
+ {
+ "name": "model.layers.29.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 17825792
+ },
+ {
+ "name": "model.layers.3.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 18874368
+ },
+ {
+ "name": "model.layers.3.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 18882560
+ },
+ {
+ "name": "model.layers.30.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 22552576
+ },
+ {
+ "name": "model.layers.31.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 22560768
+ },
+ {
+ "name": "model.layers.31.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 30949376
+ }
+ ],
+ "md5sum": "3bc9a61d309ad791ebffda4a3dd705b2"
+ },
+ {
+ "dataPath": "params_shard_96.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.5.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "fb55bea1231a08cf2199b01a1d0bb993"
+ },
+ {
+ "dataPath": "params_shard_97.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.7.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "23ce068b8d009d834336d558dcbe0474"
+ },
+ {
+ "dataPath": "params_shard_98.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.8.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "725c24e682090873b736262152f643d3"
+ },
+ {
+ "dataPath": "params_shard_99.bin",
+ "format": "raw-shard",
+ "nbytes": 29917184,
+ "records": [
+ {
+ "name": "model.layers.4.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.4.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 8388608
+ },
+ {
+ "name": "model.layers.5.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 9437184
+ },
+ {
+ "name": "model.layers.5.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 9445376
+ },
+ {
+ "name": "model.layers.6.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 13115392
+ },
+ {
+ "name": "model.layers.7.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 13123584
+ },
+ {
+ "name": "model.layers.7.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 16793600
+ },
+ {
+ "name": "model.layers.7.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 16801792
+ },
+ {
+ "name": "model.layers.7.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 25190400
+ },
+ {
+ "name": "model.layers.8.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 26238976
+ },
+ {
+ "name": "model.layers.8.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 26247168
+ }
+ ],
+ "md5sum": "e62f0ad863b9ba04638fee26473a3eb4"
+ },
+ {
+ "dataPath": "params_shard_100.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.18.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "4b661f8f6854044bd00c54135afbb728"
+ },
+ {
+ "dataPath": "params_shard_101.bin",
+ "format": "raw-shard",
+ "nbytes": 33079296,
+ "records": [
+ {
+ "name": "model.layers.8.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.8.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 8388608
+ },
+ {
+ "name": "model.layers.9.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 9437184
+ },
+ {
+ "name": "model.layers.9.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1572864,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "model.layers.9.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 23592960
+ },
+ {
+ "name": "model.layers.9.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 31981568
+ },
+ {
+ "name": "model.norm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 33030144
+ },
+ {
+ "name": "model.layers.1.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 33038336
+ },
+ {
+ "name": "model.layers.12.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 33046528
+ },
+ {
+ "name": "model.layers.13.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 33054720
+ },
+ {
+ "name": "model.layers.14.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 33062912
+ },
+ {
+ "name": "model.layers.17.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 33071104
+ }
+ ],
+ "md5sum": "8a877a1c53c0c585a003594762344a03"
+ },
+ {
+ "dataPath": "params_shard_102.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.22.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "27b02096251b20d61801b2c18d38884c"
+ },
+ {
+ "dataPath": "params_shard_103.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.25.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "d125e0787255ba412bba0582ec4113ba"
+ },
+ {
+ "dataPath": "params_shard_104.bin",
+ "format": "raw-shard",
+ "nbytes": 26214400,
+ "records": [
+ {
+ "name": "model.layers.18.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.21.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 3670016
+ },
+ {
+ "name": "model.layers.21.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 12058624
+ },
+ {
+ "name": "model.layers.22.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 13107200
+ },
+ {
+ "name": "model.layers.24.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 16777216
+ },
+ {
+ "name": "model.layers.24.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 25165824
+ }
+ ],
+ "md5sum": "0d3b524da5435b78d348b7cbf6702906"
+ },
+ {
+ "dataPath": "params_shard_105.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.9.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "720114b139ff44092755d63d2a50512d"
+ },
+ {
+ "dataPath": "params_shard_106.bin",
+ "format": "raw-shard",
+ "nbytes": 29900800,
+ "records": [
+ {
+ "name": "model.layers.25.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.27.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.27.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 15728640
+ },
+ {
+ "name": "model.layers.4.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 16777216
+ },
+ {
+ "name": "model.layers.5.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 16785408
+ },
+ {
+ "name": "model.layers.5.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 1048576,
+ "byteOffset": 25174016
+ },
+ {
+ "name": "model.layers.7.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 8192,
+ "byteOffset": 26222592
+ },
+ {
+ "name": "model.layers.9.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "bfloat16",
+ "format": "raw",
+ "nbytes": 3670016,
+ "byteOffset": 26230784
+ }
+ ],
+ "md5sum": "48860c8bc90660d3ba529db23e3df4ad"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/tensor-cache.json b/tensor-cache.json
new file mode 100644
index 0000000000000000000000000000000000000000..8109164fd0cf2cd70dfd815f4d3edd768c75e0a7
--- /dev/null
+++ b/tensor-cache.json
@@ -0,0 +1,4375 @@
+{
+ "metadata": {
+ "ParamSize": 325,
+ "ParamBytes": 5019827200.0,
+ "BitsPerParam": 1.5790556154328332
+ },
+ "records": [
+ {
+ "dataPath": "params_shard_0.bin",
+ "format": "raw-shard",
+ "nbytes": 262674432,
+ "records": [
+ {
+ "name": "lm_head.q_weight",
+ "shape": [
+ 128259,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 262674432,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "1a8db284cf568c04eb50e0a61670b435"
+ },
+ {
+ "dataPath": "params_shard_1.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.0.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "3ba101a32a60e4957d4f341ed2474d54"
+ },
+ {
+ "dataPath": "params_shard_2.bin",
+ "format": "raw-shard",
+ "nbytes": 32842496,
+ "records": [
+ {
+ "name": "lm_head.q_scale",
+ "shape": [
+ 128259,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 32834304,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.0.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 32834304
+ }
+ ],
+ "md5sum": "25b9648a2f7f4bd916a635505f8ed165"
+ },
+ {
+ "dataPath": "params_shard_3.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.1.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "bde72072557ec421c0ff4ae82ce96784"
+ },
+ {
+ "dataPath": "params_shard_4.bin",
+ "format": "raw-shard",
+ "nbytes": 25165824,
+ "records": [
+ {
+ "name": "model.layers.0.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.0.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.0.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 19922944
+ },
+ {
+ "name": "model.layers.1.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 21495808
+ }
+ ],
+ "md5sum": "0bc04e510210d12f4a773ddfcf2012ae"
+ },
+ {
+ "dataPath": "params_shard_5.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.10.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "3b7c7544320febb20cc7b3a3310e4415"
+ },
+ {
+ "dataPath": "params_shard_6.bin",
+ "format": "raw-shard",
+ "nbytes": 21495808,
+ "records": [
+ {
+ "name": "model.layers.1.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.1.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.10.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 14155776
+ }
+ ],
+ "md5sum": "e5bbdd5d1d201a4ae1bb0828a8a52e60"
+ },
+ {
+ "dataPath": "params_shard_7.bin",
+ "format": "raw-shard",
+ "nbytes": 23592960,
+ "records": [
+ {
+ "name": "model.layers.10.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.10.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.10.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.10.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 22544384
+ }
+ ],
+ "md5sum": "a8fd0fb8a5c610ce1c66b6c10d82b030"
+ },
+ {
+ "dataPath": "params_shard_8.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.13.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "ea30f6795d29feeae83af89e7e8fa85c"
+ },
+ {
+ "dataPath": "params_shard_9.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.13.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "746157e8b6c2189cc1f5a8d460a445d3"
+ },
+ {
+ "dataPath": "params_shard_10.bin",
+ "format": "raw-shard",
+ "nbytes": 27271168,
+ "records": [
+ {
+ "name": "model.layers.11.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.11.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.12.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.12.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 22544384
+ },
+ {
+ "name": "model.layers.13.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 23592960
+ },
+ {
+ "name": "model.layers.13.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 23601152
+ }
+ ],
+ "md5sum": "edcb64768413917f8fdb66fd7c632851"
+ },
+ {
+ "dataPath": "params_shard_11.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.14.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "0247610b68f5900228a65256358f0120"
+ },
+ {
+ "dataPath": "params_shard_12.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.14.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "4f7e7f8bed5b1ee976f4b7dfe143223d"
+ },
+ {
+ "dataPath": "params_shard_13.bin",
+ "format": "raw-shard",
+ "nbytes": 32505856,
+ "records": [
+ {
+ "name": "model.layers.13.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.13.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.13.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 19922944
+ },
+ {
+ "name": "model.layers.14.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 21495808
+ },
+ {
+ "name": "model.layers.14.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 25165824
+ }
+ ],
+ "md5sum": "9b95ca4090dbe045baa76e5c0d444fcf"
+ },
+ {
+ "dataPath": "params_shard_14.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.15.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "083256552395630b71e265052c56b44f"
+ },
+ {
+ "dataPath": "params_shard_15.bin",
+ "format": "raw-shard",
+ "nbytes": 21495808,
+ "records": [
+ {
+ "name": "model.layers.14.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.14.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.15.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 14155776
+ }
+ ],
+ "md5sum": "0fb7eba5c12daa5c6c559d3351c3ec89"
+ },
+ {
+ "dataPath": "params_shard_16.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.16.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "06e904967db15497efbedf6df26d70d2"
+ },
+ {
+ "dataPath": "params_shard_17.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.17.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "83e68fa78e4048b638bf27f4ceccc00a"
+ },
+ {
+ "dataPath": "params_shard_18.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.18.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "1f83fe0c80b8e8e2b8741f4b1792206c"
+ },
+ {
+ "dataPath": "params_shard_19.bin",
+ "format": "raw-shard",
+ "nbytes": 30949376,
+ "records": [
+ {
+ "name": "model.layers.15.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.15.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.16.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.16.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 14163968
+ },
+ {
+ "name": "model.layers.17.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 17833984
+ },
+ {
+ "name": "model.layers.17.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 21504000
+ },
+ {
+ "name": "model.layers.17.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 29892608
+ },
+ {
+ "name": "model.layers.18.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 30941184
+ }
+ ],
+ "md5sum": "512034d4872f6b17183d1c51b93caa7d"
+ },
+ {
+ "dataPath": "params_shard_20.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.19.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "e1dccd0df2c5d770645275ea79ded18a"
+ },
+ {
+ "dataPath": "params_shard_21.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.2.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "cdd79ac3215afba2d0a8f249fe99bd48"
+ },
+ {
+ "dataPath": "params_shard_22.bin",
+ "format": "raw-shard",
+ "nbytes": 32522240,
+ "records": [
+ {
+ "name": "model.layers.18.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.19.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.19.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 7348224
+ },
+ {
+ "name": "model.layers.19.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 14688256
+ },
+ {
+ "name": "model.layers.19.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 27271168
+ },
+ {
+ "name": "model.layers.2.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 28844032
+ },
+ {
+ "name": "model.layers.2.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 32514048
+ }
+ ],
+ "md5sum": "e45b09ae4128a2291092f533ff8af6b2"
+ },
+ {
+ "dataPath": "params_shard_23.bin",
+ "format": "raw-shard",
+ "nbytes": 23592960,
+ "records": [
+ {
+ "name": "model.layers.2.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.2.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 8388608
+ },
+ {
+ "name": "model.layers.2.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 9437184
+ },
+ {
+ "name": "model.layers.2.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 22020096
+ }
+ ],
+ "md5sum": "ee6e8e015b80bb375568dbd0a9385600"
+ },
+ {
+ "dataPath": "params_shard_24.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.23.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "05228c3e13478e4248b37d8d9305b779"
+ },
+ {
+ "dataPath": "params_shard_25.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.23.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "75588b4068058bea27bf5beb45573db1"
+ },
+ {
+ "dataPath": "params_shard_26.bin",
+ "format": "raw-shard",
+ "nbytes": 31997952,
+ "records": [
+ {
+ "name": "model.layers.20.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.20.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.21.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.21.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 26738688
+ },
+ {
+ "name": "model.layers.22.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 28311552
+ },
+ {
+ "name": "model.layers.22.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 28319744
+ },
+ {
+ "name": "model.layers.23.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 28327936
+ }
+ ],
+ "md5sum": "d78184ef263ba7c0cd48bf8992a8763a"
+ },
+ {
+ "dataPath": "params_shard_27.bin",
+ "format": "raw-shard",
+ "nbytes": 21504000,
+ "records": [
+ {
+ "name": "model.layers.23.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.23.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.23.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 7348224
+ },
+ {
+ "name": "model.layers.23.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 19931136
+ }
+ ],
+ "md5sum": "a7e024f53dcbb8c403dc5846471e1230"
+ },
+ {
+ "dataPath": "params_shard_28.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.27.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "2f555714241ccfbc775ed5dd148c141a"
+ },
+ {
+ "dataPath": "params_shard_29.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.27.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "5ed3a430e7bab4e5f4b57a5c6de411a5"
+ },
+ {
+ "dataPath": "params_shard_30.bin",
+ "format": "raw-shard",
+ "nbytes": 31989760,
+ "records": [
+ {
+ "name": "model.layers.25.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.25.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.26.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.26.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 14163968
+ },
+ {
+ "name": "model.layers.26.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 26746880
+ },
+ {
+ "name": "model.layers.27.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 28319744
+ }
+ ],
+ "md5sum": "6a0b5ed94f61522b1d685e08d79fb8b3"
+ },
+ {
+ "dataPath": "params_shard_31.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.29.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "1d431ad2068ece3fe4d5c79a3299d003"
+ },
+ {
+ "dataPath": "params_shard_32.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.3.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "104c92ece5860e60e5ddf41b21f33260"
+ },
+ {
+ "dataPath": "params_shard_33.bin",
+ "format": "raw-shard",
+ "nbytes": 28852224,
+ "records": [
+ {
+ "name": "model.layers.27.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.27.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.28.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 7348224
+ },
+ {
+ "name": "model.layers.28.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 19931136
+ },
+ {
+ "name": "model.layers.29.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 21504000
+ },
+ {
+ "name": "model.layers.29.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 21512192
+ }
+ ],
+ "md5sum": "5e15cd3eef6c8fa6b29bacfc72085941"
+ },
+ {
+ "dataPath": "params_shard_34.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.30.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "42d39b7c0a48c578d45490617652f94a"
+ },
+ {
+ "dataPath": "params_shard_35.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.30.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "2f52a6a6b635f360945cd50c6f77d450"
+ },
+ {
+ "dataPath": "params_shard_36.bin",
+ "format": "raw-shard",
+ "nbytes": 32530432,
+ "records": [
+ {
+ "name": "model.layers.3.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.3.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.30.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 7348224
+ },
+ {
+ "name": "model.layers.30.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 11018240
+ },
+ {
+ "name": "model.layers.31.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 18358272
+ },
+ {
+ "name": "model.layers.31.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 18366464
+ },
+ {
+ "name": "model.layers.31.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 18374656
+ },
+ {
+ "name": "model.layers.31.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 30957568
+ }
+ ],
+ "md5sum": "f090e66edd23dcf73b05d64accde1148"
+ },
+ {
+ "dataPath": "params_shard_37.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.5.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "52c607eada0544fd8b300e3bafe85b37"
+ },
+ {
+ "dataPath": "params_shard_38.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.6.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "71459776ec3fe17d8912f3f45d1633e5"
+ },
+ {
+ "dataPath": "params_shard_39.bin",
+ "format": "raw-shard",
+ "nbytes": 28844032,
+ "records": [
+ {
+ "name": "model.layers.4.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.4.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.5.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.6.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 21495808
+ },
+ {
+ "name": "model.layers.6.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 21504000
+ }
+ ],
+ "md5sum": "98cad2c978ee84e32aea8232f5d9ca2e"
+ },
+ {
+ "dataPath": "params_shard_40.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.7.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "eff52722d4f6bbb88925a458be7d22d6"
+ },
+ {
+ "dataPath": "params_shard_41.bin",
+ "format": "raw-shard",
+ "nbytes": 21495808,
+ "records": [
+ {
+ "name": "model.layers.6.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.6.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.7.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 14155776
+ }
+ ],
+ "md5sum": "8be78c79bcbc7dd59723a564cfbc72f6"
+ },
+ {
+ "dataPath": "params_shard_42.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.9.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "99167fb62218d3e26034c197f77a8aa3"
+ },
+ {
+ "dataPath": "params_shard_43.bin",
+ "format": "raw-shard",
+ "nbytes": 262674432,
+ "records": [
+ {
+ "name": "model.embed_tokens.q_weight",
+ "shape": [
+ 128259,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 262674432,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "be6213096f7954a8c08120cb6960f78e"
+ },
+ {
+ "dataPath": "params_shard_44.bin",
+ "format": "raw-shard",
+ "nbytes": 32834304,
+ "records": [
+ {
+ "name": "model.embed_tokens.q_scale",
+ "shape": [
+ 128259,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 32834304,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "1c9142af090a3c0ad04a40a28500fd99"
+ },
+ {
+ "dataPath": "params_shard_45.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.1.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "c7d1f88d42573d7bba4ab5c88acee7b7"
+ },
+ {
+ "dataPath": "params_shard_46.bin",
+ "format": "raw-shard",
+ "nbytes": 30957568,
+ "records": [
+ {
+ "name": "model.layers.7.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.7.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.8.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.9.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 14163968
+ },
+ {
+ "name": "model.layers.9.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 21504000
+ },
+ {
+ "name": "model.layers.0.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 21512192
+ },
+ {
+ "name": "model.layers.0.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 21520384
+ },
+ {
+ "name": "model.layers.0.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 29908992
+ }
+ ],
+ "md5sum": "05aaab3081fcc13134141848e282aa12"
+ },
+ {
+ "dataPath": "params_shard_47.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.11.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "9ade4887e2074741f712d3cab5823322"
+ },
+ {
+ "dataPath": "params_shard_48.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.12.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "00688a4fd29532f4caec6b5b509dcbee"
+ },
+ {
+ "dataPath": "params_shard_49.bin",
+ "format": "raw-shard",
+ "nbytes": 28860416,
+ "records": [
+ {
+ "name": "model.layers.1.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.11.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.11.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 7348224
+ },
+ {
+ "name": "model.layers.11.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 11018240
+ },
+ {
+ "name": "model.layers.12.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 11026432
+ },
+ {
+ "name": "model.layers.12.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 11034624
+ },
+ {
+ "name": "model.layers.12.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 14704640
+ },
+ {
+ "name": "model.layers.12.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 27287552
+ }
+ ],
+ "md5sum": "11087a54cf33aefa7161e775dce54f98"
+ },
+ {
+ "dataPath": "params_shard_50.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.15.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "4121441eb5031984ac17c004c3bc8942"
+ },
+ {
+ "dataPath": "params_shard_51.bin",
+ "format": "raw-shard",
+ "nbytes": 27262976,
+ "records": [
+ {
+ "name": "model.layers.14.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.14.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 8388608
+ },
+ {
+ "name": "model.layers.15.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 9437184
+ },
+ {
+ "name": "model.layers.16.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 13107200
+ },
+ {
+ "name": "model.layers.16.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 25690112
+ }
+ ],
+ "md5sum": "8969128f561da2d8b4e77e8d1ed878f1"
+ },
+ {
+ "dataPath": "params_shard_52.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.17.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "cf2d41cca20a7323d0bf3668f3544747"
+ },
+ {
+ "dataPath": "params_shard_53.bin",
+ "format": "raw-shard",
+ "nbytes": 30941184,
+ "records": [
+ {
+ "name": "model.layers.16.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.16.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 8388608
+ },
+ {
+ "name": "model.layers.17.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 9437184
+ },
+ {
+ "name": "model.layers.17.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 9445376
+ },
+ {
+ "name": "model.layers.17.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 16785408
+ },
+ {
+ "name": "model.layers.17.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 29368320
+ }
+ ],
+ "md5sum": "6b824c284774312b0d1ba3b179ce3423"
+ },
+ {
+ "dataPath": "params_shard_54.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.19.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "545ffeb92d1a3a9fed54475373707bd8"
+ },
+ {
+ "dataPath": "params_shard_55.bin",
+ "format": "raw-shard",
+ "nbytes": 27279360,
+ "records": [
+ {
+ "name": "model.layers.18.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.18.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.18.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.18.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 22544384
+ },
+ {
+ "name": "model.layers.19.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 23592960
+ },
+ {
+ "name": "model.layers.19.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 27262976
+ },
+ {
+ "name": "model.layers.2.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 27271168
+ }
+ ],
+ "md5sum": "16adcd1bcd0d080330eca9b1570f7f71"
+ },
+ {
+ "dataPath": "params_shard_56.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.21.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "431821dd1c8fe6b69b7384162f928dac"
+ },
+ {
+ "dataPath": "params_shard_57.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.21.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "8d4f5a09c2c6eca9bbd67de816f7ac6a"
+ },
+ {
+ "dataPath": "params_shard_58.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.22.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "791906fa74ff6201182223773d8f52ba"
+ },
+ {
+ "dataPath": "params_shard_59.bin",
+ "format": "raw-shard",
+ "nbytes": 27795456,
+ "records": [
+ {
+ "name": "model.layers.20.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.20.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 8388608
+ },
+ {
+ "name": "model.layers.21.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 9437184
+ },
+ {
+ "name": "model.layers.21.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 9445376
+ },
+ {
+ "name": "model.layers.21.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 13115392
+ },
+ {
+ "name": "model.layers.22.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 20455424
+ }
+ ],
+ "md5sum": "e58836bbb970d6e5a867c49aa72ab1a3"
+ },
+ {
+ "dataPath": "params_shard_60.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.24.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "221e435ae0e0ba172d3fe40b371366d2"
+ },
+ {
+ "dataPath": "params_shard_61.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.24.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "d411fb0e5f70728fb1d078a1b5f453f3"
+ },
+ {
+ "dataPath": "params_shard_62.bin",
+ "format": "raw-shard",
+ "nbytes": 27271168,
+ "records": [
+ {
+ "name": "model.layers.22.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.22.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.23.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.23.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 14163968
+ },
+ {
+ "name": "model.layers.23.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 22552576
+ },
+ {
+ "name": "model.layers.24.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 23601152
+ }
+ ],
+ "md5sum": "af3ad918225372ce0a49a92916160672"
+ },
+ {
+ "dataPath": "params_shard_63.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.26.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "e7ab23102cbf9b825fd4908c42a7d271"
+ },
+ {
+ "dataPath": "params_shard_64.bin",
+ "format": "raw-shard",
+ "nbytes": 30957568,
+ "records": [
+ {
+ "name": "model.layers.24.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.24.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.24.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 7348224
+ },
+ {
+ "name": "model.layers.24.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 19931136
+ },
+ {
+ "name": "model.layers.25.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 21504000
+ },
+ {
+ "name": "model.layers.25.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 21512192
+ },
+ {
+ "name": "model.layers.25.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 29900800
+ },
+ {
+ "name": "model.layers.26.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 30949376
+ }
+ ],
+ "md5sum": "d16feaf8957f72286374c7c7dea778ee"
+ },
+ {
+ "dataPath": "params_shard_65.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.29.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "4ec8dff5d06870bff3d4f8044cdef2a5"
+ },
+ {
+ "dataPath": "params_shard_66.bin",
+ "format": "raw-shard",
+ "nbytes": 33038336,
+ "records": [
+ {
+ "name": "model.layers.26.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.26.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.26.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 15728640
+ },
+ {
+ "name": "model.layers.29.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 16777216
+ },
+ {
+ "name": "model.layers.29.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 20447232
+ },
+ {
+ "name": "model.layers.29.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 20455424
+ }
+ ],
+ "md5sum": "8bf88524e873c3c15f8ce9d1e4ab4384"
+ },
+ {
+ "dataPath": "params_shard_67.bin",
+ "format": "raw-shard",
+ "nbytes": 25174016,
+ "records": [
+ {
+ "name": "model.layers.29.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.3.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 1572864
+ },
+ {
+ "name": "model.layers.3.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.3.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 15728640
+ },
+ {
+ "name": "model.layers.3.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 24117248
+ },
+ {
+ "name": "model.layers.30.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 25165824
+ }
+ ],
+ "md5sum": "9af87b8a9dc8e8319a311dccf596b747"
+ },
+ {
+ "dataPath": "params_shard_68.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.31.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "d99f164f744f5d26fadac05bb4b9d7a9"
+ },
+ {
+ "dataPath": "params_shard_69.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.31.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "10d0645f08b1772197615081aeae671c"
+ },
+ {
+ "dataPath": "params_shard_70.bin",
+ "format": "raw-shard",
+ "nbytes": 27262976,
+ "records": [
+ {
+ "name": "model.layers.30.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.30.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.30.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.30.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 22544384
+ },
+ {
+ "name": "model.layers.31.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 23592960
+ }
+ ],
+ "md5sum": "72adbac5687d46d5b21e01c4ea6ebca2"
+ },
+ {
+ "dataPath": "params_shard_71.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.4.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "221bba05efcce9895bf3a1fd65a7dea7"
+ },
+ {
+ "dataPath": "params_shard_72.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.4.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "4a1e7a56eb466989e9b999c648ecee32"
+ },
+ {
+ "dataPath": "params_shard_73.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.6.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "6ee645cad39ca412f5399a4c8a8f773b"
+ },
+ {
+ "dataPath": "params_shard_74.bin",
+ "format": "raw-shard",
+ "nbytes": 32522240,
+ "records": [
+ {
+ "name": "model.layers.31.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.4.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.4.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 11010048
+ },
+ {
+ "name": "model.layers.4.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 18350080
+ },
+ {
+ "name": "model.layers.5.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 18358272
+ },
+ {
+ "name": "model.layers.5.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 18366464
+ },
+ {
+ "name": "model.layers.5.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 30949376
+ }
+ ],
+ "md5sum": "e13680d69bfd480a0047f78e2d663fcd"
+ },
+ {
+ "dataPath": "params_shard_75.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.8.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "f2578d375b31a2b3c288760c71b91325"
+ },
+ {
+ "dataPath": "params_shard_76.bin",
+ "format": "raw-shard",
+ "nbytes": 33030144,
+ "records": [
+ {
+ "name": "model.layers.6.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.6.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 3670016
+ },
+ {
+ "name": "model.layers.6.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 12058624
+ },
+ {
+ "name": "model.layers.8.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 13107200
+ },
+ {
+ "name": "model.layers.8.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 20447232
+ }
+ ],
+ "md5sum": "a5f785ce02bedfed686c5569d1750f0a"
+ },
+ {
+ "dataPath": "params_shard_77.bin",
+ "format": "raw-shard",
+ "nbytes": 30941184,
+ "records": [
+ {
+ "name": "model.layers.8.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.9.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 1572864
+ },
+ {
+ "name": "model.layers.0.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 1581056
+ }
+ ],
+ "md5sum": "ef72a7b797e70980344ca361f597b75a"
+ },
+ {
+ "dataPath": "params_shard_78.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.10.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "fb56a3b0f0fe0c1bc523b2e06672f0b8"
+ },
+ {
+ "dataPath": "params_shard_79.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.11.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "cb9d334bd1ee278ed3b95f9632f3a8b0"
+ },
+ {
+ "dataPath": "params_shard_80.bin",
+ "format": "raw-shard",
+ "nbytes": 32530432,
+ "records": [
+ {
+ "name": "model.layers.0.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.1.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 3670016
+ },
+ {
+ "name": "model.layers.1.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 3678208
+ },
+ {
+ "name": "model.layers.1.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 12066816
+ },
+ {
+ "name": "model.layers.10.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 13115392
+ },
+ {
+ "name": "model.layers.10.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 13123584
+ },
+ {
+ "name": "model.layers.10.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 16793600
+ },
+ {
+ "name": "model.layers.11.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 16801792
+ },
+ {
+ "name": "model.layers.11.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 24141824
+ }
+ ],
+ "md5sum": "44a141488093aef939ff54f598ab335d"
+ },
+ {
+ "dataPath": "params_shard_81.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.12.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "691c093af629bba4e5e92a37b380aecb"
+ },
+ {
+ "dataPath": "params_shard_82.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.16.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "bfb0485c993d15439860f8c302f320ac"
+ },
+ {
+ "dataPath": "params_shard_83.bin",
+ "format": "raw-shard",
+ "nbytes": 27287552,
+ "records": [
+ {
+ "name": "model.layers.11.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.12.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 1048576
+ },
+ {
+ "name": "model.layers.13.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 8388608
+ },
+ {
+ "name": "model.layers.13.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 16777216
+ },
+ {
+ "name": "model.layers.14.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 17825792
+ },
+ {
+ "name": "model.layers.15.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 17833984
+ },
+ {
+ "name": "model.layers.15.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 17842176
+ },
+ {
+ "name": "model.layers.15.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 17850368
+ },
+ {
+ "name": "model.layers.15.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 26238976
+ }
+ ],
+ "md5sum": "77b213e7f2e06932be4748e74b48c463"
+ },
+ {
+ "dataPath": "params_shard_84.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.2.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "a650420cd77953d673572db9272e3671"
+ },
+ {
+ "dataPath": "params_shard_85.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.20.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "07461f2dd52e7b9036a8bee83dae63f6"
+ },
+ {
+ "dataPath": "params_shard_86.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.20.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "c671ded30b1d8ec17407dbc9f9036d69"
+ },
+ {
+ "dataPath": "params_shard_87.bin",
+ "format": "raw-shard",
+ "nbytes": 27811840,
+ "records": [
+ {
+ "name": "model.layers.16.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.16.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.18.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 7348224
+ },
+ {
+ "name": "model.layers.19.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 7356416
+ },
+ {
+ "name": "model.layers.19.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 15745024
+ },
+ {
+ "name": "model.layers.2.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 16793600
+ },
+ {
+ "name": "model.layers.20.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 24133632
+ },
+ {
+ "name": "model.layers.20.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 24141824
+ }
+ ],
+ "md5sum": "15ce52123d37a9a2429cbed81d1da8af"
+ },
+ {
+ "dataPath": "params_shard_88.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.25.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "4db3fff85cfa59cba995c4ddccde5105"
+ },
+ {
+ "dataPath": "params_shard_89.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.26.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "11e8e8796d457a6ec871e4cd3e116852"
+ },
+ {
+ "dataPath": "params_shard_90.bin",
+ "format": "raw-shard",
+ "nbytes": 24158208,
+ "records": [
+ {
+ "name": "model.layers.20.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.20.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.21.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 7348224
+ },
+ {
+ "name": "model.layers.22.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 7356416
+ },
+ {
+ "name": "model.layers.22.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 15745024
+ },
+ {
+ "name": "model.layers.24.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 16793600
+ },
+ {
+ "name": "model.layers.25.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 16801792
+ },
+ {
+ "name": "model.layers.25.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 16809984
+ },
+ {
+ "name": "model.layers.26.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 20480000
+ },
+ {
+ "name": "model.layers.27.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 24150016
+ }
+ ],
+ "md5sum": "c60669008ff41dc21607053d1eccaebc"
+ },
+ {
+ "dataPath": "params_shard_91.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.28.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "d9dd04ed0cce96e976e69e453ce3251b"
+ },
+ {
+ "dataPath": "params_shard_92.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.28.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "468e0375e3a6f57e36dd82ee512576bc"
+ },
+ {
+ "dataPath": "params_shard_93.bin",
+ "format": "raw-shard",
+ "nbytes": 25182208,
+ "records": [
+ {
+ "name": "model.layers.27.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.27.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 12582912
+ },
+ {
+ "name": "model.layers.28.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 14155776
+ },
+ {
+ "name": "model.layers.28.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 14163968
+ },
+ {
+ "name": "model.layers.28.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 17833984
+ },
+ {
+ "name": "model.layers.28.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 25174016
+ }
+ ],
+ "md5sum": "308be9ecf39c67b265f6e34184e88a38"
+ },
+ {
+ "dataPath": "params_shard_94.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.3.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "734041d34a5b53e3ff858a8917397737"
+ },
+ {
+ "dataPath": "params_shard_95.bin",
+ "format": "raw-shard",
+ "nbytes": 31997952,
+ "records": [
+ {
+ "name": "model.layers.28.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.28.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 8388608
+ },
+ {
+ "name": "model.layers.29.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 9437184
+ },
+ {
+ "name": "model.layers.29.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 17825792
+ },
+ {
+ "name": "model.layers.3.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 18874368
+ },
+ {
+ "name": "model.layers.3.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 18882560
+ },
+ {
+ "name": "model.layers.30.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 22552576
+ },
+ {
+ "name": "model.layers.31.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 22560768
+ },
+ {
+ "name": "model.layers.31.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 30949376
+ }
+ ],
+ "md5sum": "3bc9a61d309ad791ebffda4a3dd705b2"
+ },
+ {
+ "dataPath": "params_shard_96.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.5.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "fb55bea1231a08cf2199b01a1d0bb993"
+ },
+ {
+ "dataPath": "params_shard_97.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.7.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "23ce068b8d009d834336d558dcbe0474"
+ },
+ {
+ "dataPath": "params_shard_98.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.8.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "725c24e682090873b736262152f643d3"
+ },
+ {
+ "dataPath": "params_shard_99.bin",
+ "format": "raw-shard",
+ "nbytes": 29917184,
+ "records": [
+ {
+ "name": "model.layers.4.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.4.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 8388608
+ },
+ {
+ "name": "model.layers.5.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 9437184
+ },
+ {
+ "name": "model.layers.5.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 9445376
+ },
+ {
+ "name": "model.layers.6.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 13115392
+ },
+ {
+ "name": "model.layers.7.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 13123584
+ },
+ {
+ "name": "model.layers.7.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 16793600
+ },
+ {
+ "name": "model.layers.7.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 16801792
+ },
+ {
+ "name": "model.layers.7.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 25190400
+ },
+ {
+ "name": "model.layers.8.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 26238976
+ },
+ {
+ "name": "model.layers.8.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 26247168
+ }
+ ],
+ "md5sum": "e62f0ad863b9ba04638fee26473a3eb4"
+ },
+ {
+ "dataPath": "params_shard_100.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.18.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "4b661f8f6854044bd00c54135afbb728"
+ },
+ {
+ "dataPath": "params_shard_101.bin",
+ "format": "raw-shard",
+ "nbytes": 33079296,
+ "records": [
+ {
+ "name": "model.layers.8.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.8.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 8388608
+ },
+ {
+ "name": "model.layers.9.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 6144,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 12582912,
+ "byteOffset": 9437184
+ },
+ {
+ "name": "model.layers.9.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 6144,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1572864,
+ "byteOffset": 22020096
+ },
+ {
+ "name": "model.layers.9.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 23592960
+ },
+ {
+ "name": "model.layers.9.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 31981568
+ },
+ {
+ "name": "model.norm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 33030144
+ },
+ {
+ "name": "model.layers.1.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 33038336
+ },
+ {
+ "name": "model.layers.12.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 33046528
+ },
+ {
+ "name": "model.layers.13.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 33054720
+ },
+ {
+ "name": "model.layers.14.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 33062912
+ },
+ {
+ "name": "model.layers.17.post_attention_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 33071104
+ }
+ ],
+ "md5sum": "8a877a1c53c0c585a003594762344a03"
+ },
+ {
+ "dataPath": "params_shard_102.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.22.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "27b02096251b20d61801b2c18d38884c"
+ },
+ {
+ "dataPath": "params_shard_103.bin",
+ "format": "raw-shard",
+ "nbytes": 58720256,
+ "records": [
+ {
+ "name": "model.layers.25.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 28672,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 58720256,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "d125e0787255ba412bba0582ec4113ba"
+ },
+ {
+ "dataPath": "params_shard_104.bin",
+ "format": "raw-shard",
+ "nbytes": 26214400,
+ "records": [
+ {
+ "name": "model.layers.18.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.21.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 3670016
+ },
+ {
+ "name": "model.layers.21.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 12058624
+ },
+ {
+ "name": "model.layers.22.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 13107200
+ },
+ {
+ "name": "model.layers.24.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 16777216
+ },
+ {
+ "name": "model.layers.24.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 25165824
+ }
+ ],
+ "md5sum": "0d3b524da5435b78d348b7cbf6702906"
+ },
+ {
+ "dataPath": "params_shard_105.bin",
+ "format": "raw-shard",
+ "nbytes": 29360128,
+ "records": [
+ {
+ "name": "model.layers.9.mlp.down_proj.q_weight",
+ "shape": [
+ 4096,
+ 1792
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 29360128,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "720114b139ff44092755d63d2a50512d"
+ },
+ {
+ "dataPath": "params_shard_106.bin",
+ "format": "raw-shard",
+ "nbytes": 29900800,
+ "records": [
+ {
+ "name": "model.layers.25.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 28672,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 7340032,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.27.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 7340032
+ },
+ {
+ "name": "model.layers.27.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 15728640
+ },
+ {
+ "name": "model.layers.4.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 16777216
+ },
+ {
+ "name": "model.layers.5.self_attn.o_proj.q_weight",
+ "shape": [
+ 4096,
+ 512
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 8388608,
+ "byteOffset": 16785408
+ },
+ {
+ "name": "model.layers.5.self_attn.o_proj.q_scale",
+ "shape": [
+ 4096,
+ 128
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 1048576,
+ "byteOffset": 25174016
+ },
+ {
+ "name": "model.layers.7.input_layernorm.weight",
+ "shape": [
+ 4096
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 8192,
+ "byteOffset": 26222592
+ },
+ {
+ "name": "model.layers.9.mlp.down_proj.q_scale",
+ "shape": [
+ 4096,
+ 448
+ ],
+ "dtype": "float32",
+ "format": "f32-to-bf16",
+ "nbytes": 3670016,
+ "byteOffset": 26230784
+ }
+ ],
+ "md5sum": "48860c8bc90660d3ba529db23e3df4ad"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..de3c72f94f3d8f74ac290d0f405f16c7fb48637b
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:31d17e500c82ff116fc14d4dd0106a08e49bd3512d8a81473047121ef8ea09f9
+size 17210524
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..36c19e04d1d497adb217b6481528bdae7cc61151
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,2088 @@
+{
+ "added_tokens_decoder": {
+ "128000": {
+ "content": "<|begin_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128001": {
+ "content": "<|end_of_text|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128002": {
+ "content": "<|reserved_special_token_0|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128003": {
+ "content": "<|reserved_special_token_1|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128004": {
+ "content": "<|reserved_special_token_2|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128005": {
+ "content": "<|reserved_special_token_3|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128006": {
+ "content": "<|start_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128007": {
+ "content": "<|end_header_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128008": {
+ "content": "<|reserved_special_token_4|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128009": {
+ "content": "<|eot_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128010": {
+ "content": "<|reserved_special_token_5|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128011": {
+ "content": "<|reserved_special_token_6|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128012": {
+ "content": "<|reserved_special_token_7|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128013": {
+ "content": "<|reserved_special_token_8|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128014": {
+ "content": "<|reserved_special_token_9|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128015": {
+ "content": "<|reserved_special_token_10|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128016": {
+ "content": "<|reserved_special_token_11|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128017": {
+ "content": "<|reserved_special_token_12|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128018": {
+ "content": "<|reserved_special_token_13|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128019": {
+ "content": "<|reserved_special_token_14|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128020": {
+ "content": "<|reserved_special_token_15|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128021": {
+ "content": "<|reserved_special_token_16|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128022": {
+ "content": "<|reserved_special_token_17|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128023": {
+ "content": "<|reserved_special_token_18|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128024": {
+ "content": "<|reserved_special_token_19|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128025": {
+ "content": "<|reserved_special_token_20|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128026": {
+ "content": "<|reserved_special_token_21|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128027": {
+ "content": "<|reserved_special_token_22|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128028": {
+ "content": "<|reserved_special_token_23|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128029": {
+ "content": "<|reserved_special_token_24|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128030": {
+ "content": "<|reserved_special_token_25|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128031": {
+ "content": "<|reserved_special_token_26|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128032": {
+ "content": "<|reserved_special_token_27|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128033": {
+ "content": "<|reserved_special_token_28|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128034": {
+ "content": "<|reserved_special_token_29|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128035": {
+ "content": "<|reserved_special_token_30|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128036": {
+ "content": "<|reserved_special_token_31|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128037": {
+ "content": "<|reserved_special_token_32|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128038": {
+ "content": "<|reserved_special_token_33|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128039": {
+ "content": "<|reserved_special_token_34|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128040": {
+ "content": "<|reserved_special_token_35|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128041": {
+ "content": "<|reserved_special_token_36|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128042": {
+ "content": "<|reserved_special_token_37|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128043": {
+ "content": "<|reserved_special_token_38|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128044": {
+ "content": "<|reserved_special_token_39|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128045": {
+ "content": "<|reserved_special_token_40|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128046": {
+ "content": "<|reserved_special_token_41|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128047": {
+ "content": "<|reserved_special_token_42|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128048": {
+ "content": "<|reserved_special_token_43|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128049": {
+ "content": "<|reserved_special_token_44|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128050": {
+ "content": "<|reserved_special_token_45|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128051": {
+ "content": "<|reserved_special_token_46|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128052": {
+ "content": "<|reserved_special_token_47|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128053": {
+ "content": "<|reserved_special_token_48|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128054": {
+ "content": "<|reserved_special_token_49|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128055": {
+ "content": "<|reserved_special_token_50|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128056": {
+ "content": "<|reserved_special_token_51|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128057": {
+ "content": "<|reserved_special_token_52|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128058": {
+ "content": "<|reserved_special_token_53|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128059": {
+ "content": "<|reserved_special_token_54|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128060": {
+ "content": "<|reserved_special_token_55|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128061": {
+ "content": "<|reserved_special_token_56|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128062": {
+ "content": "<|reserved_special_token_57|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128063": {
+ "content": "<|reserved_special_token_58|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128064": {
+ "content": "<|reserved_special_token_59|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128065": {
+ "content": "<|reserved_special_token_60|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128066": {
+ "content": "<|reserved_special_token_61|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128067": {
+ "content": "<|reserved_special_token_62|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128068": {
+ "content": "<|reserved_special_token_63|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128069": {
+ "content": "<|reserved_special_token_64|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128070": {
+ "content": "<|reserved_special_token_65|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128071": {
+ "content": "<|reserved_special_token_66|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128072": {
+ "content": "<|reserved_special_token_67|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128073": {
+ "content": "<|reserved_special_token_68|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128074": {
+ "content": "<|reserved_special_token_69|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128075": {
+ "content": "<|reserved_special_token_70|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128076": {
+ "content": "<|reserved_special_token_71|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128077": {
+ "content": "<|reserved_special_token_72|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128078": {
+ "content": "<|reserved_special_token_73|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128079": {
+ "content": "<|reserved_special_token_74|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128080": {
+ "content": "<|reserved_special_token_75|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128081": {
+ "content": "<|reserved_special_token_76|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128082": {
+ "content": "<|reserved_special_token_77|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128083": {
+ "content": "<|reserved_special_token_78|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128084": {
+ "content": "<|reserved_special_token_79|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128085": {
+ "content": "<|reserved_special_token_80|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128086": {
+ "content": "<|reserved_special_token_81|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128087": {
+ "content": "<|reserved_special_token_82|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128088": {
+ "content": "<|reserved_special_token_83|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128089": {
+ "content": "<|reserved_special_token_84|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128090": {
+ "content": "<|reserved_special_token_85|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128091": {
+ "content": "<|reserved_special_token_86|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128092": {
+ "content": "<|reserved_special_token_87|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128093": {
+ "content": "<|reserved_special_token_88|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128094": {
+ "content": "<|reserved_special_token_89|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128095": {
+ "content": "<|reserved_special_token_90|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128096": {
+ "content": "<|reserved_special_token_91|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128097": {
+ "content": "<|reserved_special_token_92|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128098": {
+ "content": "<|reserved_special_token_93|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128099": {
+ "content": "<|reserved_special_token_94|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128100": {
+ "content": "<|reserved_special_token_95|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128101": {
+ "content": "<|reserved_special_token_96|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128102": {
+ "content": "<|reserved_special_token_97|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128103": {
+ "content": "<|reserved_special_token_98|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128104": {
+ "content": "<|reserved_special_token_99|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128105": {
+ "content": "<|reserved_special_token_100|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128106": {
+ "content": "<|reserved_special_token_101|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128107": {
+ "content": "<|reserved_special_token_102|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128108": {
+ "content": "<|reserved_special_token_103|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128109": {
+ "content": "<|reserved_special_token_104|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128110": {
+ "content": "<|reserved_special_token_105|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128111": {
+ "content": "<|reserved_special_token_106|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128112": {
+ "content": "<|reserved_special_token_107|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128113": {
+ "content": "<|reserved_special_token_108|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128114": {
+ "content": "<|reserved_special_token_109|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128115": {
+ "content": "<|reserved_special_token_110|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128116": {
+ "content": "<|reserved_special_token_111|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128117": {
+ "content": "<|reserved_special_token_112|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128118": {
+ "content": "<|reserved_special_token_113|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128119": {
+ "content": "<|reserved_special_token_114|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128120": {
+ "content": "<|reserved_special_token_115|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128121": {
+ "content": "<|reserved_special_token_116|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128122": {
+ "content": "<|reserved_special_token_117|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128123": {
+ "content": "<|reserved_special_token_118|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128124": {
+ "content": "<|reserved_special_token_119|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128125": {
+ "content": "<|reserved_special_token_120|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128126": {
+ "content": "<|reserved_special_token_121|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128127": {
+ "content": "<|reserved_special_token_122|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128128": {
+ "content": "<|reserved_special_token_123|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128129": {
+ "content": "<|reserved_special_token_124|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128130": {
+ "content": "<|reserved_special_token_125|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128131": {
+ "content": "<|reserved_special_token_126|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128132": {
+ "content": "<|reserved_special_token_127|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128133": {
+ "content": "<|reserved_special_token_128|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128134": {
+ "content": "<|reserved_special_token_129|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128135": {
+ "content": "<|reserved_special_token_130|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128136": {
+ "content": "<|reserved_special_token_131|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128137": {
+ "content": "<|reserved_special_token_132|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128138": {
+ "content": "<|reserved_special_token_133|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128139": {
+ "content": "<|reserved_special_token_134|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128140": {
+ "content": "<|reserved_special_token_135|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128141": {
+ "content": "<|reserved_special_token_136|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128142": {
+ "content": "<|reserved_special_token_137|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128143": {
+ "content": "<|reserved_special_token_138|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128144": {
+ "content": "<|reserved_special_token_139|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128145": {
+ "content": "<|reserved_special_token_140|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128146": {
+ "content": "<|reserved_special_token_141|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128147": {
+ "content": "<|reserved_special_token_142|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128148": {
+ "content": "<|reserved_special_token_143|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128149": {
+ "content": "<|reserved_special_token_144|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128150": {
+ "content": "<|reserved_special_token_145|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128151": {
+ "content": "<|reserved_special_token_146|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128152": {
+ "content": "<|reserved_special_token_147|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128153": {
+ "content": "<|reserved_special_token_148|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128154": {
+ "content": "<|reserved_special_token_149|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128155": {
+ "content": "<|reserved_special_token_150|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128156": {
+ "content": "<|reserved_special_token_151|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128157": {
+ "content": "<|reserved_special_token_152|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128158": {
+ "content": "<|reserved_special_token_153|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128159": {
+ "content": "<|reserved_special_token_154|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128160": {
+ "content": "<|reserved_special_token_155|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128161": {
+ "content": "<|reserved_special_token_156|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128162": {
+ "content": "<|reserved_special_token_157|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128163": {
+ "content": "<|reserved_special_token_158|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128164": {
+ "content": "<|reserved_special_token_159|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128165": {
+ "content": "<|reserved_special_token_160|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128166": {
+ "content": "<|reserved_special_token_161|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128167": {
+ "content": "<|reserved_special_token_162|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128168": {
+ "content": "<|reserved_special_token_163|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128169": {
+ "content": "<|reserved_special_token_164|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128170": {
+ "content": "<|reserved_special_token_165|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128171": {
+ "content": "<|reserved_special_token_166|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128172": {
+ "content": "<|reserved_special_token_167|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128173": {
+ "content": "<|reserved_special_token_168|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128174": {
+ "content": "<|reserved_special_token_169|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128175": {
+ "content": "<|reserved_special_token_170|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128176": {
+ "content": "<|reserved_special_token_171|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128177": {
+ "content": "<|reserved_special_token_172|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128178": {
+ "content": "<|reserved_special_token_173|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128179": {
+ "content": "<|reserved_special_token_174|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128180": {
+ "content": "<|reserved_special_token_175|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128181": {
+ "content": "<|reserved_special_token_176|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128182": {
+ "content": "<|reserved_special_token_177|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128183": {
+ "content": "<|reserved_special_token_178|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128184": {
+ "content": "<|reserved_special_token_179|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128185": {
+ "content": "<|reserved_special_token_180|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128186": {
+ "content": "<|reserved_special_token_181|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128187": {
+ "content": "<|reserved_special_token_182|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128188": {
+ "content": "<|reserved_special_token_183|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128189": {
+ "content": "<|reserved_special_token_184|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128190": {
+ "content": "<|reserved_special_token_185|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128191": {
+ "content": "<|reserved_special_token_186|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128192": {
+ "content": "<|reserved_special_token_187|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128193": {
+ "content": "<|reserved_special_token_188|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128194": {
+ "content": "<|reserved_special_token_189|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128195": {
+ "content": "<|reserved_special_token_190|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128196": {
+ "content": "<|reserved_special_token_191|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128197": {
+ "content": "<|reserved_special_token_192|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128198": {
+ "content": "<|reserved_special_token_193|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128199": {
+ "content": "<|reserved_special_token_194|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128200": {
+ "content": "<|reserved_special_token_195|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128201": {
+ "content": "<|reserved_special_token_196|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128202": {
+ "content": "<|reserved_special_token_197|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128203": {
+ "content": "<|reserved_special_token_198|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128204": {
+ "content": "<|reserved_special_token_199|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128205": {
+ "content": "<|reserved_special_token_200|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128206": {
+ "content": "<|reserved_special_token_201|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128207": {
+ "content": "<|reserved_special_token_202|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128208": {
+ "content": "<|reserved_special_token_203|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128209": {
+ "content": "<|reserved_special_token_204|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128210": {
+ "content": "<|reserved_special_token_205|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128211": {
+ "content": "<|reserved_special_token_206|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128212": {
+ "content": "<|reserved_special_token_207|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128213": {
+ "content": "<|reserved_special_token_208|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128214": {
+ "content": "<|reserved_special_token_209|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128215": {
+ "content": "<|reserved_special_token_210|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128216": {
+ "content": "<|reserved_special_token_211|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128217": {
+ "content": "<|reserved_special_token_212|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128218": {
+ "content": "<|reserved_special_token_213|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128219": {
+ "content": "<|reserved_special_token_214|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128220": {
+ "content": "<|reserved_special_token_215|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128221": {
+ "content": "<|reserved_special_token_216|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128222": {
+ "content": "<|reserved_special_token_217|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128223": {
+ "content": "<|reserved_special_token_218|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128224": {
+ "content": "<|reserved_special_token_219|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128225": {
+ "content": "<|reserved_special_token_220|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128226": {
+ "content": "<|reserved_special_token_221|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128227": {
+ "content": "<|reserved_special_token_222|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128228": {
+ "content": "<|reserved_special_token_223|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128229": {
+ "content": "<|reserved_special_token_224|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128230": {
+ "content": "<|reserved_special_token_225|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128231": {
+ "content": "<|reserved_special_token_226|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128232": {
+ "content": "<|reserved_special_token_227|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128233": {
+ "content": "<|reserved_special_token_228|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128234": {
+ "content": "<|reserved_special_token_229|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128235": {
+ "content": "<|reserved_special_token_230|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128236": {
+ "content": "<|reserved_special_token_231|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128237": {
+ "content": "<|reserved_special_token_232|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128238": {
+ "content": "<|reserved_special_token_233|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128239": {
+ "content": "<|reserved_special_token_234|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128240": {
+ "content": "<|reserved_special_token_235|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128241": {
+ "content": "<|reserved_special_token_236|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128242": {
+ "content": "<|reserved_special_token_237|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128243": {
+ "content": "<|reserved_special_token_238|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128244": {
+ "content": "<|reserved_special_token_239|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128245": {
+ "content": "<|reserved_special_token_240|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128246": {
+ "content": "<|reserved_special_token_241|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128247": {
+ "content": "<|reserved_special_token_242|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128248": {
+ "content": "<|reserved_special_token_243|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128249": {
+ "content": "<|reserved_special_token_244|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128250": {
+ "content": "<|reserved_special_token_245|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128251": {
+ "content": "<|reserved_special_token_246|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128252": {
+ "content": "<|reserved_special_token_247|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128253": {
+ "content": "<|reserved_special_token_248|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128254": {
+ "content": "<|reserved_special_token_249|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128255": {
+ "content": "<|reserved_special_token_250|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128256": {
+ "content": "<|eom_id|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128257": {
+ "content": "<|python_tag|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "128258": {
+ "content": "<|NONE|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<|begin_of_text|>",
+ "chat_template": "{# version=v3-llama3.1 #}{%- macro append_new_param_info(param_declaration, comment_info, examples_info, depth) -%}\n {%- set offset = \"\" -%}\n {%- if depth >= 1 -%}\n {%- set offset = \" \" * depth -%}\n {%- endif -%}\n {%- if comment_info != \"<|NONE|>\" -%}\n {{ \"\\n\" + offset + comment_info }}\n {%- if examples_info | length > 0 -%}\n {# Append each example info #}\n {%- for example in examples_info -%}\n {{ \"\\n\" + offset + \"// \" + example|string|replace(\"'\", '\"') }}\n {%- endfor -%}\n {%- endif -%}\n {%- endif -%}\n {{ \"\\n\" + offset + param_declaration }}\n{%- endmacro -%}\n\n{%- macro convert_data_type(param_type) -%}\n {%- if param_type == \"integer\" or param_type == \"float\" -%}\n {{ \"number\" }}\n {%- else -%}\n {{ param_type }}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro get_param_type(param) -%}\n {%- set param_type = \"any\" -%}\n\n {%- if \"type\" in param -%}\n {%- set raw_param_type = param[\"type\"] -%}\n {%- if raw_param_type is iterable and raw_param_type is not string -%}\n {%- set param_type = raw_param_type | join(\" | \") -%}\n {%- else -%}\n {%- set param_type = raw_param_type -%}\n {%- endif -%}\n {{ convert_data_type(param_type) }}\n {%- elif \"oneOf\" in param -%}\n {%- set one_of_types = param[\"oneOf\"]|selectattr(\"type\", \"defined\")|list -%}\n {%- set one_of_types = one_of_types|map(attribute=\"type\")|unique|list -%}\n {{ convert_data_type(one_of_types | join(\" | \")) }}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro get_format_param(param) -%}\n {%- if \"format\" in param -%}\n {{ param[\"format\"] }}\n {%- elif \"oneOf\" in param -%}\n {%- set formats = [] -%}\n {%- for item in param[\"oneOf\"] -%}\n {%- if \"format\" in item -%}\n {%- if item[\"format\"] == param[\"oneOf\"][-1][\"format\"] -%}\n {{ item[\"format\"] }}\n {%- else -%}\n {{ item[\"format\"] + \" or \"}}\n {%- endif -%}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ \"<|NONE|>\" }}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro get_param_info(param) -%}\n {%- set param_type = param.get(\"type\", \"any\") -%}\n {%- set format_param = get_format_param(param) -%}\n\n {%- if \"description\" in param or \"default\" in param or format_param != \"<|NONE|>\" or param[\"maximum\"] or param[\"minimum\"] or param[\"maxLength\"] or param[\"minLength\"] -%}\n {{ \"//\" }}\n {%- if \"description\" in param -%}\n {%- set desc = param[\"description\"] -%}\n {%- if not desc.endswith(\".\") -%}\n {%- set desc = desc + \".\" -%}\n {%- endif -%}\n {{ \" \" + desc }}\n {%- endif -%}\n\n {%- if \"default\" in param -%}\n {%- set default_value = param[\"default\"] -%}\n {%- if param_type == \"string\" -%}\n {%- set default_value = '\"' ~ default_value ~ '\"' -%}\n {%- endif -%}\n {{ \" Default=\" ~ default_value ~ \".\" }}\n {%- endif -%}\n\n {%- set format_param = get_format_param(param) -%}\n {%- if format_param != \"<|NONE|>\" -%}\n {{ \" Format=\" ~ format_param }}\n {%- endif -%}\n\n {%- for field, field_name in [(\"maximum\", \"Maximum\"), (\"minimum\", \"Minimum\"), (\"maxLength\", \"Maximum length\"), (\"minLength\", \"Minimum length\")] -%}\n {%- if field in param -%}\n {{ \" \" + field_name ~ \"=\" ~ param[field] }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ \"<|NONE|>\"}}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro get_enum_option_str(enum_options) -%}\n {%- for v in enum_options -%}\n {%- if v is string -%}\n {{ '\"' + v + '\"' }}\n {%- else -%}\n {{ v }}\n {%- endif -%}\n {%- if enum_options|length > 0 and v != enum_options[-1] -%}\n {{ \" | \" }}\n {%- endif -%}\n {%- endfor -%}\n{%- endmacro -%}\n\n{%- macro get_array_typescript(param_name, param_dic, depth) -%}\n {%- set offset = '' -%}\n {%- if depth >= 1 -%}\n {%- set offset = \" \" * depth -%}\n {%- endif -%}\n {%- set items_info = param_dic.get('items', {}) -%}\n\n {%- if items_info|length == 0 -%}\n {%- if param_name -%}\n {{ \"\\n\" + offset + param_name + \": []\" }}\n {%- else -%}\n {{ \"\\n\" + offset + \"[]\" }}\n {%- endif -%}\n {%- else -%}\n {%- set array_type = get_param_type(items_info) -%}\n {%- if array_type == 'object' -%}\n {%- if param_name -%}\n {{ \"\\n\" + offset + param_name + \": {\" }}\n {%- else -%}\n {{ \"\\n\" + offset + \"{\" }}\n {%- endif -%}\n {{ get_parameter_typescript(items_info.get('properties', {}), items_info.get('required', []), depth + 1) -}}\n {{- \"\\n\" + offset + \"}[]\" }}\n {%- elif array_type == 'array' -%}\n {%- set item_info = get_array_typescript(None, items_info, depth + 1) -%}\n {%- if not param_name -%}\n {{ \"\\n\" + item_info + \"[]\" }}\n {%- else -%}\n {{ \"\\n\" + offset + param_name + \": \" + item_info|trim + \"[]\" }}\n {%- endif -%}\n {%- else -%}\n {%- if 'enum' in items_info -%}\n {%- set item_type = get_enum_option_str(items_info['enum']) -%}\n {%- if param_name is none -%}\n {{ \"(\" + item_type + \")[]\"}}\n {%- else -%}\n {{ \"\\n\" + offset + param_name + \": (\" + item_type + \")[]\" }}\n {%- endif -%}\n {%- else -%}\n {%- if param_name is none -%}\n {{ \"\\n\" + array_type + \"[]\" }}\n {%- else -%}\n {{ \"\\n\" + offset + param_name + \": \" + array_type + \"[],\" }}\n {%- endif -%}\n {%- endif -%}\n {%- endif -%}\n {%- endif -%}\n{%- endmacro -%}\n\n{%- macro get_parameter_typescript(properties, required_params, depth=0) -%}\n {%- set res = \"\" -%}\n {%- for param_name, param in properties.items() -%}\n {%- if param is mapping -%}\n {%- set comment_info = get_param_info(param) -%}\n {# Param Examples #}\n {%- set examples_info = [] -%}\n {%- if \"examples\" in param -%}\n {%- set examples_info = [\"Example \" + param_name + \":\"] -%}\n {%- set examples_info = examples_info + param[\"examples\"] -%}\n {%- endif -%}\n\n {# Param Name declaration #}\n {%- set param_declaration = param_name -%}\n {%- if required_params is iterable and param_name not in required_params -%}\n {%- set param_declaration = param_declaration + \"?\" -%}\n {%- endif -%}\n\n {%- set param_type = get_param_type(param) -%}\n\n {# Handle indentation based on depth #}\n {%- set offset = \"\" -%}\n {%- if depth >= 1 -%}\n {%- set offset = \" \" * depth -%}\n {%- endif -%}\n\n {%- if param_type == \"object\" -%}\n {%- if comment_info != \"<|NONE|>\" -%}\n {{ \"\\n\" + offset + comment_info }}\n {%- endif -%}\n {%- if examples_info|length > 0 -%}\n {%- for example in examples_info -%}\n {{ \"\\n\" + offset + \"// \" + example|string|replace(\"'\", '\"') }}\n {%- endfor -%}\n {%- endif -%}\n {%- set param_declaration = param_declaration + \": {\" -%}\n {{ \"\\n\" + offset + param_declaration -}}\n {{- get_parameter_typescript(param.get(\"properties\", {}), param.get(\"required\", []), depth + 1) -}}\n {{- \"\\n\" + offset + \"},\" }}\n {%- elif param_type == \"array\" -%}\n {%- set item_info = param.get(\"items\", {}) -%}\n {%- if \"type\" not in item_info -%}\n {%- set param_declaration = param_declaration + \": [],\" -%}\n {{ append_new_param_info(param_declaration, comment_info, examples_info, depth) }}\n {%- else -%}\n {%- if comment_info != \"<|NONE|>\" -%}\n {{ \"\\n\" + offset + comment_info }}\n {%- endif -%}\n {%- if examples_info|length > 0 -%}\n {%- for example in examples_info -%}\n {{ \"\\n\" + offset + \"// \" + example|string|replace(\"'\", '\"') }}\n {%- endfor -%}\n {%- endif -%}\n {%- set array_declaration = get_array_typescript(param_declaration, param, depth) -%}\n {%- if not array_declaration.endswith(\",\") -%}\n {%- set array_declaration = array_declaration + \",\" -%}\n {%- endif -%}\n {{ array_declaration}}\n {%- endif -%}\n {%- else -%}\n {%- if \"enum\" in param -%}\n {%- set param_type = get_enum_option_str(param[\"enum\"]) -%}\n {%- endif -%}\n {%- if \"nullable\" in param and param[\"nullable\"] -%}\n {%- set param_type = param_type + \" | null\" -%}\n {%- endif -%}\n {%- set param_declaration = param_declaration + \": \" + param_type + \",\" -%}\n {{ append_new_param_info(param_declaration, comment_info, examples_info, depth) }}\n {%- endif -%}\n {%- endif -%}\n {%- endfor -%}\n{%- endmacro -%}\n\n{%- macro generate_schema_from_functions(functions, namespace='functions') -%}\n {{ \"// Supported function definitions that should be called when necessary.\\n\" -}}\n {{- \"namespace \" + namespace + \" {\\n\\n\" -}}\n\n {%- for function in functions -%}\n {%- if function.get(\"function\") -%}\n {%- set function = function.get(\"function\") -%}\n {%- endif -%}\n\n {%- set function_name = function.get(\"name\") -%}\n {%- if function_name -%}\n {%- set description = function.get('description', '') -%}\n {%- set parameters = function.get('parameters', {}) -%}\n {{- \"// \" + description + \"\\n\" -}}\n {{- \"type \" + function_name -}}\n {%- if parameters and parameters.get(\"properties\") -%}\n {{- \" = (_: {\" -}}\n {%- set required_params = parameters.get(\"required\", []) -%}\n {{ get_parameter_typescript(parameters.get(\"properties\"), required_params, 0) -}}\n {{- \"\\n}) => any;\\n\\n\" }}\n {%- else -%}\n {{ \" = () => any;\\n\\n\" }}\n {%- endif -%}\n {%- endif -%}\n {%- endfor -%}\n {{ \"} // namespace \" + namespace }}\n{%- endmacro -%}\n{%- if not tools is defined -%}\n {%- set tools = none -%}\n{%- endif -%}\n\n{%- set has_code_interpreter = tools | selectattr(\"type\", \"equalto\", \"code_interpreter\") | list | length > 0 -%}\n{%- if has_code_interpreter -%}\n {%- set tools = tools | rejectattr(\"type\", \"equalto\", \"code_interpreter\") | list -%}\n{%- endif -%}\n\n{#- System message + builtin tools #}\n{{- bos_token + \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if has_code_interpreter %}\n {{- \"Environment: ipython\\n\\n\" }}\n{%- else -%}\n {{ \"\\n\"}}\n{%- endif %}\n{%- if tools %}\n {{- \"\\nYou have access to the following functions:\\n\\n\" }}\n {%- for t in tools %}\n {%- if \"type\" in t -%}\n {{ \"Use the function '\" + t[\"function\"][\"name\"] + \"' to '\" + t[\"function\"][\"description\"] + \"'\\n\" + t[\"function\"] | tojson() }}\n {%- else -%}\n {{ \"Use the function '\" + t[\"name\"] + \"' to '\" + t[\"description\"] + \"'\\n\" + t | tojson }}\n {%- endif -%}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- '\\nThink very carefully before calling functions.\\nIf a you choose to call a function ONLY reply in the following format:\\n<{start_tag}={function_name}>{parameters}{end_tag}\\nwhere\\n\\nstart_tag => ` a JSON dict with the function argument name as key and function argument value as value.\\nend_tag => ``\\n\\nHere is an example,\\n{\"example_name\": \"example_value\"}\\n\\nReminder:\\n- If looking for real time information use relevant functions before falling back to brave_search\\n- Function calls MUST follow the specified format, start with \\n- Required parameters MUST be specified\\n- Only call one function at a time\\n- Put the entire function call reply on one line\\n\\n' -}}\n{%- endif %}\n{{- \"<|eot_id|>\" -}}\n\n{%- for message in messages -%}\n {%- if message['role'] == 'user' or message['role'] == 'system' -%}\n {{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n' + message['content'] + '<|eot_id|>' }}\n {%- elif message['role'] == 'tool' -%}\n {{ '<|start_header_id|>ipython<|end_header_id|>\\n\\n' + message['content'] + '<|eot_id|>' }}\n {%- else -%}\n {%- if (message['content'] and message['content']|length > 0) or ('tool_calls' in message and message['tool_calls'] and message['tool_calls']|length > 0) -%}\n {{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'}}\n {%- endif -%}\n {%- if message['content'] and message['content']|length > 0 -%}\n {{ message['content'] }}\n {%- endif -%}\n {%- if 'tool_calls' in message and message['tool_calls'] and message['tool_calls']|length > 0 -%}\n {%- for tool_call in message['tool_calls'] -%}\n {%- if tool_call[\"function\"][\"name\"] == \"python\" -%}\n {{ '<|python_tag|>' + tool_call['function']['arguments'] }}\n {%- else -%}\n {{ '' + tool_call['function']['arguments'] + '' }}\n {%- endif -%}\n {%- endfor -%}\n {{ '<|eom_id|>' }}\n {%- elif message['content'] and message['content']|length > 0 -%}\n {{ '<|eot_id|>' }}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{ '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif -%}\n",
+ "clean_up_tokenization_spaces": true,
+ "eos_token": "<|eot_id|>",
+ "extra_special_tokens": {},
+ "model_input_names": [
+ "input_ids",
+ "attention_mask"
+ ],
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "<|end_of_text|>",
+ "tokenizer_class": "PreTrainedTokenizer"
+}