tomkay committed on Apr 6

Commit

d61370a

verified ·

1 Parent(s): cf7bd06

RAM 170GB mixed-precision quantization of Llama-4-Maverick

Browse files

Files changed (44) hide show

.gitattributes +1 -0
README.md +40 -0
chat_template.jinja +111 -0
config.json +0 -0
generation_config.json +13 -0
model-00001-of-00036.safetensors +3 -0
model-00002-of-00036.safetensors +3 -0
model-00003-of-00036.safetensors +3 -0
model-00004-of-00036.safetensors +3 -0
model-00005-of-00036.safetensors +3 -0
model-00006-of-00036.safetensors +3 -0
model-00007-of-00036.safetensors +3 -0
model-00008-of-00036.safetensors +3 -0
model-00009-of-00036.safetensors +3 -0
model-00010-of-00036.safetensors +3 -0
model-00011-of-00036.safetensors +3 -0
model-00012-of-00036.safetensors +3 -0
model-00013-of-00036.safetensors +3 -0
model-00014-of-00036.safetensors +3 -0
model-00015-of-00036.safetensors +3 -0
model-00016-of-00036.safetensors +3 -0
model-00017-of-00036.safetensors +3 -0
model-00018-of-00036.safetensors +3 -0
model-00019-of-00036.safetensors +3 -0
model-00020-of-00036.safetensors +3 -0
model-00021-of-00036.safetensors +3 -0
model-00022-of-00036.safetensors +3 -0
model-00023-of-00036.safetensors +3 -0
model-00024-of-00036.safetensors +3 -0
model-00025-of-00036.safetensors +3 -0
model-00026-of-00036.safetensors +3 -0
model-00027-of-00036.safetensors +3 -0
model-00028-of-00036.safetensors +3 -0
model-00029-of-00036.safetensors +3 -0
model-00030-of-00036.safetensors +3 -0
model-00031-of-00036.safetensors +3 -0
model-00032-of-00036.safetensors +3 -0
model-00033-of-00036.safetensors +3 -0
model-00034-of-00036.safetensors +3 -0
model-00035-of-00036.safetensors +3 -0
model-00036-of-00036.safetensors +3 -0
model.safetensors.index.json +0 -0
tokenizer.json +3 -0
tokenizer_config.json +15 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,40 @@

+---
+library_name: mlx
+tags:
+  - mlx
+  - quantized
+  - mixed-precision
+  - llama4
+  - moe
+license: other
+license_name: llama4
+base_model: meta-llama/Llama-4-Maverick-17B-128E-Instruct
+base_model_relation: quantized
+---
+# Llama-4-Maverick-17B-128E-Instruct — 170GB (MLX)
+Mixed-precision quantized version of [meta-llama/Llama-4-Maverick-17B-128E-Instruct](https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct) optimised by [baa.ai](https://baa.ai) using a proprietary Black Sheep AI method.
+Per-tensor bit-width allocation via advanced sensitivity analysis and budget-constrained optimisation — no calibration data required.
+## Metrics
+| Metric | Value |
+|--------|-------|
+| **Size** | **161.7 GB** |
+| Average bits | 3.5 |
+| WikiText-2 PPL (median) | 6.5236 |
+## Usage
+```python
+from mlx_lm import load, generate
+model, tokenizer = load("baa-ai/Llama-4-Maverick-17B-128E-Instruct-RAM-170GB-MLX")
+response = generate(model, tokenizer, prompt="Hello!", max_tokens=256)
+print(response)
+```
+---
+*Quantized by [baa.ai](https://baa.ai)*

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,111 @@

+{{- bos_token }}
+{#- Tools may arrive under either name: a defined, non-empty `custom_tools` replaces `tools`. #}
+{%- if custom_tools is defined and custom_tools %}
+    {%- set tools = custom_tools %}
+{%- endif %}
+{#- Normalise `tools` to none when absent or empty; otherwise pre-render the tool list as indented JSON. #}
+{%- if tools is not defined or not tools %}
+    {%- set tools = none %}
+{%- else %}
+    {#- NOTE(review): `tool_definition` is not assigned earlier in this template, so the left operand is expected to render as empty text. #}
+    {%- set tool_definition = tool_definition ~ (tools | tojson(indent=4)) %}
+{%- endif %}
+{#- Determine `system_message`: take it from a leading system turn if present, otherwise fall back to the default tool prompt (when tools are given) or to an empty string. #}
+{#- NOTE(review): `messages[0]` is read unconditionally, so this assumes `messages` is non-empty; confirm against callers. #}
+{%- if messages[0]['role'] == 'system' %}
+    {%- set user_provided_system_message = true %}
+    {#- Content is either a plain string or a list of parts; for a list, only the first part's `text` is used. #}
+    {%- if messages[0]['content'] is string %}
+        {%- set system_message = messages[0]['content']|trim %}
+    {%- else %}
+        {%- set system_message = messages[0]['content'][0]['text']|trim %}
+    {%- endif %}
+    {#- Drop the system turn so the message loop further down does not render it a second time. #}
+    {%- set messages = messages[1:] %}
+{%- else %}
+    {%- if tools is not none  %}
+        {#- Since no system_message was provided by the user, and tools are provided, system_message becomes the default tool system message. #}
+        {#- This system message is taken from the Llama website: https://www.llama.com/docs/model-cards-and-prompt-formats/llama4/ #}
+        {%- set system_message = "You are a helpful assistant and an expert in function composition. You can answer general questions using your internal knowledge OR invoke functions when necessary. Follow these strict guidelines:\n\n1. FUNCTION CALLS:\n- ONLY use functions that are EXPLICITLY listed in the function list below\n- If NO functions are listed (empty function list []), respond ONLY with internal knowledge or \"I don't have access to [Unavailable service] information\"\n- If a function is not in the list, respond ONLY with internal knowledge or \"I don't have access to [Unavailable service] information\"\n- If ALL required parameters are present AND the query EXACTLY matches a listed function's purpose: output ONLY the function call(s)\n- Use exact format: [func_name1(param1=value1, param2=value2), func_name2(...)]\nExamples:\nCORRECT: [get_weather(location=\"Vancouver\"), calculate_route(start=\"Boston\", end=\"New York\")] <- Only if get_weather and calculate_route are in function list\nINCORRECT: get_weather(location=\"New York\")\nINCORRECT: Let me check the weather: [get_weather(location=\"New York\")]\nINCORRECT: [get_events(location=\"Singapore\")] <- If function not in list\n\n2. RESPONSE RULES:\n- For pure function requests matching a listed function: ONLY output the function call(s)\n- For knowledge questions: ONLY output text\n- For missing parameters: ONLY request the specific missing parameters\n- For unavailable services (not in function list): output ONLY with internal knowledge or \"I don't have access to [Unavailable service] information\". Do NOT execute a function call.\n- If the query asks for information beyond what a listed function provides: output ONLY with internal knowledge about your limitations\n- NEVER combine text and function calls in the same response\n- NEVER suggest alternative functions when the requested service is unavailable\n- NEVER create or invent new functions not listed below\n\n3. 
STRICT BOUNDARIES:\n- ONLY use functions from the list below - no exceptions\n- NEVER use a function as an alternative to unavailable information\n- NEVER call functions not present in the function list\n- NEVER add explanatory text to function calls\n- NEVER respond with empty brackets\n- Use proper Python/JSON syntax for function calls\n- Check the function list carefully before responding\n\n4. TOOL RESPONSE HANDLING:\n- When receiving tool responses: provide concise, natural language responses\n- Don't repeat tool response verbatim\n- Don't add supplementary information\n\nHere is a list of functions in JSON format that you can invoke:\n" %}
+    {%- else %}
+        {#- No system turn and no tools: leave the system message empty so no system header is emitted. #}
+        {%- set system_message = "" %}
+    {%- endif %}
+{%- endif %}
+{#- Emit the system turn, if there is one. A user-provided system message takes precedence over the default tool prompt; when tools accompany it, the tool list is appended after its own lead-in sentence. #}
+{%- if system_message %}
+    {{- "<|header_start|>system<|header_end|>\n\n" ~ system_message }}
+    {%- if user_provided_system_message and tools %}
+        {{- "\nHere is a list of functions in JSON format that you can invoke. Use exact format: [func_name1(param1=value1, param2=value2), func_name2(...)]\n" ~ tool_definition -}}
+    {%- elif tool_definition %}
+        {#- Default tool prompt: it already ends with the lead-in sentence, so only the JSON list follows. #}
+        {{- tool_definition -}}
+    {%- endif %}
+    {{- "<|eot|>" }}
+{%- endif %}
+{#- Render every remaining turn (the optional leading system turn was removed above). #}
+{%- for message in messages %}
+    {#- Base case: a non-tool turn whose tool_calls entry is absent, null, or empty. #}
+    {#- Truthiness is tested instead of `|length` so that `tool_calls: null` does not raise. #}
+    {%- if not (message.role == 'ipython' or message.role == 'tool' or ('tool_calls' in message and message.tool_calls)) %}
+        {{- '<|header_start|>' + message['role'] + '<|header_end|>\n\n' }}
+        {%- if message['content'] is string %}
+            {{- message['content'] }}
+        {%- elif message['content'] %}
+            {#- Structured content: a list of typed parts. Null or missing content renders nothing instead of failing on iteration. #}
+            {%- for content in message['content'] %}
+                {%- if content['type'] == 'image' %}
+                    {{- '<|image|>' }}
+                {%- elif content['type'] == 'text' %}
+                    {{- content['text'] | trim }}
+                {%- endif %}
+            {%- endfor %}
+        {%- endif %}
+        {{- "<|eot|>" }}
+    {#- Tool-call case: a turn carrying a non-empty tool_calls list. #}
+    {#- Requiring a non-empty list keeps tool/ipython turns with an empty or null tool_calls key flowing to the tool-response branch below. #}
+    {%- elif 'tool_calls' in message and message.tool_calls %}
+        {#- Tool calls are assumed to come from the assistant; any other role emits nothing here. #}
+        {%- if message.role == 'assistant' %}
+            {{- '<|header_start|>assistant<|header_end|>\n\n' -}}
+            {#- Optional text/image content that precedes the call list; null content is skipped. #}
+            {%- if message['content'] is string %}
+                {{- message['content'] }}
+            {%- elif message['content'] %}
+                {%- for content in message['content'] %}
+                    {%- if content['type'] == 'image' %}
+                        {{- '<|image|>' }}
+                    {%- elif content['type'] == 'text' %}
+                        {{- content['text'] }}
+                    {%- endif %}
+                {%- endfor %}
+            {%- endif %}
+            {#- Serialise the calls as [name(arg="value", ...), ...]; every argument value is string-formatted and wrapped in double quotes. #}
+            {{- "[" }}
+            {%- for tool_call in message.tool_calls %}
+                {#- Accept both the wrapped form ({function: {...}}) and the bare {name, arguments} form. #}
+                {%- if tool_call.function is defined %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- tool_call.name + '(' -}}
+                {%- for param in tool_call.arguments %}
+                    {{- param + '="' -}}
+                    {{- "%s" | format(tool_call.arguments[param]) -}}
+                    {{- '"' -}}
+                    {% if not loop.last %}, {% endif %}
+                {%- endfor %}
+                {{- ')' -}}
+                {% if not loop.last %}, {% endif %}
+            {%- endfor %}
+            {{- "]<|eot|>" }}
+        {%- endif %}
+    {#- Tool-response case: results are emitted under the ipython header, JSON-encoded. #}
+    {%- elif message.role == "tool" or message.role == "ipython" %}
+        {{- "<|header_start|>ipython<|header_end|>\n\n" }}
+        {%- if message.content is string %}
+            {{- message.content | tojson }}
+        {%- elif message.content %}
+            {#- Structured tool output: only text parts are rendered; null content renders nothing. #}
+            {%- for content in message['content'] %}
+                {%- if content['type'] == 'text' %}
+                    {{- content['text'] | tojson }}
+                {%- endif %}
+            {%- endfor %}
+        {%- endif %}
+        {{- "<|eot|>" }}
+    {%- endif %}
+{%- endfor %}
+{#- When a generation prompt is requested, open an assistant turn so decoding continues right after the assistant header. #}
+{{- '<|header_start|>assistant<|header_end|>\n\n' if add_generation_prompt else '' }}

config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

generation_config.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "bos_token_id": 200000,
+  "do_sample": true,
+  "eos_token_id": [
+    200001,
+    200007,
+    200008
+  ],
+  "pad_token_id": 200018,
+  "temperature": 0.6,
+  "top_p": 0.9,
+  "transformers_version": "4.51.0.dev0"
+}

model-00001-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:01d2c78fda55834c2b11f106c808c40df898fc149dad3155b6cbe6c886a57470
+size 5206861031

model-00002-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0f5a7f8e252854dcbf92fc681c0f2bb509e849ba24828f3ee4047840d3fa388
+size 5358477235

model-00003-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:041fbd871455b3bfafdab81636382a744e5ef51d7492e87bb94667d62cabfd8f
+size 3688701437

model-00004-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4292d6eeadf9734831760f2a68b922c2ae6c915edf127d95fc3273fd89e9eb3d
+size 4910292221

model-00005-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:791e1347a589fcccf178b8c7a312bb502687ce44edcc455b9ca32cb8433c6b85
+size 4697621388

model-00006-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:76e4eae220eeee6d4c7ae533c39b134bbbe8537f35d651e394f931a2e7d1708e
+size 4914224489

model-00007-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3509059e44b622ad50ab154e299557f2eceec24f7084833e1728f84384545f15
+size 4914224396

model-00008-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c1b028797eb500e6983ed0ee75f771758e7f2f4596d810ba50aaaa4bfe69cdb
+size 4697621396

model-00009-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd551f3f87459be5e7b99a0b9828aef39b307bebdac0bee597713f3a84d68952
+size 4914224470

model-00010-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:663b9d9c1127bb5094b13d90d29e28d55b07cfabc9d71e1eb0b2de29278e45f8
+size 4918156606

model-00011-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:857994ef439dcb99444015c44a95b13258218e5baf80dc95feafa986d3e00616
+size 4697621392

model-00012-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f7ac8e55a1663ad5ef836712d4ce59ad83bd4d352efe8cc30694eb91fd04538f
+size 4918156626

model-00013-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a04323d5ca4f46e7c6a907e8f16e1c53491351d3cc4dcc5606470cb399dfcba
+size 4918156550

model-00014-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8007e2a0c98b7c0bbde086086312fe4e35d420161ce0f16633ef67e37c1d750c
+size 4697621396

model-00015-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:040da2f4f409e9b3b4d37238a7a7e718e32c21a45368e99648795ba7817d3dd8
+size 4918156704

model-00016-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5d84995475ff838a55e353bb44c723dc53dd0c9ee2319a22755e4191781f7a9d
+size 4918156678

model-00017-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ed7cd476af44bcb46234a40ce51f152a0a51cb57272935fd9f470c3fa758b329
+size 4697621394

model-00018-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1118cabc1c43007d5fea4ad9aeb49940d82e5ce5c68feb9822bdb874880f661b
+size 4918156706

model-00019-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9bc3e3528bb06f1d65b5954dc8039624c8b66e5b3fa7cfa786242a953c67da9
+size 4918156636

model-00020-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bec11a21409c6169c56c1445544ebecfa8b6a9b8d067b869d0b42913c86748fb
+size 4697621398

model-00021-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f02460970fdc91856e1c637ac3932461f31448b41361132dc8095b54b86f4c42
+size 4918156706

model-00022-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4af54ba13c012e9e5086f72c0bcd93467fdeb8255b64c7be7e5676a4b8cb660b
+size 4918156608

model-00023-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:16fee2691fba5c648a377e5c62dac6e051db3904762eaf08d6d8a341c3238b16
+size 4697621396

model-00024-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1499576fdd366afd980852721ac82609b0813bf65c7fe2d9fe1f0c26337f2429
+size 4918156704

model-00025-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:276ef55af6d65d1f5100fbcd89a81ba5b5c24592c43a0595ced528da594dae31
+size 4918156598

model-00026-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a1f247b94523f8daea68937086e874935977e5a69f5cf752d9bc8123ffa6b0fe
+size 4697621398

model-00027-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a56e58f01620aff32f0a14c102b904fd226442babff4f318c242c1a289cccf84
+size 4918156696

model-00028-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:561bc928230e92d3d5b9374ddef1909a8f702345e525ed77e49a27287454f43e
+size 4918156644

model-00029-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1945986238416e24845dbecdb7f5f26e62fc0d1b98b9f735444b0642fc4d81c0
+size 4697621392

model-00030-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:79dfa6e11f7ddbab8048bab1649517ce50b24781b422189641f30c5413f6da6e
+size 4918156658

model-00031-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7163935fbc0bdf5a5d842f9643315c0eda036d2db2b0c3eee5b61d8d16b0317d
+size 4918156540

model-00032-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:83ef8f9d2fc44a2b120cbab1a96703082b1b654772333f6a50397ba9bb352e31
+size 4697621396

model-00033-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e4722bb0333fa4e5c39a2f9561871571591dea7687522be1849393372da48927
+size 4918156672

model-00034-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d286dcfaea34c5c6de4c04a4f2b8f832ab41718930edf7d59b08602708d34bb9
+size 4918156602

model-00035-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:90ec7c1d8642944a2f87326fac55750f7577d434fafcf3e4859a45af71a49e4f
+size 4697621398

model-00036-of-00036.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:011e404a9d10a43d2f856fe01ce1892f5362bfbc4818a1d9a5cfb7b26d826e0d
+size 4473152154

model.safetensors.index.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:172c9eb4beafc72601690da3ccfcede5c2e6806a8d5ec1fca33e22acea8023a4
+size 27948578

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,15 @@

+{
+  "backend": "tokenizers",
+  "bos_token": "<|begin_of_text|>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|eot|>",
+  "is_local": true,
+  "model_input_names": [
+    "input_ids",
+    "attention_mask"
+  ],
+  "model_max_length": 1048576,
+  "pad_token": "<|finetune_right_pad|>",
+  "processor_class": "Llama4Processor",
+  "tokenizer_class": "TokenizersBackend"
+}