Yılmaz KARAAĞAÇ committed on
Commit
e5c4fa4
·
verified ·
1 Parent(s): 2406b15

Add MNN 4-bit quantized model with Model Card

Browse files
Files changed (9) hide show
  1. .gitattributes +2 -0
  2. README.md +38 -0
  3. config.json +10 -0
  4. export_args.json +41 -0
  5. llm.mnn +3 -0
  6. llm.mnn.json +0 -0
  7. llm.mnn.weight +3 -0
  8. llm_config.json +19 -0
  9. tokenizer.txt +0 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ llm.mnn filter=lfs diff=lfs merge=lfs -text
37
+ llm.mnn.weight filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: other
5
+ tags:
6
+ - mnn
7
+ - on-device
8
+ - android
9
+ - ios
10
+ - quantization
11
+ - int4
12
+ - text-generation
13
+ - gemma
14
+ pipeline_tag: text-generation
15
+ library_name: mnn
16
+ base_model: WhoIsShe/gemma-3-1b-it-heretic-extreme-uncensored-abliterated
17
+ ---
18
+
19
+ # Gemma-3-1B (MNN Quantized)
20
+
21
+ This is a **4-bit quantized** version of the Gemma-3-1B model, optimized for **on-device inference** (Android/iOS) using the [Alibaba MNN framework](https://github.com/alibaba/MNN).
22
+
23
+ ## 🚀 Fast Deployment on Android
24
+
25
+ ### 1. Download the App
26
+ Don't build from scratch! Use the official MNN Chat Android app:
27
+ * **[Download APK (GitHub)](https://github.com/alibaba/MNN/releases)**
28
+
29
+ ### 2. Setup
30
+ 1. Download the model files from this repo (`llm.mnn`, `llm.mnn.weight`, `config.json`, `llm_config.json`, `tokenizer.txt`).
31
+ 2. Create a folder on your phone: `/sdcard/MNN/gemma-3-1b`.
32
+ 3. Copy the files into that folder.
33
+ 4. Open the MNN App and select your folder.
34
+
35
+ ## 💻 Technical Details
36
+ * **Framework:** MNN
37
+ * **Quantization:** 4-bit Asymmetric (Int4)
38
+ * **Model Type:** Gemma-3-1B (Uncensored)
config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "llm_model": "llm.mnn",
3
+ "llm_weight": "llm.mnn.weight",
4
+ "backend_type": "cpu",
5
+ "thread_num": 4,
6
+ "precision": "low",
7
+ "memory": "low",
8
+ "sampler_type": "penalty",
9
+ "penalty": 1.1
10
+ }
export_args.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "path": "/content/models/gemma-3-1b-it-heretic-extreme-uncensored-abliterated",
3
+ "type": null,
4
+ "tokenizer_path": "/content/models/gemma-3-1b-it-heretic-extreme-uncensored-abliterated",
5
+ "eagle_path": null,
6
+ "lora_path": null,
7
+ "gptq_path": null,
8
+ "dst_path": "/kaggle/working/mnn_output",
9
+ "verbose": true,
10
+ "test": null,
11
+ "export": "mnn",
12
+ "onnx_slim": false,
13
+ "quant_bit": 4,
14
+ "quant_block": 64,
15
+ "visual_quant_bit": null,
16
+ "visual_quant_block": null,
17
+ "lm_quant_bit": 4,
18
+ "lm_quant_block": 64,
19
+ "mnnconvert": "../../../build/MNNConvert",
20
+ "ppl": false,
21
+ "awq": false,
22
+ "hqq": false,
23
+ "omni": false,
24
+ "transformer_fuse": false,
25
+ "group_conv_native": false,
26
+ "smooth": false,
27
+ "sym": false,
28
+ "visual_sym": false,
29
+ "seperate_embed": false,
30
+ "lora_split": false,
31
+ "calib_data": null,
32
+ "act_bit": 16,
33
+ "embed_bit": 16,
34
+ "act_sym": false,
35
+ "generate_for_npu": false,
36
+ "skip_weight": false,
37
+ "omni_epochs": 20,
38
+ "omni_lr": 0.005,
39
+ "omni_wd": 0.0001,
40
+ "tie_word_embeddings": true
41
+ }
llm.mnn ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb5c2bdaa7fc2807af6c2f80aab77b2375e3b5d8a21ff5b8e001a77e217b8d9e
3
+ size 538064
llm.mnn.json ADDED
The diff for this file is too large to render. See raw diff
 
llm.mnn.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:813c1d60a94d90e980fb0a409b87756b5e07cc1c7439032014856ea7f70948ba
3
+ size 625923006
llm_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "gemma3_text",
3
+ "hidden_size": 1152,
4
+ "attention_mask": "float",
5
+ "attention_type": "mix",
6
+ "sliding_window": 512,
7
+ "jinja": {
8
+ "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '<start_of_turn>' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '<start_of_image>' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '<end_of_turn>\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{'<start_of_turn>model\n'}}\n{%- endif -%}\n",
9
+ "bos": "<bos>",
10
+ "eos": "<eos>"
11
+ },
12
+ "tie_embeddings": [
13
+ 437179326,
14
+ 588174270,
15
+ 37748736,
16
+ 4,
17
+ 64
18
+ ]
19
+ }
tokenizer.txt ADDED
The diff for this file is too large to render. See raw diff