darkmaniac7 committed on
Commit
f17c0c9
·
verified ·
1 Parent(s): 82693e4

Add files using upload-large-folder tool

Browse files
Files changed (10) hide show
  1. .gitattributes +4 -0
  2. README.md +62 -0
  3. config_cpu.json +1 -0
  4. llm.mnn +3 -0
  5. llm.mnn.json +3 -0
  6. llm.mnn.weight +3 -0
  7. llm_config.json +11 -0
  8. onnx/llm.onnx +3 -0
  9. tokenizer.json +3 -0
  10. tokenizer.txt +0 -0
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ llm.mnn.json filter=lfs diff=lfs merge=lfs -text
37
+ llm.mnn filter=lfs diff=lfs merge=lfs -text
38
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ llm.mnn.weight filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - en
5
+ tags:
6
+ - tokforge
7
+ - mnn
8
+ - android
9
+ - mobile
10
+ - speculative-decoding
11
+ - qwen3.5
12
+ - draft-model
13
+ - experimental
14
+ - text-generation
15
+ pipeline_tag: text-generation
16
+ inference: false
17
+ ---
18
+
19
+ # Qwen3.5-0.8B-lk-alpha-ep4-MNN
20
+
21
+ Experimental `Qwen3.5-0.8B` draft bundle for **TokForge + MNN** speculative decoding research.
22
+
23
+ ## Why this repo exists
24
+
25
+ This repo captures an acceptance-oriented `Qwen3.5-0.8B` draft experiment exported into a ready-to-run `MNN` bundle.
26
+
27
+ It is here because people asked for the actual artifacts behind the work, not because it is the current default recommendation.
28
+
29
+ ## Training snapshot
30
+
31
+ For the associated `LK Alpha` training lane:
32
+
33
+ - final reported acceptance (`alpha`) was `0.6972` on the small Qwen3.5 dataset
34
+
35
+ ## Status
36
+
37
+ This is currently best treated as **experimental**:
38
+
39
+ - useful if you want to inspect the `Qwen3.5-0.8B` draft path
40
+ - useful for reproducing training/export experiments
41
+ - not currently the top practical recommendation for mobile use; the `Qwen3-0.6B` draft lane is stronger
42
+
43
+ ## Included files
44
+
45
+ - `llm.mnn` and `llm.mnn.json`
46
+ - `llm.mnn.weight`
47
+ - `llm_config.json`
48
+ - `config_cpu.json`
49
+ - tokenizer files
50
+ - ONNX export artifact for reference
51
+
52
+ ## Notes
53
+
54
+ - This is an `MNN` runtime bundle for TokForge-style use.
55
+ - It is not a standard HF Transformers checkpoint.
56
+
57
+ ## TokForge
58
+
59
+ - Website: [tokforge.ai](https://tokforge.ai)
60
+ - Discord: [Join the Discord](https://discord.gg/Acv3CBtfVm)
61
+
62
+ If you benchmark this on your own device, feel free to share results in Discord.
config_cpu.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"backend_type": "cpu", "thread_num": 4, "sampler_type": "greedy"}
llm.mnn ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd319803b4c616912a0b389f6fa3057a9adc8f1d1808d9c340b815b9ce802310
3
+ size 2185672
llm.mnn.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a01d25cc26a2e4ba0321de0d9308bf24f86e8edb0333a918a2753ea3facc6673
3
+ size 18181483
llm.mnn.weight ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bc80d360b0426f7fc62c10f818ce08fefbff21627b4166da6dc46bcba0eb25d
3
+ size 423389206
llm_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "qwen3_5",
3
+ "hidden_size": 1024,
4
+ "num_attention_heads": 16,
5
+ "num_hidden_layers": 24,
6
+ "intermediate_size": 2816,
7
+ "vocab_size": 495680,
8
+ "attention_mask": "float",
9
+ "prompt_template": "<|im_start|>user\n%s<|im_end|>\n<|im_start|>assistant\n",
10
+ "is_visual": false
11
+ }
onnx/llm.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:705c16af2088bd313daf1fe3cb14a1c41d8db0365206a6e8da74743a06ff5c82
3
+ size 2445273
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f9e4d4901a92b997e463c1f46055088b6cca5ca61a6522d1b9f64c4bb81cb42
3
+ size 12807982
tokenizer.txt ADDED
The diff for this file is too large to render. See raw diff