Add files using upload-large-folder tool

Browse files

Files changed (10) hide show

.gitattributes +4 -0
README.md +62 -0
config_cpu.json +1 -0
llm.mnn +3 -0
llm.mnn.json +3 -0
llm.mnn.weight +3 -0
llm_config.json +11 -0
onnx/llm.onnx +3 -0
tokenizer.json +3 -0
tokenizer.txt +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+llm.mnn.json filter=lfs diff=lfs merge=lfs -text
+llm.mnn filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
+llm.mnn.weight filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,62 @@

+---
+license: apache-2.0
+language:
+- en
+tags:
+- tokforge
+- mnn
+- android
+- mobile
+- speculative-decoding
+- qwen3.5
+- draft-model
+- experimental
+- text-generation
+pipeline_tag: text-generation
+inference: false
+---
+# Qwen3.5-0.8B-lk-alpha-ep4-MNN
+Experimental `Qwen3.5-0.8B` draft bundle for **TokForge + MNN** speculative decoding research.
+## Why this repo exists
+This repo captures an acceptance-oriented `Qwen3.5-0.8B` draft experiment exported into a ready-to-run `MNN` bundle.
+It is here because people asked for the actual artifacts behind the work, not because it is the current default recommendation.
+## Training snapshot
+For the associated `LK Alpha` training lane:
+- final reported acceptance (`alpha`) was `0.6972` on the small Qwen3.5 dataset
+## Status
+This is currently best treated as **experimental**:
+- useful if you want to inspect the `Qwen3.5-0.8B` draft path
+- useful for reproducing training/export experiments
+- not currently the top practical mobile recommendation versus the stronger `Qwen3-0.6B` draft lane
+## Included files
+- `llm.mnn` and `llm.mnn.json`
+- `llm.mnn.weight`
+- `llm_config.json`
+- `config_cpu.json`
+- tokenizer files (`tokenizer.json`, `tokenizer.txt`)
+- ONNX export artifact for reference (`onnx/llm.onnx`)
+## Notes
+- This is an `MNN` runtime bundle for TokForge-style use.
+- It is not a standard HF Transformers checkpoint.
+## TokForge
+- Website: [tokforge.ai](https://tokforge.ai)
+- Discord: [Join the Discord](https://discord.gg/Acv3CBtfVm)
+If you benchmark this on your own device, feel free to share results in Discord.

config_cpu.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"backend_type": "cpu", "thread_num": 4, "sampler_type": "greedy"}

llm.mnn ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bd319803b4c616912a0b389f6fa3057a9adc8f1d1808d9c340b815b9ce802310
+size 2185672

llm.mnn.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a01d25cc26a2e4ba0321de0d9308bf24f86e8edb0333a918a2753ea3facc6673
+size 18181483

llm.mnn.weight ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8bc80d360b0426f7fc62c10f818ce08fefbff21627b4166da6dc46bcba0eb25d
+size 423389206

llm_config.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+    "model_type": "qwen3_5",
+    "hidden_size": 1024,
+    "num_attention_heads": 16,
+    "num_hidden_layers": 24,
+    "intermediate_size": 2816,
+    "vocab_size": 495680,
+    "attention_mask": "float",
+    "prompt_template": "<|im_start|>user\n%s<|im_end|>\n<|im_start|>assistant\n",
+    "is_visual": false
+}

onnx/llm.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:705c16af2088bd313daf1fe3cb14a1c41d8db0365206a6e8da74743a06ff5c82
+size 2445273

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5f9e4d4901a92b997e463c1f46055088b6cca5ca61a6522d1b9f64c4bb81cb42
+size 12807982

tokenizer.txt ADDED Viewed

The diff for this file is too large to render. See raw diff