wantsleep
/

OLMoE_1B_7B_Eagle3

Model card Files Files and versions

wantsleep committed on Jul 24, 2025

Commit

b12cfe7

·

verified ·

1 Parent(s): a81b86f

Update README.md

Files changed (1) hide show

README.md +20 -4

README.md CHANGED Viewed

@@ -60,15 +60,31 @@ by training a **draft decoder layer** separately.
 ### 3. Example Code
 ```python
-from transformers import AutoTokenizer
 from eagle.model.ea_model import EaModel
 tokenizer = AutoTokenizer.from_pretrained('allenai/OLMoE-1B-7B-0125-Instruct')
 model = EaModel.from_pretrained(
     base_model_path='allenai/OLMoE-1B-7B-0125-Instruct',
-    ea_model_path='path/to/this/repo',
-    torch_dtype='bfloat16'
 )
 ```
 ---

 ### 3. Example Code
 ```python
 from eagle.model.ea_model import EaModel
+from fastchat.model import get_conversation_template
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
 tokenizer = AutoTokenizer.from_pretrained('allenai/OLMoE-1B-7B-0125-Instruct')
 model = EaModel.from_pretrained(
     base_model_path='allenai/OLMoE-1B-7B-0125-Instruct',
+    ea_model_path='wantsleep/OLMoE_1B_7B_Eagle3',
+    torch_dtype='bfloat16',
+    low_cpu_mem_usage=True,
+    total_token=-1
 )
+your_message = "Why do we study math?"
+conv = get_conversation_template("vicuna")
+conv.append_message(conv.roles[0], your_message)
+conv.append_message(conv.roles[1], None)
+prompt = conv.get_prompt()
+input_ids = model.tokenizer([prompt]).input_ids
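+DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'  # must match the device the model is loaded on
+input_ids = torch.as_tensor(input_ids).to(DEVICE)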
+output_ids = model.eagenerate(input_ids, temperature=0.5, max_new_tokens=512, top_k=8)
+output = model.tokenizer.decode(output_ids[0])
+print(output)
 ```
 ---