Update README.md
Browse files
README.md
CHANGED
|
@@ -60,15 +60,31 @@ by training a **draft decoder layer** separately.
|
|
| 60 |
### 3. Example Code
|
| 61 |
|
| 62 |
```python
|
| 63 |
-
from transformers import AutoTokenizer
|
| 64 |
from eagle.model.ea_model import EaModel
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
| 66 |
tokenizer = AutoTokenizer.from_pretrained('allenai/OLMoE-1B-7B-0125-Instruct')
|
| 67 |
model = EaModel.from_pretrained(
|
| 68 |
base_model_path='allenai/OLMoE-1B-7B-0125-Instruct',
|
| 69 |
-
ea_model_path='
|
| 70 |
-
torch_dtype='bfloat16'
|
|
|
|
|
|
|
| 71 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
```
|
| 73 |
|
| 74 |
---
|
|
|
|
| 60 |
### 3. Example Code
|
| 61 |
|
| 62 |
```python
|
|
|
|
| 63 |
from eagle.model.ea_model import EaModel
|
| 64 |
+
from fastchat.model import get_conversation_template
|
| 65 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 66 |
+
import torch
|
| 67 |
+
|
| 68 |
tokenizer = AutoTokenizer.from_pretrained('allenai/OLMoE-1B-7B-0125-Instruct')
|
| 69 |
model = EaModel.from_pretrained(
|
| 70 |
base_model_path='allenai/OLMoE-1B-7B-0125-Instruct',
|
| 71 |
+
ea_model_path='wantsleep/OLMoE_1B_7B_Eagle3',
|
| 72 |
+
torch_dtype='bfloat16',
|
| 73 |
+
low_cpu_mem_usage=True,
|
| 74 |
+
total_token=-1
|
| 75 |
)
|
| 76 |
+
|
| 77 |
+
your_message = "Why do we study math?"
|
| 78 |
+
conv = get_conversation_template("vicuna")
|
| 79 |
+
conv.append_message(conv.roles[0], your_message)
|
| 80 |
+
conv.append_message(conv.roles[1], None)
|
| 81 |
+
prompt = conv.get_prompt()
|
| 82 |
+
input_ids = model.tokenizer([prompt]).input_ids
|
| 83 |
+
input_ids = torch.as_tensor(input_ids).to(model.base_model.device)
|
| 84 |
+
|
| 85 |
+
output_ids = model.eagenerate(input_ids, temperature=0.5, max_new_tokens=512, top_k=8)
|
| 86 |
+
output = model.tokenizer.decode(output_ids[0])
|
| 87 |
+
print(output)
|
| 88 |
```
|
| 89 |
|
| 90 |
---
|