Add files using upload-large-folder tool
Browse files- .gitattributes +5 -0
- README.md +465 -0
- chat_template.jinja +154 -0
- config.json +155 -0
- generation_config.json +13 -0
- model-00001-of-00062.safetensors +3 -0
- model-00002-of-00062.safetensors +3 -0
- model-00003-of-00062.safetensors +3 -0
- model-00005-of-00062.safetensors +3 -0
- model-00006-of-00062.safetensors +3 -0
- model-00009-of-00062.safetensors +3 -0
- model-00012-of-00062.safetensors +3 -0
- model-00015-of-00062.safetensors +3 -0
- model-00016-of-00062.safetensors +3 -0
- model-00019-of-00062.safetensors +3 -0
- model-00021-of-00062.safetensors +3 -0
- model-00022-of-00062.safetensors +3 -0
- model-00024-of-00062.safetensors +3 -0
- model-00025-of-00062.safetensors +3 -0
- model-00027-of-00062.safetensors +3 -0
- model-00028-of-00062.safetensors +3 -0
- model-00029-of-00062.safetensors +3 -0
- model-00030-of-00062.safetensors +3 -0
- model-00031-of-00062.safetensors +3 -0
- model-00032-of-00062.safetensors +3 -0
- model-00033-of-00062.safetensors +3 -0
- model-00045-of-00062.safetensors +3 -0
- model-00046-of-00062.safetensors +3 -0
- model-00047-of-00062.safetensors +3 -0
- model-00048-of-00062.safetensors +3 -0
- model-00049-of-00062.safetensors +3 -0
- model-00051-of-00062.safetensors +3 -0
- model-00052-of-00062.safetensors +3 -0
- model-00053-of-00062.safetensors +3 -0
- model-00054-of-00062.safetensors +3 -0
- model-00055-of-00062.safetensors +3 -0
- model-00057-of-00062.safetensors +3 -0
- model-00058-of-00062.safetensors +3 -0
- model-00059-of-00062.safetensors +3 -0
- model-00060-of-00062.safetensors +3 -0
- model-00061-of-00062.safetensors +3 -0
- model-00062-of-00062.safetensors +3 -0
- model.safetensors.index.json +3 -0
- preprocessor_config.json +21 -0
- processor_config.json +60 -0
- quantization_config.json +3 -0
- tokenizer.json +3 -0
- tokenizer_config.json +33 -0
- video_preprocessor_config.json +21 -0
- vocab.json +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
assets/ornith_397b_eval.png filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
assets/ornith_logo.png filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
model.safetensors.index.json filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
quantization_config.json filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,465 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
library_name: transformers
|
| 3 |
+
license: mit
|
| 4 |
+
license_link: https://huggingface.co/deepreinforce-ai/Ornith-1.0-397B/blob/main/LICENSE
|
| 5 |
+
pipeline_tag: text-generation
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
<img width="600px" src="assets/ornith_logo.png">
|
| 10 |
+
|
| 11 |
+
[Ornith project page](https://deep-reinforce.com/ornith.html)
|
| 12 |
+
|
| 13 |
+
# Ornith-1.0-397B
|
| 14 |
+
|
| 15 |
+
Aloha! 🌺 Today, we are releasing Ornith-1.0, a self-improving family of open-source models for agentic coding.
|
| 16 |
+
|
| 17 |
+
Highlights:
|
| 18 |
+
|
| 19 |
+
- **State-of-the-Art Coding Agents**: Available in 9B-Dense, 31B-Dense, 35B-MoE, and 397B-MoE (post-trained on top of Gemma 4 and Qwen 3.5), achieving state-of-the-art performance among open-source models of comparable size on coding benchmarks such as Terminal-Bench 2.1, SWE-Bench, NL2Repo and OpenClaw.
|
| 20 |
+
- **Self-Improving Training Framework**: Ornith-1.0 employs RL to learn to generate not only solution rollouts, but also the scaffold that drives those rollouts. By jointly optimizing the scaffold and the resulting solution, the model discovers better search trajectories and generates higher-quality solutions.
|
| 21 |
+
- **Licence**: MIT licensed, globally accessible, and free from regional limitations.
|
| 22 |
+
|
| 23 |
+
<img style="width: 100%; max-width: 900px;" src="assets/ornith_397b_eval.png" alt="Ornith 397B Benchmark Results" title="Ornith 397B Benchmark Results">
|
| 24 |
+
|
| 25 |
+
## Ornith 1.0 397B
|
| 26 |
+
|
| 27 |
+
This model card documents **Ornith-1.0-397B**, the largest member of the Ornith family, designed for deployment on a multi-GPU node (see the serving recipes below).
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
### Benchmarks
|
| 31 |
+
|
| 32 |
+
<div style="font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,sans-serif;width:100%;margin:0 auto;padding:16px 0">
|
| 33 |
+
<table style="width:100%;table-layout:fixed;border-collapse:collapse;font-size:13px">
|
| 34 |
+
<thead><tr>
|
| 35 |
+
<th style="width:24%;padding:10px 7px;text-align:left;font-weight:600;border-bottom:2px solid #FD8E5B;color:#FD8E5B"></th>
|
| 36 |
+
<th style="width:9.50%;padding:10px 7px;text-align:center;font-weight:700;border-bottom:2px solid #FD8E5B;color:#FD8E5B;font-size:14px;background:rgba(253, 142, 91, 0.12)">Ornith-1.0-397B</th>
|
| 37 |
+
<th style="width:9.50%;padding:10px 7px;text-align:center;font-weight:500;border-bottom:2px solid #FD8E5B;color:#FD8E5B;font-size:14px">Qwen3.5-397B</th>
|
| 38 |
+
<th style="width:9.50%;padding:10px 7px;text-align:center;font-weight:500;border-bottom:2px solid #FD8E5B;color:#FD8E5B;font-size:14px">Qwen3.7-Max</th>
|
| 39 |
+
<th style="width:9.50%;padding:10px 7px;text-align:center;font-weight:500;border-bottom:2px solid #FD8E5B;color:#FD8E5B;font-size:14px">GLM-5.2-744B</th>
|
| 40 |
+
<th style="width:9.50%;padding:10px 7px;text-align:center;font-weight:500;border-bottom:2px solid #FD8E5B;color:#FD8E5B;font-size:14px">Minimax-M3-428B</th>
|
| 41 |
+
<th style="width:9.50%;padding:10px 7px;text-align:center;font-weight:500;border-bottom:2px solid #FD8E5B;color:#FD8E5B;font-size:14px">DeepSeek-V4-Pro-1.6T</th>
|
| 42 |
+
<th style="width:9.50%;padding:10px 7px;text-align:center;font-weight:500;border-bottom:2px solid #FD8E5B;color:#FD8E5B;font-size:14px">Claude Opus 4.7</th>
|
| 43 |
+
<th style="width:9.50%;padding:10px 7px;text-align:center;font-weight:500;border-bottom:2px solid #FD8E5B;color:#FD8E5B;font-size:14px">Claude Opus 4.8</th>
|
| 44 |
+
</tr></thead>
|
| 45 |
+
<tbody>
|
| 46 |
+
<tr><td colspan="9" style="padding:8px 12px;font-weight:600;color:#FD8E5B;border-bottom:1px solid rgba(253, 142, 91, 0.2);background:rgba(253, 142, 91, 0.1)">Agentic Coding</td></tr>
|
| 47 |
+
<tr>
|
| 48 |
+
<td style="padding:7px 7px;padding-left:20px;border-bottom:1px solid rgba(128, 128, 128, 0.15);">Terminal-Bench 2.1 <sub><small>(Terminus-2)</small></sub></td>
|
| 49 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15);font-weight:600;color:#FD8E5B;background:rgba(253, 142, 91, 0.06)">77.5</td>
|
| 50 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">53.5</td>
|
| 51 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">73.5</td>
|
| 52 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">81.0</td>
|
| 53 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">64</td>
|
| 54 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">64</td>
|
| 55 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">70.3</td>
|
| 56 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">85</td>
|
| 57 |
+
</tr>
|
| 58 |
+
<tr>
|
| 59 |
+
<td style="padding:7px 7px;padding-left:20px;border-bottom:1px solid rgba(128, 128, 128, 0.15);">Terminal-Bench 2.1 <sub><small>(Claude Code)</small></sub></td>
|
| 60 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15);font-weight:600;color:#FD8E5B;background:rgba(253, 142, 91, 0.06)">78.2</td>
|
| 61 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">48.6</td>
|
| 62 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">69.8</td>
|
| 63 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">82.7</td>
|
| 64 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 65 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">66.5</td>
|
| 66 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">69.7</td>
|
| 67 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">78.9</td>
|
| 68 |
+
</tr>
|
| 69 |
+
<tr>
|
| 70 |
+
<td style="padding:7px 7px;padding-left:20px;border-bottom:1px solid rgba(128, 128, 128, 0.15);">SWE-bench Verified</td>
|
| 71 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15);font-weight:600;color:#FD8E5B;background:rgba(253, 142, 91, 0.06)">82.4</td>
|
| 72 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">76.4</td>
|
| 73 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">80.4</td>
|
| 74 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 75 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 76 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">80.6</td>
|
| 77 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">80.8</td>
|
| 78 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">87.6</td>
|
| 79 |
+
</tr>
|
| 80 |
+
<tr>
|
| 81 |
+
<td style="padding:7px 7px;padding-left:20px;border-bottom:1px solid rgba(128, 128, 128, 0.15);">SWE-bench Pro</td>
|
| 82 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15);font-weight:600;color:#FD8E5B;background:rgba(253, 142, 91, 0.06)">62.2</td>
|
| 83 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">51.6</td>
|
| 84 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">60.6</td>
|
| 85 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">62.1</td>
|
| 86 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">59</td>
|
| 87 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">55.4</td>
|
| 88 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">64.3</td>
|
| 89 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">69.2</td>
|
| 90 |
+
</tr>
|
| 91 |
+
<tr>
|
| 92 |
+
<td style="padding:7px 7px;padding-left:20px;border-bottom:1px solid rgba(128, 128, 128, 0.15);">SWE-bench Multilingual</td>
|
| 93 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15);font-weight:600;color:#FD8E5B;background:rgba(253, 142, 91, 0.06)">78.9</td>
|
| 94 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">69.3</td>
|
| 95 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">78.3</td>
|
| 96 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 97 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 98 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">76.2</td>
|
| 99 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 100 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 101 |
+
</tr>
|
| 102 |
+
<tr>
|
| 103 |
+
<td style="padding:7px 7px;padding-left:20px;border-bottom:1px solid rgba(128, 128, 128, 0.15);">NL2Repo</td>
|
| 104 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15);font-weight:600;color:#FD8E5B;background:rgba(253, 142, 91, 0.06)">48.2</td>
|
| 105 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">36.8</td>
|
| 106 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">47.2</td>
|
| 107 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">48.9</td>
|
| 108 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">42.1</td>
|
| 109 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 110 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 111 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">69.7</td>
|
| 112 |
+
</tr>
|
| 113 |
+
<tr>
|
| 114 |
+
<td style="padding:7px 7px;padding-left:20px;border-bottom:1px solid rgba(128, 128, 128, 0.15);">Claw-eval Avg</td>
|
| 115 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15);font-weight:600;color:#FD8E5B;background:rgba(253, 142, 91, 0.06)">77.1</td>
|
| 116 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">70.7</td>
|
| 117 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">65.2</td>
|
| 118 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 119 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 120 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">75.8</td>
|
| 121 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">78.2</td>
|
| 122 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 123 |
+
</tr>
|
| 124 |
+
<tr>
|
| 125 |
+
<td style="padding:7px 7px;padding-left:20px;border-bottom:1px solid rgba(128, 128, 128, 0.15);">SWE Atlas - QnA</td>
|
| 126 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15);font-weight:600;color:#FD8E5B;background:rgba(253, 142, 91, 0.06)">41.2</td>
|
| 127 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">20.4</td>
|
| 128 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 129 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 130 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">37.9</td>
|
| 131 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">27.2</td>
|
| 132 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">40.3</td>
|
| 133 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">48.8</td>
|
| 134 |
+
</tr>
|
| 135 |
+
<tr>
|
| 136 |
+
<td style="padding:7px 7px;padding-left:20px;border-bottom:1px solid rgba(128, 128, 128, 0.15);">SWE Atlas - RF</td>
|
| 137 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15);font-weight:600;color:#FD8E5B;background:rgba(253, 142, 91, 0.06)">42.6</td>
|
| 138 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">18.4</td>
|
| 139 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 140 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 141 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 142 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 143 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">48.6</td>
|
| 144 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">46.7</td>
|
| 145 |
+
</tr>
|
| 146 |
+
<tr>
|
| 147 |
+
<td style="padding:7px 7px;padding-left:20px;border-bottom:1px solid rgba(128, 128, 128, 0.15);">SWE Atlas - TW</td>
|
| 148 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15);font-weight:600;color:#FD8E5B;background:rgba(253, 142, 91, 0.06)">39.1</td>
|
| 149 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">18.5</td>
|
| 150 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 151 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 152 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">30.8</td>
|
| 153 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 154 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">38.5</td>
|
| 155 |
+
<td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
|
| 156 |
+
</tr>
|
| 157 |
+
</tbody>
|
| 158 |
+
</table>
|
| 159 |
+
|
| 160 |
+
<p style="margin-top:12px;font-size:10px;opacity:0.7">
|
| 161 |
+
* Terminal-Bench 2.1 (Terminus-2): We evaluate Terminal-Bench 2.1 using the Harbor/Terminus-2 framework with parser=json, temperature=1.0, top_p=1.0, and a 128K context window. Each run uses a 4-hour timeout with 32 CPU cores and 48GB RAM, and results are averaged over 5 runs. We adjust the Qwen chat template to ensure consistency between training and inference (https://huggingface.co/deepreinforce-ai/Ornith-1.0-397B/blob/main/chat_template.jinja), and modify Harbor to align with vLLM's reasoning_content key.<br/>
|
| 162 |
+
* Terminal-Bench 2.1 (Claude Code): We evaluate Terminal-Bench 2.1 using Claude Code 2.1.126 with parser=json, temperature=1.0, top_p=1.0, max_new_tokens=131072. Results are averaged over 5 runs. As in the Terminus-2 setup, we use the adjusted Qwen chat template to keep training and inference consistent.<br/>
|
| 163 |
+
* SWE-Bench Verified, Pro and Multilingual: using OpenHands harness with temp=1.0, top_p=0.95, 256k context window.<br/>
|
| 164 |
+
* SWE Atlas QnA, RF, TW: using mini SWE agent harness with temp=1.0, top_p=0.95, 128K context window. Results are averaged over 5 runs.<br/>
|
| 165 |
+
* NL2Repo: with temperature=1.0, top_p=1.0, 400K context, 48K output and anti-hacking filters.<br/>
|
| 166 |
+
* Claw-eval: An agentic coding benchmark over real-user task distributions; temp=0.6 and 256K context.<br/>
|
| 167 |
+
</p>
|
| 168 |
+
|
| 169 |
+
</div>
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
## Quickstart
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
<div style="border-left:4px solid #FD8E5B;background:rgba(253,142,91,0.1);border-radius:6px;padding:12px 16px;font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,sans-serif;font-size:14px;line-height:1.6">
|
| 178 |
+
<div style="font-weight:700;color:#FD8E5B;margin-bottom:6px">📝 NOTE</div>
|
| 179 |
+
<p style="margin:0 0 10px"><b>Ornith-1.0-397B</b> is a <b>reasoning model</b>: by default the assistant turn opens with a <code style="background:rgba(253,142,91,0.15);padding:1px 5px;border-radius:4px">&lt;think&gt; … &lt;/think&gt;</code> block before the final answer. The serving recipes below enable a reasoning parser so the chain-of-thought is returned in a separate <code style="background:rgba(253,142,91,0.15);padding:1px 5px;border-radius:4px">reasoning_content</code> field, and a tool-call parser so the model's <code style="background:rgba(253,142,91,0.15);padding:1px 5px;border-radius:4px">&lt;tool_call&gt;</code> blocks are surfaced as OpenAI-style <code style="background:rgba(253,142,91,0.15);padding:1px 5px;border-radius:4px">tool_calls</code>.</p>
|
| 180 |
+
<p style="margin:0 0 6px">Serving Ornith-1.0-397B requires recent runtimes:</p>
|
| 181 |
+
<ul style="margin:0;padding-left:20px">
|
| 182 |
+
<li><b>Transformers</b> ≥ 5.8.1</li>
|
| 183 |
+
<li><b>vLLM</b> ≥ 0.19.1</li>
|
| 184 |
+
<li><b>SGLang</b> ≥ 0.5.9</li>
|
| 185 |
+
</ul>
|
| 186 |
+
</div>
|
| 187 |
+
|
| 188 |
+
### Serving Ornith-1.0-397B
|
| 189 |
+
|
| 190 |
+
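The two recipes below stand up an OpenAI-compatible server on a single 8×80GB GPU node (tensor-parallel 8). Adjust `--tensor-parallel-size` / `--tp` to the number of GPUs you have. Both recipes register the model under the name `Ornith-1.0-397B` (`--served-model-name`), so use that name in the `model` field of API requests.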
|
| 191 |
+
|
| 192 |
+
#### vLLM
|
| 193 |
+
|
| 194 |
+
```bash
|
| 195 |
+
vllm serve deepreinforce-ai/Ornith-1.0-397B \
|
| 196 |
+
--served-model-name Ornith-1.0-397B \
|
| 197 |
+
--tensor-parallel-size 8 \
|
| 198 |
+
--host 0.0.0.0 --port 8000 \
|
| 199 |
+
--max-model-len 262144 \
|
| 200 |
+
--gpu-memory-utilization 0.90 \
|
| 201 |
+
--enable-prefix-caching \
|
| 202 |
+
--enable-auto-tool-choice --tool-call-parser qwen3_xml \
|
| 203 |
+
--reasoning-parser qwen3 \
|
| 204 |
+
--trust-remote-code
|
| 205 |
+
```
|
| 206 |
+
|
| 207 |
+
#### SGLang
|
| 208 |
+
|
| 209 |
+
```bash
|
| 210 |
+
python -m sglang.launch_server \
|
| 211 |
+
--model-path deepreinforce-ai/Ornith-1.0-397B \
|
| 212 |
+
--served-model-name Ornith-1.0-397B \
|
| 213 |
+
--tp 8 \
|
| 214 |
+
--host 0.0.0.0 --port 8000 \
|
| 215 |
+
--context-length 262144 \
|
| 216 |
+
--mem-fraction-static 0.85 \
|
| 217 |
+
--tool-call-parser qwen3_coder \
|
| 218 |
+
--reasoning-parser qwen3
|
| 219 |
+
```
|
| 220 |
+
|
| 221 |
+
#### Hugging Face Transformers
|
| 222 |
+
|
| 223 |
+
For a quick local test (or to script offline generation), load the model directly with Transformers. Make sure you have a recent release installed — see the [Transformers installation guide](https://huggingface.co/docs/transformers/installation); Ornith-1.0-397B requires `transformers >= 5.8.1`.
|
| 224 |
+
|
| 225 |
+
```python
|
| 226 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 227 |
+
|
| 228 |
+
model_name = "deepreinforce-ai/Ornith-1.0-397B"
|
| 229 |
+
|
| 230 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
| 231 |
+
model = AutoModelForCausalLM.from_pretrained(
|
| 232 |
+
model_name,
|
| 233 |
+
dtype="auto",
|
| 234 |
+
device_map="auto",
|
| 235 |
+
)
|
| 236 |
+
|
| 237 |
+
messages = [
|
| 238 |
+
{"role": "user", "content": "Write a Python function is_prime(n). Keep it short."}
|
| 239 |
+
]
|
| 240 |
+
text = tokenizer.apply_chat_template(
|
| 241 |
+
messages,
|
| 242 |
+
tokenize=False,
|
| 243 |
+
add_generation_prompt=True,
|
| 244 |
+
)
|
| 245 |
+
|
| 246 |
+
inputs = tokenizer(text, return_tensors="pt").to(model.device)
|
| 247 |
+
generated = model.generate(
|
| 248 |
+
**inputs,
|
| 249 |
+
max_new_tokens=512,
|
| 250 |
+
do_sample=True,
|
| 251 |
+
temperature=0.6,
|
| 252 |
+
top_p=0.95,
|
| 253 |
+
top_k=20,
|
| 254 |
+
)
|
| 255 |
+
output_ids = generated[0][inputs.input_ids.shape[1]:]
|
| 256 |
+
|
| 257 |
+
# The reply contains a <think> ... </think> reasoning block followed by the answer.
|
| 258 |
+
content = tokenizer.decode(output_ids, skip_special_tokens=True)
|
| 259 |
+
print(content)
|
| 260 |
+
```
|
| 261 |
+
|
| 262 |
+
To split the reasoning trace from the final answer, parse on the `</think>` marker:
|
| 263 |
+
|
| 264 |
+
```python
|
| 265 |
+
text = tokenizer.decode(output_ids, skip_special_tokens=True)
|
| 266 |
+
if "</think>" in text:
|
| 267 |
+
reasoning, answer = text.split("</think>", 1)
|
| 268 |
+
reasoning = reasoning.replace("<think>", "").strip()
|
| 269 |
+
answer = answer.strip()
|
| 270 |
+
else:
|
| 271 |
+
reasoning, answer = "", text.strip()
|
| 272 |
+
```
|
| 273 |
+
|
| 274 |
+
### Using Ornith-1.0-397B via the Chat Completions API
|
| 275 |
+
|
| 276 |
+
Once a vLLM or SGLang server is running, talk to it with any OpenAI-compatible client.
|
| 277 |
+
|
| 278 |
+
#### Basic Usage
|
| 279 |
+
|
| 280 |
+
```python
|
| 281 |
+
from openai import OpenAI
|
| 282 |
+
|
| 283 |
+
client = OpenAI(
|
| 284 |
+
base_url="http://localhost:8000/v1",
|
| 285 |
+
api_key="EMPTY", # any non-empty string works for a local server
|
| 286 |
+
)
|
| 287 |
+
|
| 288 |
+
response = client.chat.completions.create(
|
| 289 |
+
model="Ornith-1.0-397B",
|
| 290 |
+
messages=[
|
| 291 |
+
{"role": "user", "content": "Write a one-line Python lambda that squares a number."}
|
| 292 |
+
],
|
| 293 |
+
temperature=0.6,
|
| 294 |
+
top_p=0.95,
|
| 295 |
+
max_tokens=1024,
|
| 296 |
+
)
|
| 297 |
+
|
| 298 |
+
message = response.choices[0].message
|
| 299 |
+
# reasoning_content holds the <think> trace; content holds the final answer.
|
| 300 |
+
print("reasoning:", getattr(message, "reasoning_content", None))
|
| 301 |
+
print("answer:", message.content)
|
| 302 |
+
```
|
| 303 |
+
|
| 304 |
+
You can also stream tokens, or hand the model tools — Ornith-1.0-397B emits well-formed function calls that the server parses into the standard `tool_calls` field:
|
| 305 |
+
|
| 306 |
+
```python
|
| 307 |
+
tools = [
|
| 308 |
+
{
|
| 309 |
+
"type": "function",
|
| 310 |
+
"function": {
|
| 311 |
+
"name": "get_weather",
|
| 312 |
+
"description": "Get the current weather for a city",
|
| 313 |
+
"parameters": {
|
| 314 |
+
"type": "object",
|
| 315 |
+
"properties": {"city": {"type": "string"}},
|
| 316 |
+
"required": ["city"],
|
| 317 |
+
},
|
| 318 |
+
},
|
| 319 |
+
}
|
| 320 |
+
]
|
| 321 |
+
|
| 322 |
+
response = client.chat.completions.create(
|
| 323 |
+
model="Ornith-1.0-397B",
|
| 324 |
+
messages=[{"role": "user", "content": "What is the weather in Paris right now?"}],
|
| 325 |
+
tools=tools,
|
| 326 |
+
tool_choice="auto",
|
| 327 |
+
temperature=0.6,
|
| 328 |
+
max_tokens=2048,
|
| 329 |
+
)
|
| 330 |
+
|
| 331 |
+
tool_call = response.choices[0].message.tool_calls[0]
|
| 332 |
+
print(tool_call.function.name, tool_call.function.arguments)
|
| 333 |
+
# -> get_weather {"city": "Paris"}
|
| 334 |
+
```
|
| 335 |
+
|
| 336 |
+
You can point any OpenAI-compatible SDK (Python, Node.js, etc.) or `curl` at the same `/v1/chat/completions` endpoint.
|
| 337 |
+
|
| 338 |
+
## Agentic Usage
|
| 339 |
+
|
| 340 |
+
Ornith-1.0-397B excels at tool calling and agentic coding.
|
| 341 |
+
|
| 342 |
+
### Agent Frameworks
|
| 343 |
+
|
| 344 |
+
Because Ornith-1.0-397B exposes an OpenAI-compatible endpoint with tool calling, it works out of the box with standard agent frameworks. Below is a minimal example that hands Ornith-1.0-397B a shell tool definition through the Chat Completions API.
|
| 345 |
+
|
| 346 |
+
```python
|
| 347 |
+
import os
|
| 348 |
+
from openai import OpenAI
|
| 349 |
+
|
| 350 |
+
client = OpenAI(
|
| 351 |
+
base_url=os.getenv("OPENAI_BASE_URL", "http://localhost:8000/v1"),
|
| 352 |
+
api_key=os.getenv("OPENAI_API_KEY", "EMPTY"),
|
| 353 |
+
)
|
| 354 |
+
|
| 355 |
+
tools = [
|
| 356 |
+
{
|
| 357 |
+
"type": "function",
|
| 358 |
+
"function": {
|
| 359 |
+
"name": "run_shell",
|
| 360 |
+
"description": "Run a shell command and return its output.",
|
| 361 |
+
"parameters": {
|
| 362 |
+
"type": "object",
|
| 363 |
+
"properties": {
|
| 364 |
+
"command": {"type": "string", "description": "The command to run"}
|
| 365 |
+
},
|
| 366 |
+
"required": ["command"],
|
| 367 |
+
},
|
| 368 |
+
},
|
| 369 |
+
}
|
| 370 |
+
]
|
| 371 |
+
|
| 372 |
+
messages = [{"role": "user", "content": "List the Python files in the current directory."}]
|
| 373 |
+
|
| 374 |
+
response = client.chat.completions.create(
|
| 375 |
+
model="Ornith-1.0-397B",  # must match the server's --served-model-name
|
| 376 |
+
messages=messages,
|
| 377 |
+
tools=tools,
|
| 378 |
+
temperature=0.6,
|
| 379 |
+
top_p=0.95,
|
| 380 |
+
)
|
| 381 |
+
print(response.choices[0].message)
|
| 382 |
+
```
|
| 383 |
+
|
| 384 |
+
**Examples of using Ornith with agent harnesses:**
|
| 385 |
+
|
| 386 |
+
#### Hermes Agent
|
| 387 |
+
```bash
|
| 388 |
+
# Hermes talks to any OpenAI-compatible endpoint — point it at your Ornith server.
|
| 389 |
+
export OPENAI_BASE_URL="http://localhost:8000/v1"
|
| 390 |
+
export OPENAI_API_KEY="EMPTY"
|
| 391 |
+
export MODEL="Ornith-1.0-397B"  # must match the server's --served-model-name
|
| 392 |
+
```
|
| 393 |
+
|
| 394 |
+
#### OpenClaw
|
| 395 |
+
|
| 396 |
+
```bash
|
| 397 |
+
# OpenClaw talks to any OpenAI-compatible endpoint — point it at your Ornith server.
|
| 398 |
+
export OPENAI_BASE_URL="http://localhost:8000/v1"
|
| 399 |
+
export OPENAI_API_KEY="EMPTY"
|
| 400 |
+
export OPENAI_MODEL="deepreinforce-ai/Ornith-1.0-397B"
|
| 401 |
+
```
|
| 402 |
+
|
| 403 |
+
#### Unsloth Studio
|
| 404 |
+
|
| 405 |
+
```bash
|
| 406 |
+
pip install unsloth
|
| 407 |
+
|
| 408 |
+
# Load Ornith for fast local inference or fine-tuning (Python):
|
| 409 |
+
# from unsloth import FastLanguageModel
|
| 410 |
+
# model, tokenizer = FastLanguageModel.from_pretrained(
|
| 411 |
+
# "deepreinforce-ai/Ornith-1.0-397B",
|
| 412 |
+
# max_seq_length=262144,
|
| 413 |
+
# load_in_4bit=True,
|
| 414 |
+
# )
|
| 415 |
+
```
|
| 416 |
+
|
| 417 |
+
#### OpenHands
|
| 418 |
+
```bash
|
| 419 |
+
pip install openhands-ai
|
| 420 |
+
|
| 421 |
+
# OpenHands routes through LiteLLM; the "openai/" prefix selects the OpenAI-compatible path.
|
| 422 |
+
export LLM_MODEL="openai/deepreinforce-ai/Ornith-1.0-397B"
|
| 423 |
+
export LLM_BASE_URL="http://localhost:8000/v1"
|
| 424 |
+
export LLM_API_KEY="EMPTY"
|
| 425 |
+
|
| 426 |
+
# Launch the CLI (or run the official OpenHands Docker image with the same env vars).
|
| 427 |
+
openhands
|
| 428 |
+
```
|
| 429 |
+
|
| 430 |
+
### Coding CLIs
|
| 431 |
+
|
| 432 |
+
Ornith-1.0-397B is optimized for terminal-based coding agents. Point any OpenAI-compatible coding CLI at your Ornith-1.0-397B endpoint (set `OPENAI_BASE_URL` and `OPENAI_API_KEY`) to understand large codebases, automate tedious work, and ship faster.
|
| 433 |
+
|
| 434 |
+
#### OpenCode
|
| 435 |
+
```bash
|
| 436 |
+
# Register your local Ornith endpoint as a provider in ~/.config/opencode/opencode.json:
|
| 437 |
+
#
|
| 438 |
+
# {
|
| 439 |
+
# "$schema": "https://opencode.ai/config.json",
|
| 440 |
+
# "provider": {
|
| 441 |
+
# "ornith": {
|
| 442 |
+
# "npm": "@ai-sdk/openai-compatible",
|
| 443 |
+
# "name": "Ornith (local)",
|
| 444 |
+
# "options": { "baseURL": "http://localhost:8000/v1", "apiKey": "EMPTY" },
|
| 445 |
+
# "models": { "deepreinforce-ai/Ornith-1.0-397B": { "name": "Ornith-1.0-397B" } }
|
| 446 |
+
# }
|
| 447 |
+
# }
|
| 448 |
+
# }
|
| 449 |
+
|
| 450 |
+
opencode
|
| 451 |
+
```
|
| 452 |
+
|
| 453 |
+
|
| 454 |
+
### Citation
|
| 455 |
+
|
| 456 |
+
If you find our work helpful, please consider citing it.
|
| 457 |
+
|
| 458 |
+
```bibtex
|
| 459 |
+
@misc{ornith_397b,
|
| 460 |
+
title = {{Ornith-1.0-397B}: Agentic Coding, Open to All},
|
| 461 |
+
url = {https://deep-reinforce.com/ornith_1_0.html},
|
| 462 |
+
author = {{DeepReinforce Team}},
|
| 463 |
+
year = {2026}
|
| 464 |
+
}
|
| 465 |
+
```
|
chat_template.jinja
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- set image_count = namespace(value=0) %}
|
| 2 |
+
{%- set video_count = namespace(value=0) %}
|
| 3 |
+
{%- macro render_content(content, do_vision_count, is_system_content=false) %}
|
| 4 |
+
{%- if content is string %}
|
| 5 |
+
{{- content }}
|
| 6 |
+
{%- elif content is iterable and content is not mapping %}
|
| 7 |
+
{%- for item in content %}
|
| 8 |
+
{%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
|
| 9 |
+
{%- if is_system_content %}
|
| 10 |
+
{{- raise_exception('System message cannot contain images.') }}
|
| 11 |
+
{%- endif %}
|
| 12 |
+
{%- if do_vision_count %}
|
| 13 |
+
{%- set image_count.value = image_count.value + 1 %}
|
| 14 |
+
{%- endif %}
|
| 15 |
+
{%- if add_vision_id %}
|
| 16 |
+
{{- 'Picture ' ~ image_count.value ~ ': ' }}
|
| 17 |
+
{%- endif %}
|
| 18 |
+
{{- '<|vision_start|><|image_pad|><|vision_end|>' }}
|
| 19 |
+
{%- elif 'video' in item or item.type == 'video' %}
|
| 20 |
+
{%- if is_system_content %}
|
| 21 |
+
{{- raise_exception('System message cannot contain videos.') }}
|
| 22 |
+
{%- endif %}
|
| 23 |
+
{%- if do_vision_count %}
|
| 24 |
+
{%- set video_count.value = video_count.value + 1 %}
|
| 25 |
+
{%- endif %}
|
| 26 |
+
{%- if add_vision_id %}
|
| 27 |
+
{{- 'Video ' ~ video_count.value ~ ': ' }}
|
| 28 |
+
{%- endif %}
|
| 29 |
+
{{- '<|vision_start|><|video_pad|><|vision_end|>' }}
|
| 30 |
+
{%- elif 'text' in item %}
|
| 31 |
+
{{- item.text }}
|
| 32 |
+
{%- else %}
|
| 33 |
+
{{- raise_exception('Unexpected item type in content.') }}
|
| 34 |
+
{%- endif %}
|
| 35 |
+
{%- endfor %}
|
| 36 |
+
{%- elif content is none or content is undefined %}
|
| 37 |
+
{{- '' }}
|
| 38 |
+
{%- else %}
|
| 39 |
+
{{- raise_exception('Unexpected content type.') }}
|
| 40 |
+
{%- endif %}
|
| 41 |
+
{%- endmacro %}
|
| 42 |
+
{%- if not messages %}
|
| 43 |
+
{{- raise_exception('No messages provided.') }}
|
| 44 |
+
{%- endif %}
|
| 45 |
+
{%- if tools and tools is iterable and tools is not mapping %}
|
| 46 |
+
{{- '<|im_start|>system\n' }}
|
| 47 |
+
{{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
|
| 48 |
+
{%- for tool in tools %}
|
| 49 |
+
{{- "\n" }}
|
| 50 |
+
{{- tool | tojson }}
|
| 51 |
+
{%- endfor %}
|
| 52 |
+
{{- "\n</tools>" }}
|
| 53 |
+
{{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
|
| 54 |
+
{%- if messages[0].role == 'system' %}
|
| 55 |
+
{%- set content = render_content(messages[0].content, false, true)|trim %}
|
| 56 |
+
{%- if content %}
|
| 57 |
+
{{- '\n\n' + content }}
|
| 58 |
+
{%- endif %}
|
| 59 |
+
{%- endif %}
|
| 60 |
+
{{- '<|im_end|>\n' }}
|
| 61 |
+
{%- else %}
|
| 62 |
+
{%- if messages[0].role == 'system' %}
|
| 63 |
+
{%- set content = render_content(messages[0].content, false, true)|trim %}
|
| 64 |
+
{{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
|
| 65 |
+
{%- endif %}
|
| 66 |
+
{%- endif %}
|
| 67 |
+
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
| 68 |
+
{%- for message in messages[::-1] %}
|
| 69 |
+
{%- set index = (messages|length - 1) - loop.index0 %}
|
| 70 |
+
{%- if ns.multi_step_tool and message.role == "user" %}
|
| 71 |
+
{%- set content = render_content(message.content, false)|trim %}
|
| 72 |
+
{%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
|
| 73 |
+
{%- set ns.multi_step_tool = false %}
|
| 74 |
+
{%- set ns.last_query_index = index %}
|
| 75 |
+
{%- endif %}
|
| 76 |
+
{%- endif %}
|
| 77 |
+
{%- endfor %}
|
| 78 |
+
{%- if ns.multi_step_tool %}
|
| 79 |
+
{{- raise_exception('No user query found in messages.') }}
|
| 80 |
+
{%- endif %}
|
| 81 |
+
{%- for message in messages %}
|
| 82 |
+
{%- set content = render_content(message.content, true)|trim %}
|
| 83 |
+
{%- if message.role == "system" %}
|
| 84 |
+
{%- if not loop.first %}
|
| 85 |
+
{{- raise_exception('System message must be at the beginning.') }}
|
| 86 |
+
{%- endif %}
|
| 87 |
+
{%- elif message.role == "user" %}
|
| 88 |
+
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
| 89 |
+
{%- elif message.role == "assistant" %}
|
| 90 |
+
{%- set reasoning_content = '' %}
|
| 91 |
+
{%- if message.reasoning_content is string %}
|
| 92 |
+
{%- set reasoning_content = message.reasoning_content %}
|
| 93 |
+
{%- else %}
|
| 94 |
+
{%- if '</think>' in content %}
|
| 95 |
+
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
| 96 |
+
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
| 97 |
+
{%- endif %}
|
| 98 |
+
{%- endif %}
|
| 99 |
+
{%- set reasoning_content = reasoning_content|trim %}
|
| 100 |
+
{%- if loop.index0 > ns.last_query_index %}
|
| 101 |
+
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
|
| 102 |
+
{%- else %}
|
| 103 |
+
{{- '<|im_start|>' + message.role + '\n' + content }}
|
| 104 |
+
{%- endif %}
|
| 105 |
+
{%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
|
| 106 |
+
{%- for tool_call in message.tool_calls %}
|
| 107 |
+
{%- if tool_call.function is defined %}
|
| 108 |
+
{%- set tool_call = tool_call.function %}
|
| 109 |
+
{%- endif %}
|
| 110 |
+
{%- if loop.first %}
|
| 111 |
+
{%- if content|trim %}
|
| 112 |
+
{{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
|
| 113 |
+
{%- else %}
|
| 114 |
+
{{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
|
| 115 |
+
{%- endif %}
|
| 116 |
+
{%- else %}
|
| 117 |
+
{{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
|
| 118 |
+
{%- endif %}
|
| 119 |
+
{%- if tool_call.arguments is defined %}
|
| 120 |
+
{%- for args_name, args_value in tool_call.arguments|items %}
|
| 121 |
+
{{- '<parameter=' + args_name + '>\n' }}
|
| 122 |
+
{%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
|
| 123 |
+
{{- args_value }}
|
| 124 |
+
{{- '\n</parameter>\n' }}
|
| 125 |
+
{%- endfor %}
|
| 126 |
+
{%- endif %}
|
| 127 |
+
{{- '</function>\n</tool_call>' }}
|
| 128 |
+
{%- endfor %}
|
| 129 |
+
{%- endif %}
|
| 130 |
+
{{- '<|im_end|>\n' }}
|
| 131 |
+
{%- elif message.role == "tool" %}
|
| 132 |
+
{%- if loop.previtem and loop.previtem.role != "tool" %}
|
| 133 |
+
{{- '<|im_start|>user' }}
|
| 134 |
+
{%- endif %}
|
| 135 |
+
{{- '\n<tool_response>\n' }}
|
| 136 |
+
{{- content }}
|
| 137 |
+
{{- '\n</tool_response>' }}
|
| 138 |
+
{%- if not loop.last and loop.nextitem.role != "tool" %}
|
| 139 |
+
{{- '<|im_end|>\n' }}
|
| 140 |
+
{%- elif loop.last %}
|
| 141 |
+
{{- '<|im_end|>\n' }}
|
| 142 |
+
{%- endif %}
|
| 143 |
+
{%- else %}
|
| 144 |
+
{{- raise_exception('Unexpected message role.') }}
|
| 145 |
+
{%- endif %}
|
| 146 |
+
{%- endfor %}
|
| 147 |
+
{%- if add_generation_prompt %}
|
| 148 |
+
{{- '<|im_start|>assistant\n' }}
|
| 149 |
+
{%- if enable_thinking is defined and enable_thinking is false %}
|
| 150 |
+
{{- '<think>\n\n</think>\n\n' }}
|
| 151 |
+
{%- else %}
|
| 152 |
+
{{- '<think>\n' }}
|
| 153 |
+
{%- endif %}
|
| 154 |
+
{%- endif %}
|
config.json
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen3_5MoeForConditionalGeneration"
|
| 4 |
+
],
|
| 5 |
+
"dtype": "bfloat16",
|
| 6 |
+
"hidden_size": 4096,
|
| 7 |
+
"image_token_id": 248056,
|
| 8 |
+
"model_type": "qwen3_5_moe",
|
| 9 |
+
"text_config": {
|
| 10 |
+
"attention_bias": false,
|
| 11 |
+
"attention_dropout": 0.0,
|
| 12 |
+
"attn_output_gate": true,
|
| 13 |
+
"bos_token_id": null,
|
| 14 |
+
"dtype": "bfloat16",
|
| 15 |
+
"eos_token_id": 248044,
|
| 16 |
+
"full_attention_interval": 4,
|
| 17 |
+
"head_dim": 256,
|
| 18 |
+
"hidden_act": "silu",
|
| 19 |
+
"hidden_size": 4096,
|
| 20 |
+
"initializer_range": 0.02,
|
| 21 |
+
"layer_types": [
|
| 22 |
+
"linear_attention",
|
| 23 |
+
"linear_attention",
|
| 24 |
+
"linear_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"linear_attention",
|
| 27 |
+
"linear_attention",
|
| 28 |
+
"linear_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"linear_attention",
|
| 31 |
+
"linear_attention",
|
| 32 |
+
"linear_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"linear_attention",
|
| 35 |
+
"linear_attention",
|
| 36 |
+
"linear_attention",
|
| 37 |
+
"full_attention",
|
| 38 |
+
"linear_attention",
|
| 39 |
+
"linear_attention",
|
| 40 |
+
"linear_attention",
|
| 41 |
+
"full_attention",
|
| 42 |
+
"linear_attention",
|
| 43 |
+
"linear_attention",
|
| 44 |
+
"linear_attention",
|
| 45 |
+
"full_attention",
|
| 46 |
+
"linear_attention",
|
| 47 |
+
"linear_attention",
|
| 48 |
+
"linear_attention",
|
| 49 |
+
"full_attention",
|
| 50 |
+
"linear_attention",
|
| 51 |
+
"linear_attention",
|
| 52 |
+
"linear_attention",
|
| 53 |
+
"full_attention",
|
| 54 |
+
"linear_attention",
|
| 55 |
+
"linear_attention",
|
| 56 |
+
"linear_attention",
|
| 57 |
+
"full_attention",
|
| 58 |
+
"linear_attention",
|
| 59 |
+
"linear_attention",
|
| 60 |
+
"linear_attention",
|
| 61 |
+
"full_attention",
|
| 62 |
+
"linear_attention",
|
| 63 |
+
"linear_attention",
|
| 64 |
+
"linear_attention",
|
| 65 |
+
"full_attention",
|
| 66 |
+
"linear_attention",
|
| 67 |
+
"linear_attention",
|
| 68 |
+
"linear_attention",
|
| 69 |
+
"full_attention",
|
| 70 |
+
"linear_attention",
|
| 71 |
+
"linear_attention",
|
| 72 |
+
"linear_attention",
|
| 73 |
+
"full_attention",
|
| 74 |
+
"linear_attention",
|
| 75 |
+
"linear_attention",
|
| 76 |
+
"linear_attention",
|
| 77 |
+
"full_attention",
|
| 78 |
+
"linear_attention",
|
| 79 |
+
"linear_attention",
|
| 80 |
+
"linear_attention",
|
| 81 |
+
"full_attention"
|
| 82 |
+
],
|
| 83 |
+
"linear_conv_kernel_dim": 4,
|
| 84 |
+
"linear_key_head_dim": 128,
|
| 85 |
+
"linear_num_key_heads": 16,
|
| 86 |
+
"linear_num_value_heads": 64,
|
| 87 |
+
"linear_value_head_dim": 128,
|
| 88 |
+
"mamba_ssm_dtype": "float32",
|
| 89 |
+
"max_position_embeddings": 262144,
|
| 90 |
+
"mlp_only_layers": [],
|
| 91 |
+
"model_type": "qwen3_5_moe_text",
|
| 92 |
+
"moe_intermediate_size": 1024,
|
| 93 |
+
"mtp_num_hidden_layers": 1,
|
| 94 |
+
"mtp_use_dedicated_embeddings": false,
|
| 95 |
+
"num_attention_heads": 32,
|
| 96 |
+
"num_experts": 512,
|
| 97 |
+
"num_experts_per_tok": 10,
|
| 98 |
+
"num_hidden_layers": 60,
|
| 99 |
+
"num_key_value_heads": 2,
|
| 100 |
+
"output_router_logits": false,
|
| 101 |
+
"pad_token_id": null,
|
| 102 |
+
"partial_rotary_factor": 0.25,
|
| 103 |
+
"rms_norm_eps": 1e-06,
|
| 104 |
+
"rope_parameters": {
|
| 105 |
+
"mrope_interleaved": true,
|
| 106 |
+
"mrope_section": [
|
| 107 |
+
11,
|
| 108 |
+
11,
|
| 109 |
+
10
|
| 110 |
+
],
|
| 111 |
+
"partial_rotary_factor": 0.25,
|
| 112 |
+
"rope_theta": 10000000,
|
| 113 |
+
"rope_type": "default"
|
| 114 |
+
},
|
| 115 |
+
"router_aux_loss_coef": 0.001,
|
| 116 |
+
"shared_expert_intermediate_size": 1024,
|
| 117 |
+
"tie_word_embeddings": false,
|
| 118 |
+
"use_cache": true,
|
| 119 |
+
"vocab_size": 248320
|
| 120 |
+
},
|
| 121 |
+
"tie_word_embeddings": false,
|
| 122 |
+
"transformers_version": "5.8.1",
|
| 123 |
+
"video_token_id": 248057,
|
| 124 |
+
"vision_config": {
|
| 125 |
+
"deepstack_visual_indexes": [],
|
| 126 |
+
"depth": 27,
|
| 127 |
+
"dtype": "bfloat16",
|
| 128 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 129 |
+
"hidden_size": 1152,
|
| 130 |
+
"in_channels": 3,
|
| 131 |
+
"initializer_range": 0.02,
|
| 132 |
+
"intermediate_size": 4304,
|
| 133 |
+
"model_type": "qwen3_5_moe",
|
| 134 |
+
"num_heads": 16,
|
| 135 |
+
"num_position_embeddings": 2304,
|
| 136 |
+
"out_hidden_size": 4096,
|
| 137 |
+
"patch_size": 16,
|
| 138 |
+
"spatial_merge_size": 2,
|
| 139 |
+
"temporal_patch_size": 2
|
| 140 |
+
},
|
| 141 |
+
"vision_end_token_id": 248054,
|
| 142 |
+
"vision_start_token_id": 248053,
|
| 143 |
+
"quantization_config": {
|
| 144 |
+
"quant_method": "exl3",
|
| 145 |
+
"version": "0.0.43",
|
| 146 |
+
"bits": 3.04,
|
| 147 |
+
"head_bits": 8,
|
| 148 |
+
"calibration": {
|
| 149 |
+
"rows": 250,
|
| 150 |
+
"cols": 2048
|
| 151 |
+
},
|
| 152 |
+
"out_scales": "always",
|
| 153 |
+
"codebook": "mcg"
|
| 154 |
+
}
|
| 155 |
+
}
|
generation_config.json
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token_id": 248044,
|
| 3 |
+
"do_sample": true,
|
| 4 |
+
"eos_token_id": [
|
| 5 |
+
248046,
|
| 6 |
+
248044
|
| 7 |
+
],
|
| 8 |
+
"pad_token_id": 248044,
|
| 9 |
+
"temperature": 0.6,
|
| 10 |
+
"top_k": 20,
|
| 11 |
+
"top_p": 0.95,
|
| 12 |
+
"transformers_version": "4.57.0.dev0"
|
| 13 |
+
}
|
model-00001-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d46a2f389733f589938bba29121ef13ffafc07ebebb3546b4b592654761bf52f
|
| 3 |
+
size 2034237561
|
model-00002-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c38b6a6315fb065996c80aac7927fec3275daf32524e38bd20e982d78b55624
|
| 3 |
+
size 2519209563
|
model-00003-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed711b57fd5cd8c96e2ee638a4776eccd9e0880ef7d2086cbb7cc1a8c264c992
|
| 3 |
+
size 2519209563
|
model-00005-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:acc17677ce45f535e047371c0816cbeb75231f25bb144903c768b59f7df6aef4
|
| 3 |
+
size 2510200842
|
model-00006-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aacfc9df78ef2059720442d3a36100b9e85bb9ee554b737c763ac50f8f6e47ad
|
| 3 |
+
size 2519209563
|
model-00009-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:01595bdadbaf087295859a116c24f7712a5eca5a56239d5002cd492b8c1e0b80
|
| 3 |
+
size 2510200842
|
model-00012-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb11d758414e1d13983b6fa8e7c1a840113505fb370a6677e04c209ed58d0d8c
|
| 3 |
+
size 2519215741
|
model-00015-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34df0f971b6a5f04dacecf4868a584ea02d3a5c07d6e080994dd2d0a1533bbe3
|
| 3 |
+
size 2519215741
|
model-00016-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6d7bdb21da252721d8fcb4ecedab5454ff2ffdbd36ba2a26f7ea6a20625476ed
|
| 3 |
+
size 2519215741
|
model-00019-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:607f040e2f2c4fc57f2c8452e0762cec8104e173dc1d5a8022eef5c7ef73e4a4
|
| 3 |
+
size 2519215741
|
model-00021-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7badaa42deb683a2ac15958b9a5e425a88dcaa21c7d9af851b369e61db1bf665
|
| 3 |
+
size 2510207020
|
model-00022-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:edeb94c00142d92ca155db2742a4ad7db4155fc091a9c69be6537518d896d255
|
| 3 |
+
size 2519215741
|
model-00024-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31f3e5f0f273fe45ff831aacad37faf2e9f2ef0eb59199169515574208cad003
|
| 3 |
+
size 2519215741
|
model-00025-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0f9ae8c65e1e5315f89bb028ea88442fcb3c36a8d1fd766ea3a4f7d6effe052
|
| 3 |
+
size 2510207020
|
model-00027-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92d519a60d165989d0e0e4ba5eee1ea9ac56e53f8c75ed2c4bf212ef7fe1d462
|
| 3 |
+
size 2519215741
|
model-00028-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46b74d79fea10e1cf029669a4c96995750957cea0ef4dc20dc97bfb98d25ca5e
|
| 3 |
+
size 2519215741
|
model-00029-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa9c176aa312ad715ca00e53ad0ab811e080350b8aa0afd90825105e93c29ed0
|
| 3 |
+
size 2510207020
|
model-00030-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21d0c3875ab5552229eef542bd58466e65c0effe0dc259deef83d0aa1609d9ca
|
| 3 |
+
size 2519215741
|
model-00031-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:08b218eb78f1263f932e445ac5616624d8467b74f262e93bd1102d01e0d5b7c6
|
| 3 |
+
size 2519215741
|
model-00032-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a90c47006873ebb795172e59b6d4c19de36c8d2227b246c2dfb5642a696e9725
|
| 3 |
+
size 2519215741
|
model-00033-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fa72b11ad8a698d348d4ecb4143a4b31e7aa05b398a0ab9444b78b326c7a3be
|
| 3 |
+
size 2510207020
|
model-00045-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:091e392f8483edcd19165d4a9fdd5452d2c392b1071baa03a9fb6d7865917bbe
|
| 3 |
+
size 2510207020
|
model-00046-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8db14022f3b5e4d88f8de339f93742024253d47882435be87d2f91dc1b6bf27e
|
| 3 |
+
size 2519215741
|
model-00047-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1797d617791d8c207f17bef27dac15e920bd7aa490fb00985c64e794d6eed22d
|
| 3 |
+
size 2519215741
|
model-00048-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:12822d3520477892e3cd54cfe5597ecb1d5145d4cffc0b3498e4c0d9058cea79
|
| 3 |
+
size 2519215741
|
model-00049-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba773fd0a839e3eee69b01f4cdfa7aedd1cb3d553788100dbc193265bfdda88e
|
| 3 |
+
size 2510207020
|
model-00051-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5cf816441c8604d4eccfa42f7261c1b438be6e47a5d4f3209186a9c8119719b
|
| 3 |
+
size 2519215741
|
model-00052-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ccf30175298f83bbe9acef48e7bb67bafb10dd4e5920b12a9b5f5dcaa7271290
|
| 3 |
+
size 2519215741
|
model-00053-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14ac0a0791909be63404bf18124c29d6984c059cec176db145111addd1ed924b
|
| 3 |
+
size 2510207020
|
model-00054-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:df8a4f9913cb1ce3e50ab2abf7c4d0cc10ceab1a872e004751935688b753eb7e
|
| 3 |
+
size 2519215741
|
model-00055-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6648115cb571de076576c3d26e1175fd39e93d3ae903c8ae3b1153ea4c5adce5
|
| 3 |
+
size 2519215741
|
model-00057-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1eb20a5dc057556dd243e60d42503c3da03fbffc8164e1d8bb1b08158340d4d
|
| 3 |
+
size 2510207020
|
model-00058-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62de56b4f912cf865865d19b7e73238e6da9eae4a7ffd3cab95e33e95ac9652a
|
| 3 |
+
size 2519215741
|
model-00059-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a383b1f69de8cd0dd98e1fc8f5311dea102bc8d2d102d09b4ef2c6f5166afe1a
|
| 3 |
+
size 2519215741
|
model-00060-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d21ffdbb6ec292fa59e9b2233bc7b34bc8b89fc72ca98a854e3dfb947c4765e3
|
| 3 |
+
size 2519215741
|
model-00061-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5193c6e284faed1aa387d5d56cbbcec67df58238f6081dc9e23753d5ffe8fcb
|
| 3 |
+
size 4292512235
|
model-00062-of-00062.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45a2461d0fff65890c532d5cf638a98a1cb6aa8304f48bee11a05a094097045a
|
| 3 |
+
size 147385595
|
model.safetensors.index.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58fd7b1e8a16f8e3e9cdbd6e265240be2ee59097b1068f99e1fa49dd2a090e17
|
| 3 |
+
size 40039839
|
preprocessor_config.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"size": {
|
| 3 |
+
"longest_edge": 16777216,
|
| 4 |
+
"shortest_edge": 65536
|
| 5 |
+
},
|
| 6 |
+
"patch_size": 16,
|
| 7 |
+
"temporal_patch_size": 2,
|
| 8 |
+
"merge_size": 2,
|
| 9 |
+
"image_mean": [
|
| 10 |
+
0.5,
|
| 11 |
+
0.5,
|
| 12 |
+
0.5
|
| 13 |
+
],
|
| 14 |
+
"image_std": [
|
| 15 |
+
0.5,
|
| 16 |
+
0.5,
|
| 17 |
+
0.5
|
| 18 |
+
],
|
| 19 |
+
"processor_class": "Qwen3VLProcessor",
|
| 20 |
+
"image_processor_type": "Qwen2VLImageProcessorFast"
|
| 21 |
+
}
|
processor_config.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"image_processor": {
|
| 3 |
+
"do_convert_rgb": true,
|
| 4 |
+
"do_normalize": true,
|
| 5 |
+
"do_rescale": true,
|
| 6 |
+
"do_resize": true,
|
| 7 |
+
"image_mean": [
|
| 8 |
+
0.5,
|
| 9 |
+
0.5,
|
| 10 |
+
0.5
|
| 11 |
+
],
|
| 12 |
+
"image_processor_type": "Qwen2VLImageProcessor",
|
| 13 |
+
"image_std": [
|
| 14 |
+
0.5,
|
| 15 |
+
0.5,
|
| 16 |
+
0.5
|
| 17 |
+
],
|
| 18 |
+
"merge_size": 2,
|
| 19 |
+
"patch_size": 16,
|
| 20 |
+
"resample": 3,
|
| 21 |
+
"rescale_factor": 0.00392156862745098,
|
| 22 |
+
"size": {
|
| 23 |
+
"longest_edge": 16777216,
|
| 24 |
+
"shortest_edge": 65536
|
| 25 |
+
},
|
| 26 |
+
"temporal_patch_size": 2
|
| 27 |
+
},
|
| 28 |
+
"processor_class": "Qwen3VLProcessor",
|
| 29 |
+
"video_processor": {
|
| 30 |
+
"do_convert_rgb": true,
|
| 31 |
+
"do_normalize": true,
|
| 32 |
+
"do_rescale": true,
|
| 33 |
+
"do_resize": true,
|
| 34 |
+
"do_sample_frames": true,
|
| 35 |
+
"fps": 2,
|
| 36 |
+
"image_mean": [
|
| 37 |
+
0.5,
|
| 38 |
+
0.5,
|
| 39 |
+
0.5
|
| 40 |
+
],
|
| 41 |
+
"image_std": [
|
| 42 |
+
0.5,
|
| 43 |
+
0.5,
|
| 44 |
+
0.5
|
| 45 |
+
],
|
| 46 |
+
"max_frames": 768,
|
| 47 |
+
"merge_size": 2,
|
| 48 |
+
"min_frames": 4,
|
| 49 |
+
"patch_size": 16,
|
| 50 |
+
"resample": 3,
|
| 51 |
+
"rescale_factor": 0.00392156862745098,
|
| 52 |
+
"return_metadata": false,
|
| 53 |
+
"size": {
|
| 54 |
+
"longest_edge": 25165824,
|
| 55 |
+
"shortest_edge": 4096
|
| 56 |
+
},
|
| 57 |
+
"temporal_patch_size": 2,
|
| 58 |
+
"video_processor_type": "Qwen3VLVideoProcessor"
|
| 59 |
+
}
|
| 60 |
+
}
|
quantization_config.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d13843abc48f35f578efb66b978ae0495be9231c0189d7f09ad944f44f435f67
|
| 3 |
+
size 120618873
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06b9509352d2af50381ab2247e083b80d32d5c0aba91c272ca9ff729b6a0e523
|
| 3 |
+
size 19989325
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"audio_bos_token": "<|audio_start|>",
|
| 4 |
+
"audio_eos_token": "<|audio_end|>",
|
| 5 |
+
"audio_token": "<|audio_pad|>",
|
| 6 |
+
"backend": "tokenizers",
|
| 7 |
+
"bos_token": null,
|
| 8 |
+
"clean_up_tokenization_spaces": false,
|
| 9 |
+
"eos_token": "<|im_end|>",
|
| 10 |
+
"errors": "replace",
|
| 11 |
+
"image_token": "<|image_pad|>",
|
| 12 |
+
"is_local": true,
|
| 13 |
+
"local_files_only": false,
|
| 14 |
+
"model_max_length": 262144,
|
| 15 |
+
"model_specific_special_tokens": {
|
| 16 |
+
"audio_bos_token": "<|audio_start|>",
|
| 17 |
+
"audio_eos_token": "<|audio_end|>",
|
| 18 |
+
"audio_token": "<|audio_pad|>",
|
| 19 |
+
"image_token": "<|image_pad|>",
|
| 20 |
+
"video_token": "<|video_pad|>",
|
| 21 |
+
"vision_bos_token": "<|vision_start|>",
|
| 22 |
+
"vision_eos_token": "<|vision_end|>"
|
| 23 |
+
},
|
| 24 |
+
"pad_token": "<|endoftext|>",
|
| 25 |
+
"pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
|
| 26 |
+
"processor_class": "Qwen3VLProcessor",
|
| 27 |
+
"split_special_tokens": false,
|
| 28 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 29 |
+
"unk_token": null,
|
| 30 |
+
"video_token": "<|video_pad|>",
|
| 31 |
+
"vision_bos_token": "<|vision_start|>",
|
| 32 |
+
"vision_eos_token": "<|vision_end|>"
|
| 33 |
+
}
|
video_preprocessor_config.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"size": {
|
| 3 |
+
"longest_edge": 25165824,
|
| 4 |
+
"shortest_edge": 4096
|
| 5 |
+
},
|
| 6 |
+
"patch_size": 16,
|
| 7 |
+
"temporal_patch_size": 2,
|
| 8 |
+
"merge_size": 2,
|
| 9 |
+
"image_mean": [
|
| 10 |
+
0.5,
|
| 11 |
+
0.5,
|
| 12 |
+
0.5
|
| 13 |
+
],
|
| 14 |
+
"image_std": [
|
| 15 |
+
0.5,
|
| 16 |
+
0.5,
|
| 17 |
+
0.5
|
| 18 |
+
],
|
| 19 |
+
"processor_class": "Qwen3VLProcessor",
|
| 20 |
+
"video_processor_type": "Qwen3VLVideoProcessor"
|
| 21 |
+
}
|
vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|