cpral committed on
Commit
1ccf078
·
verified ·
1 Parent(s): 4f35a9c

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. .gitattributes +5 -0
  2. README.md +465 -0
  3. chat_template.jinja +154 -0
  4. config.json +155 -0
  5. generation_config.json +13 -0
  6. model-00001-of-00062.safetensors +3 -0
  7. model-00002-of-00062.safetensors +3 -0
  8. model-00003-of-00062.safetensors +3 -0
  9. model-00005-of-00062.safetensors +3 -0
  10. model-00006-of-00062.safetensors +3 -0
  11. model-00009-of-00062.safetensors +3 -0
  12. model-00012-of-00062.safetensors +3 -0
  13. model-00015-of-00062.safetensors +3 -0
  14. model-00016-of-00062.safetensors +3 -0
  15. model-00019-of-00062.safetensors +3 -0
  16. model-00021-of-00062.safetensors +3 -0
  17. model-00022-of-00062.safetensors +3 -0
  18. model-00024-of-00062.safetensors +3 -0
  19. model-00025-of-00062.safetensors +3 -0
  20. model-00027-of-00062.safetensors +3 -0
  21. model-00028-of-00062.safetensors +3 -0
  22. model-00029-of-00062.safetensors +3 -0
  23. model-00030-of-00062.safetensors +3 -0
  24. model-00031-of-00062.safetensors +3 -0
  25. model-00032-of-00062.safetensors +3 -0
  26. model-00033-of-00062.safetensors +3 -0
  27. model-00045-of-00062.safetensors +3 -0
  28. model-00046-of-00062.safetensors +3 -0
  29. model-00047-of-00062.safetensors +3 -0
  30. model-00048-of-00062.safetensors +3 -0
  31. model-00049-of-00062.safetensors +3 -0
  32. model-00051-of-00062.safetensors +3 -0
  33. model-00052-of-00062.safetensors +3 -0
  34. model-00053-of-00062.safetensors +3 -0
  35. model-00054-of-00062.safetensors +3 -0
  36. model-00055-of-00062.safetensors +3 -0
  37. model-00057-of-00062.safetensors +3 -0
  38. model-00058-of-00062.safetensors +3 -0
  39. model-00059-of-00062.safetensors +3 -0
  40. model-00060-of-00062.safetensors +3 -0
  41. model-00061-of-00062.safetensors +3 -0
  42. model-00062-of-00062.safetensors +3 -0
  43. model.safetensors.index.json +3 -0
  44. preprocessor_config.json +21 -0
  45. processor_config.json +60 -0
  46. quantization_config.json +3 -0
  47. tokenizer.json +3 -0
  48. tokenizer_config.json +33 -0
  49. video_preprocessor_config.json +21 -0
  50. vocab.json +0 -0
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ assets/ornith_397b_eval.png filter=lfs diff=lfs merge=lfs -text
37
+ assets/ornith_logo.png filter=lfs diff=lfs merge=lfs -text
38
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
39
+ model.safetensors.index.json filter=lfs diff=lfs merge=lfs -text
40
+ quantization_config.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,465 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: mit
4
+ license_link: https://huggingface.co/deepreinforce-ai/Ornith-1.0-397B/blob/main/LICENSE
5
+ pipeline_tag: text-generation
6
+ ---
7
+
8
+
9
+ <img width="600px" src="assets/ornith_logo.png">
10
+
11
+ [![Ornith Blog](https://img.shields.io/badge/%F0%9F%A6%A2%EF%B8%8F%20Ornith%20Blog%20-FD8E5B)](https://deep-reinforce.com/ornith.html)
12
+
13
+ # Ornith-1.0-397B
14
+
15
+ Aloha! 🌺 Today, we are releasing Ornith-1.0, a self-improving family of open-source models for agentic coding.
16
+
17
+ Highlights:
18
+
19
+ - **State-of-the-Art Coding Agents**: Available in 9B-Dense, 31B-Dense, 35B-MoE, and 397B-MoE (post-trained on top of Gemma 4 and Qwen 3.5), achieving state-of-the-art performance among open-source models of comparable size on coding benchmarks such as Terminal-Bench 2.1, SWE-Bench, NL2Repo and OpenClaw.
20
+ - **Self-Improving Training Framework**: Ornith-1.0 employs RL to learn to generate not only solution rollouts, but also the scaffold that drives those rollouts. By jointly optimizing the scaffold and the resulting solution, the model discovers better search trajectories and generates higher-quality solutions.
21
+ - **Licence**: MIT licensed, globally accessible, and free from regional limitations.
22
+
23
+ <img style="width: 100%; max-width: 900px;" src="assets/ornith_397b_eval.png" alt="Ornith 397B Benchmark Results" title="Ornith 397B Benchmark Results">
24
+
25
+ ## Ornith 1.0 397B
26
+
27
+ This model card documents **Ornith-1.0-397B**, the largest member of the Ornith family, designed for multi-GPU deployment (for example, a single 8×80GB node with tensor parallelism).
28
+
29
+
30
+ ### Benchmarks
31
+
32
+ <div style="font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,sans-serif;width:100%;margin:0 auto;padding:16px 0">
33
+ <table style="width:100%;table-layout:fixed;border-collapse:collapse;font-size:13px">
34
+ <thead><tr>
35
+ <th style="width:24%;padding:10px 7px;text-align:left;font-weight:600;border-bottom:2px solid #FD8E5B;color:#FD8E5B"></th>
36
+ <th style="width:9.50%;padding:10px 7px;text-align:center;font-weight:700;border-bottom:2px solid #FD8E5B;color:#FD8E5B;font-size:14px;background:rgba(253, 142, 91, 0.12)">Ornith-1.0-397B</th>
37
+ <th style="width:9.50%;padding:10px 7px;text-align:center;font-weight:500;border-bottom:2px solid #FD8E5B;color:#FD8E5B;font-size:14px">Qwen3.5-397B</th>
38
+ <th style="width:9.50%;padding:10px 7px;text-align:center;font-weight:500;border-bottom:2px solid #FD8E5B;color:#FD8E5B;font-size:14px">Qwen3.7-Max</th>
39
+ <th style="width:9.50%;padding:10px 7px;text-align:center;font-weight:500;border-bottom:2px solid #FD8E5B;color:#FD8E5B;font-size:14px">GLM-5.2-744B</th>
40
+ <th style="width:9.50%;padding:10px 7px;text-align:center;font-weight:500;border-bottom:2px solid #FD8E5B;color:#FD8E5B;font-size:14px">Minimax-M3-428B</th>
41
+ <th style="width:9.50%;padding:10px 7px;text-align:center;font-weight:500;border-bottom:2px solid #FD8E5B;color:#FD8E5B;font-size:14px">DeepSeek-V4-Pro-1.6T</th>
42
+ <th style="width:9.50%;padding:10px 7px;text-align:center;font-weight:500;border-bottom:2px solid #FD8E5B;color:#FD8E5B;font-size:14px">Claude Opus 4.7</th>
43
+ <th style="width:9.50%;padding:10px 7px;text-align:center;font-weight:500;border-bottom:2px solid #FD8E5B;color:#FD8E5B;font-size:14px">Claude Opus 4.8</th>
44
+ </tr></thead>
45
+ <tbody>
46
+ <tr><td colspan="9" style="padding:8px 12px;font-weight:600;color:#FD8E5B;border-bottom:1px solid rgba(253, 142, 91, 0.2);background:rgba(253, 142, 91, 0.1)">Agentic Coding</td></tr>
47
+ <tr>
48
+ <td style="padding:7px 7px;padding-left:20px;border-bottom:1px solid rgba(128, 128, 128, 0.15);">Terminal-Bench 2.1 <sub><small>(Terminus-2)</small></sub></td>
49
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15);font-weight:600;color:#FD8E5B;background:rgba(253, 142, 91, 0.06)">77.5</td>
50
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">53.5</td>
51
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">73.5</td>
52
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">81.0</td>
53
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">64</td>
54
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">64</td>
55
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">70.3</td>
56
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">85</td>
57
+ </tr>
58
+ <tr>
59
+ <td style="padding:7px 7px;padding-left:20px;border-bottom:1px solid rgba(128, 128, 128, 0.15);">Terminal-Bench 2.1 <sub><small>(Claude Code)</small></sub></td>
60
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15);font-weight:600;color:#FD8E5B;background:rgba(253, 142, 91, 0.06)">78.2</td>
61
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">48.6</td>
62
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">69.8</td>
63
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">82.7</td>
64
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
65
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">66.5</td>
66
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">69.7</td>
67
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">78.9</td>
68
+ </tr>
69
+ <tr>
70
+ <td style="padding:7px 7px;padding-left:20px;border-bottom:1px solid rgba(128, 128, 128, 0.15);">SWE-bench Verified</td>
71
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15);font-weight:600;color:#FD8E5B;background:rgba(253, 142, 91, 0.06)">82.4</td>
72
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">76.4</td>
73
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">80.4</td>
74
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
75
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
76
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">80.6</td>
77
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">80.8</td>
78
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">87.6</td>
79
+ </tr>
80
+ <tr>
81
+ <td style="padding:7px 7px;padding-left:20px;border-bottom:1px solid rgba(128, 128, 128, 0.15);">SWE-bench Pro</td>
82
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15);font-weight:600;color:#FD8E5B;background:rgba(253, 142, 91, 0.06)">62.2</td>
83
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">51.6</td>
84
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">60.6</td>
85
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">62.1</td>
86
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">59</td>
87
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">55.4</td>
88
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">64.3</td>
89
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">69.2</td>
90
+ </tr>
91
+ <tr>
92
+ <td style="padding:7px 7px;padding-left:20px;border-bottom:1px solid rgba(128, 128, 128, 0.15);">SWE-bench Multilingual</td>
93
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15);font-weight:600;color:#FD8E5B;background:rgba(253, 142, 91, 0.06)">78.9</td>
94
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">69.3</td>
95
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">78.3</td>
96
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
97
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
98
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">76.2</td>
99
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
100
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
101
+ </tr>
102
+ <tr>
103
+ <td style="padding:7px 7px;padding-left:20px;border-bottom:1px solid rgba(128, 128, 128, 0.15);">NL2Repo</td>
104
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15);font-weight:600;color:#FD8E5B;background:rgba(253, 142, 91, 0.06)">48.2</td>
105
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">36.8</td>
106
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">47.2</td>
107
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">48.9</td>
108
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">42.1</td>
109
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
110
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
111
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">69.7</td>
112
+ </tr>
113
+ <tr>
114
+ <td style="padding:7px 7px;padding-left:20px;border-bottom:1px solid rgba(128, 128, 128, 0.15);">Claw-eval Avg</td>
115
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15);font-weight:600;color:#FD8E5B;background:rgba(253, 142, 91, 0.06)">77.1</td>
116
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">70.7</td>
117
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">65.2</td>
118
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
119
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
120
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">75.8</td>
121
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">78.2</td>
122
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
123
+ </tr>
124
+ <tr>
125
+ <td style="padding:7px 7px;padding-left:20px;border-bottom:1px solid rgba(128, 128, 128, 0.15);">SWE Atlas - QnA</td>
126
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15);font-weight:600;color:#FD8E5B;background:rgba(253, 142, 91, 0.06)">41.2</td>
127
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">20.4</td>
128
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
129
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
130
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">37.9</td>
131
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">27.2</td>
132
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">40.3</td>
133
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">48.8</td>
134
+ </tr>
135
+ <tr>
136
+ <td style="padding:7px 7px;padding-left:20px;border-bottom:1px solid rgba(128, 128, 128, 0.15);">SWE Atlas - RF</td>
137
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15);font-weight:600;color:#FD8E5B;background:rgba(253, 142, 91, 0.06)">42.6</td>
138
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">18.4</td>
139
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
140
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
141
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
142
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
143
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">48.6</td>
144
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">46.7</td>
145
+ </tr>
146
+ <tr>
147
+ <td style="padding:7px 7px;padding-left:20px;border-bottom:1px solid rgba(128, 128, 128, 0.15);">SWE Atlas - TW</td>
148
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15);font-weight:600;color:#FD8E5B;background:rgba(253, 142, 91, 0.06)">39.1</td>
149
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">18.5</td>
150
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
151
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
152
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">30.8</td>
153
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
154
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">38.5</td>
155
+ <td style="padding:7px 7px;text-align:center;border-bottom:1px solid rgba(128, 128, 128, 0.15)">-</td>
156
+ </tr>
157
+ </tbody>
158
+ </table>
159
+
160
+ <p style="margin-top:12px;font-size:10px;opacity:0.7">
161
+ * Terminal-Bench 2.1 (Terminus-2): We evaluate Terminal-Bench 2.1 using the Harbor/Terminus-2 framework with parser=json, temperature=1.0, top_p=1.0, and a 128K context window. Each run uses a 4-hour timeout with 32 CPU cores and 48GB RAM, and results are averaged over 5 runs. We adjust the Qwen chat template to ensure consistency between training and inference (https://huggingface.co/deepreinforce-ai/Ornith-1.0-397B/blob/main/chat_template.jinja), and modify Harbor to align with vLLM's reasoning_content key.<br/>
162
+ * Terminal-Bench 2.1 (Claude Code): We evaluate Terminal-Bench 2.1 using Claude Code 2.1.126 with parser=json, temperature=1.0, top_p=1.0, max_new_tokens=131072. Results are averaged over 5 runs. As with the Terminus-2 setup, the Qwen chat template must be modified.<br/>
163
+ * SWE-Bench Verified, Pro and Multilingual: evaluated using the OpenHands harness with temp=1.0, top_p=0.95, and a 256K context window.<br/>
164
+ * SWE Atlas QnA, RF, TW: using mini SWE agent harness with temp=1.0, top_p=0.95, 128K context window. Results are averaged over 5 runs.<br/>
165
+ * NL2Repo: with temperature=1.0, top_p=1.0, 400K context, 48K output and anti-hacking filters.<br/>
166
+ * ClawEval: An agentic code benchmark over real-user task distributions; temp=0.6 and 256K context.<br/>
167
+ </p>
168
+
169
+ </div>
170
+
171
+
172
+
173
+
174
+ ## Quickstart
175
+
176
+
177
+ <div style="border-left:4px solid #FD8E5B;background:rgba(253,142,91,0.1);border-radius:6px;padding:12px 16px;font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,sans-serif;font-size:14px;line-height:1.6">
178
+ <div style="font-weight:700;color:#FD8E5B;margin-bottom:6px">📝 NOTE</div>
179
+ <p style="margin:0 0 10px"><b>Ornith-1.0-397B</b> is a <b>reasoning model</b>: by default the assistant turn opens with a <code style="background:rgba(253,142,91,0.15);padding:1px 5px;border-radius:4px">&lt;think&gt; … &lt;/think&gt;</code> block before the final answer. The serving recipes below enable a reasoning parser so the chain-of-thought is returned in a separate <code style="background:rgba(253,142,91,0.15);padding:1px 5px;border-radius:4px">reasoning_content</code> field, and a tool-call parser so the model's <code style="background:rgba(253,142,91,0.15);padding:1px 5px;border-radius:4px">&lt;tool_call&gt;</code> blocks are surfaced as OpenAI-style <code style="background:rgba(253,142,91,0.15);padding:1px 5px;border-radius:4px">tool_calls</code>.</p>
180
+ <p style="margin:0 0 6px">Serving Ornith-1.0-397B requires recent runtimes:</p>
181
+ <ul style="margin:0;padding-left:20px">
182
+ <li><b>Transformers</b> ≥ 5.8.1</li>
183
+ <li><b>vLLM</b> ≥ 0.19.1</li>
184
+ <li><b>SGLang</b> ≥ 0.5.9</li>
185
+ </ul>
186
+ </div>
187
+
188
+ ### Serving Ornith-1.0-397B
189
+
190
+ The two recipes below stand up an OpenAI-compatible server on a single 8×80GB GPU node (tensor-parallel 8). Adjust `--tensor-parallel-size` / `--tp` to the number of GPUs you have.
191
+
192
+ #### vLLM
193
+
194
+ ```bash
195
+ vllm serve deepreinforce-ai/Ornith-1.0-397B \
196
+ --served-model-name Ornith-1.0-397B \
197
+ --tensor-parallel-size 8 \
198
+ --host 0.0.0.0 --port 8000 \
199
+ --max-model-len 262144 \
200
+ --gpu-memory-utilization 0.90 \
201
+ --enable-prefix-caching \
202
+ --enable-auto-tool-choice --tool-call-parser qwen3_xml \
203
+ --reasoning-parser qwen3 \
204
+ --trust-remote-code
205
+ ```
206
+
207
+ #### SGLang
208
+
209
+ ```bash
210
+ python -m sglang.launch_server \
211
+ --model-path deepreinforce-ai/Ornith-1.0-397B \
212
+ --served-model-name Ornith-1.0-397B \
213
+ --tp 8 \
214
+ --host 0.0.0.0 --port 8000 \
215
+ --context-length 262144 \
216
+ --mem-fraction-static 0.85 \
217
+ --tool-call-parser qwen3_coder \
218
+ --reasoning-parser qwen3
219
+ ```
220
+
221
+ #### Hugging Face Transformers
222
+
223
+ For a quick local test (or to script offline generation), load the model directly with Transformers. Make sure you have a recent release installed — see the [Transformers installation guide](https://huggingface.co/docs/transformers/installation); Ornith-1.0-397B requires `transformers >= 5.8.1`.
224
+
225
+ ```python
226
+ from transformers import AutoModelForCausalLM, AutoTokenizer
227
+
228
+ model_name = "deepreinforce-ai/Ornith-1.0-397B"
229
+
230
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
231
+ model = AutoModelForCausalLM.from_pretrained(
232
+ model_name,
233
+ dtype="auto",
234
+ device_map="auto",
235
+ )
236
+
237
+ messages = [
238
+ {"role": "user", "content": "Write a Python function is_prime(n). Keep it short."}
239
+ ]
240
+ text = tokenizer.apply_chat_template(
241
+ messages,
242
+ tokenize=False,
243
+ add_generation_prompt=True,
244
+ )
245
+
246
+ inputs = tokenizer(text, return_tensors="pt").to(model.device)
247
+ generated = model.generate(
248
+ **inputs,
249
+ max_new_tokens=512,
250
+ do_sample=True,
251
+ temperature=0.6,
252
+ top_p=0.95,
253
+ top_k=20,
254
+ )
255
+ output_ids = generated[0][inputs.input_ids.shape[1]:]
256
+
257
+ # The reply contains a <think> ... </think> reasoning block followed by the answer.
258
+ content = tokenizer.decode(output_ids, skip_special_tokens=True)
259
+ print(content)
260
+ ```
261
+
262
+ To split the reasoning trace from the final answer, parse on the `</think>` marker:
263
+
264
+ ```python
265
+ text = tokenizer.decode(output_ids, skip_special_tokens=True)
266
+ if "</think>" in text:
267
+ reasoning, answer = text.split("</think>", 1)
268
+ reasoning = reasoning.replace("<think>", "").strip()
269
+ answer = answer.strip()
270
+ else:
271
+ reasoning, answer = "", text.strip()
272
+ ```
273
+
274
+ ### Using Ornith-1.0-397B via the Chat Completions API
275
+
276
+ Once a vLLM or SGLang server is running, talk to it with any OpenAI-compatible client.
277
+
278
+ #### Basic Usage
279
+
280
+ ```python
281
+ from openai import OpenAI
282
+
283
+ client = OpenAI(
284
+ base_url="http://localhost:8000/v1",
285
+ api_key="EMPTY", # any non-empty string works for a local server
286
+ )
287
+
288
+ response = client.chat.completions.create(
289
+ model="Ornith-1.0-397B",
290
+ messages=[
291
+ {"role": "user", "content": "Write a one-line Python lambda that squares a number."}
292
+ ],
293
+ temperature=0.6,
294
+ top_p=0.95,
295
+ max_tokens=1024,
296
+ )
297
+
298
+ message = response.choices[0].message
299
+ # reasoning_content holds the <think> trace; content holds the final answer.
300
+ print("reasoning:", getattr(message, "reasoning_content", None))
301
+ print("answer:", message.content)
302
+ ```
303
+
304
+ You can also stream tokens, or hand the model tools — Ornith-1.0-397B emits well-formed function calls that the server parses into the standard `tool_calls` field:
305
+
306
+ ```python
307
+ tools = [
308
+ {
309
+ "type": "function",
310
+ "function": {
311
+ "name": "get_weather",
312
+ "description": "Get the current weather for a city",
313
+ "parameters": {
314
+ "type": "object",
315
+ "properties": {"city": {"type": "string"}},
316
+ "required": ["city"],
317
+ },
318
+ },
319
+ }
320
+ ]
321
+
322
+ response = client.chat.completions.create(
323
+ model="Ornith-1.0-397B",
324
+ messages=[{"role": "user", "content": "What is the weather in Paris right now?"}],
325
+ tools=tools,
326
+ tool_choice="auto",
327
+ temperature=0.6,
328
+ max_tokens=2048,
329
+ )
330
+
331
+ tool_call = response.choices[0].message.tool_calls[0]
332
+ print(tool_call.function.name, tool_call.function.arguments)
333
+ # -> get_weather {"city": "Paris"}
334
+ ```
335
+
336
+ You can point any OpenAI-compatible SDK (Python, Node.js, etc.) or `curl` at the same `/v1/chat/completions` endpoint.
337
+
338
+ ## Agentic Usage
339
+
340
+ Ornith-1.0-397B excels at tool calling and agentic coding.
341
+
342
+ ### Agent Frameworks
343
+
344
+ Because Ornith-1.0-397B exposes an OpenAI-compatible endpoint with tool calling, it works out of the box with standard agent frameworks. Below is a minimal example that hands Ornith-1.0-397B a shell tool through the standard `tools` parameter of the Chat Completions API.
345
+
346
+ ```python
347
+ import os
348
+ from openai import OpenAI
349
+
350
+ client = OpenAI(
351
+ base_url=os.getenv("OPENAI_BASE_URL", "http://localhost:8000/v1"),
352
+ api_key=os.getenv("OPENAI_API_KEY", "EMPTY"),
353
+ )
354
+
355
+ tools = [
356
+ {
357
+ "type": "function",
358
+ "function": {
359
+ "name": "run_shell",
360
+ "description": "Run a shell command and return its output.",
361
+ "parameters": {
362
+ "type": "object",
363
+ "properties": {
364
+ "command": {"type": "string", "description": "The command to run"}
365
+ },
366
+ "required": ["command"],
367
+ },
368
+ },
369
+ }
370
+ ]
371
+
372
+ messages = [{"role": "user", "content": "List the Python files in the current directory."}]
373
+
374
+ response = client.chat.completions.create(
375
+ model="Ornith-1.0-397B",  # must match the server's --served-model-name
376
+ messages=messages,
377
+ tools=tools,
378
+ temperature=0.6,
379
+ top_p=0.95,
380
+ )
381
+ print(response.choices[0].message)
382
+ ```
383
+
384
+ **Examples of using Ornith with agent harnesses:**
385
+
386
+ #### Hermes Agent
387
+ ```bash
388
+ # Hermes talks to any OpenAI-compatible endpoint — point it at your Ornith server.
389
+ export OPENAI_BASE_URL="http://localhost:8000/v1"
390
+ export OPENAI_API_KEY="EMPTY"
391
+ export MODEL="Ornith-1.0-397B"  # must match the server's --served-model-name
392
+ ```
393
+
394
+ #### OpenClaw
395
+
396
+ ```bash
397
+ # OpenClaw talks to any OpenAI-compatible endpoint — point it at your Ornith server.
398
+ export OPENAI_BASE_URL="http://localhost:8000/v1"
399
+ export OPENAI_API_KEY="EMPTY"
400
+ export OPENAI_MODEL="Ornith-1.0-397B"  # must match the server's --served-model-name
401
+ ```
402
+
403
+ #### Unsloth Studio
404
+
405
+ ```bash
406
+ pip install unsloth
407
+
408
+ # Load Ornith for fast local inference or fine-tuning (Python):
409
+ # from unsloth import FastLanguageModel
410
+ # model, tokenizer = FastLanguageModel.from_pretrained(
411
+ # "deepreinforce-ai/Ornith-1.0-397B",
412
+ # max_seq_length=262144,
413
+ # load_in_4bit=True,
414
+ # )
415
+ ```
416
+
417
+ #### OpenHands
418
+ ```bash
419
+ pip install openhands-ai
420
+
421
+ # OpenHands routes through LiteLLM; the "openai/" prefix selects the OpenAI-compatible path.
422
+ export LLM_MODEL="openai/Ornith-1.0-397B"  # "openai/" + the server's --served-model-name
423
+ export LLM_BASE_URL="http://localhost:8000/v1"
424
+ export LLM_API_KEY="EMPTY"
425
+
426
+ # Launch the CLI (or run the official OpenHands Docker image with the same env vars).
427
+ openhands
428
+ ```
429
+
430
+ ### Coding CLIs
431
+
432
+ Ornith-1.0-397B is optimized for terminal-based coding agents. Point any OpenAI-compatible coding CLI at your Ornith-1.0-397B endpoint (set `OPENAI_BASE_URL` and `OPENAI_API_KEY`) to understand large codebases, automate tedious work, and ship faster.
433
+
434
+ #### OpenCode
435
+ ```bash
436
+ # Register your local Ornith endpoint as a provider in ~/.config/opencode/opencode.json:
437
+ #
438
+ # {
439
+ # "$schema": "https://opencode.ai/config.json",
440
+ # "provider": {
441
+ # "ornith": {
442
+ # "npm": "@ai-sdk/openai-compatible",
443
+ # "name": "Ornith (local)",
444
+ # "options": { "baseURL": "http://localhost:8000/v1", "apiKey": "EMPTY" },
445
+ # "models": { "Ornith-1.0-397B": { "name": "Ornith-1.0-397B" } }
446
+ # }
447
+ # }
448
+ # }
449
+
450
+ opencode
451
+ ```
452
+
453
+
454
+ ### Citation
455
+
456
+ If you find our work helpful, please consider citing it.
457
+
458
+ ```bibtex
459
+ @misc{ornith_397b,
460
+ title = {{Ornith-1.0-397B}: Agentic Coding, Open to All},
461
+ url = {https://deep-reinforce.com/ornith_1_0.html},
462
+ author = {{DeepReinforce Team}},
463
+ year = {2026}
464
+ }
465
+ ```
chat_template.jinja ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- set image_count = namespace(value=0) %}
2
+ {%- set video_count = namespace(value=0) %}
3
+ {%- macro render_content(content, do_vision_count, is_system_content=false) %}
4
+ {%- if content is string %}
5
+ {{- content }}
6
+ {%- elif content is iterable and content is not mapping %}
7
+ {%- for item in content %}
8
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
9
+ {%- if is_system_content %}
10
+ {{- raise_exception('System message cannot contain images.') }}
11
+ {%- endif %}
12
+ {%- if do_vision_count %}
13
+ {%- set image_count.value = image_count.value + 1 %}
14
+ {%- endif %}
15
+ {%- if add_vision_id %}
16
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
17
+ {%- endif %}
18
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
19
+ {%- elif 'video' in item or item.type == 'video' %}
20
+ {%- if is_system_content %}
21
+ {{- raise_exception('System message cannot contain videos.') }}
22
+ {%- endif %}
23
+ {%- if do_vision_count %}
24
+ {%- set video_count.value = video_count.value + 1 %}
25
+ {%- endif %}
26
+ {%- if add_vision_id %}
27
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
28
+ {%- endif %}
29
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
30
+ {%- elif 'text' in item %}
31
+ {{- item.text }}
32
+ {%- else %}
33
+ {{- raise_exception('Unexpected item type in content.') }}
34
+ {%- endif %}
35
+ {%- endfor %}
36
+ {%- elif content is none or content is undefined %}
37
+ {{- '' }}
38
+ {%- else %}
39
+ {{- raise_exception('Unexpected content type.') }}
40
+ {%- endif %}
41
+ {%- endmacro %}
42
+ {%- if not messages %}
43
+ {{- raise_exception('No messages provided.') }}
44
+ {%- endif %}
45
+ {%- if tools and tools is iterable and tools is not mapping %}
46
+ {{- '<|im_start|>system\n' }}
47
+ {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
48
+ {%- for tool in tools %}
49
+ {{- "\n" }}
50
+ {{- tool | tojson }}
51
+ {%- endfor %}
52
+ {{- "\n</tools>" }}
53
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
54
+ {%- if messages[0].role == 'system' %}
55
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
56
+ {%- if content %}
57
+ {{- '\n\n' + content }}
58
+ {%- endif %}
59
+ {%- endif %}
60
+ {{- '<|im_end|>\n' }}
61
+ {%- else %}
62
+ {%- if messages[0].role == 'system' %}
63
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
64
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
65
+ {%- endif %}
66
+ {%- endif %}
67
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
68
+ {%- for message in messages[::-1] %}
69
+ {%- set index = (messages|length - 1) - loop.index0 %}
70
+ {%- if ns.multi_step_tool and message.role == "user" %}
71
+ {%- set content = render_content(message.content, false)|trim %}
72
+ {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
73
+ {%- set ns.multi_step_tool = false %}
74
+ {%- set ns.last_query_index = index %}
75
+ {%- endif %}
76
+ {%- endif %}
77
+ {%- endfor %}
78
+ {%- if ns.multi_step_tool %}
79
+ {{- raise_exception('No user query found in messages.') }}
80
+ {%- endif %}
81
+ {%- for message in messages %}
82
+ {%- set content = render_content(message.content, true)|trim %}
83
+ {%- if message.role == "system" %}
84
+ {%- if not loop.first %}
85
+ {{- raise_exception('System message must be at the beginning.') }}
86
+ {%- endif %}
87
+ {%- elif message.role == "user" %}
88
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
89
+ {%- elif message.role == "assistant" %}
90
+ {%- set reasoning_content = '' %}
91
+ {%- if message.reasoning_content is string %}
92
+ {%- set reasoning_content = message.reasoning_content %}
93
+ {%- else %}
94
+ {%- if '</think>' in content %}
95
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
96
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
97
+ {%- endif %}
98
+ {%- endif %}
99
+ {%- set reasoning_content = reasoning_content|trim %}
100
+ {%- if loop.index0 > ns.last_query_index %}
101
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
102
+ {%- else %}
103
+ {{- '<|im_start|>' + message.role + '\n' + content }}
104
+ {%- endif %}
105
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
106
+ {%- for tool_call in message.tool_calls %}
107
+ {%- if tool_call.function is defined %}
108
+ {%- set tool_call = tool_call.function %}
109
+ {%- endif %}
110
+ {%- if loop.first %}
111
+ {%- if content|trim %}
112
+ {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
113
+ {%- else %}
114
+ {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
115
+ {%- endif %}
116
+ {%- else %}
117
+ {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
118
+ {%- endif %}
119
+ {%- if tool_call.arguments is defined %}
120
+ {%- for args_name, args_value in tool_call.arguments|items %}
121
+ {{- '<parameter=' + args_name + '>\n' }}
122
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
123
+ {{- args_value }}
124
+ {{- '\n</parameter>\n' }}
125
+ {%- endfor %}
126
+ {%- endif %}
127
+ {{- '</function>\n</tool_call>' }}
128
+ {%- endfor %}
129
+ {%- endif %}
130
+ {{- '<|im_end|>\n' }}
131
+ {%- elif message.role == "tool" %}
132
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
133
+ {{- '<|im_start|>user' }}
134
+ {%- endif %}
135
+ {{- '\n<tool_response>\n' }}
136
+ {{- content }}
137
+ {{- '\n</tool_response>' }}
138
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
139
+ {{- '<|im_end|>\n' }}
140
+ {%- elif loop.last %}
141
+ {{- '<|im_end|>\n' }}
142
+ {%- endif %}
143
+ {%- else %}
144
+ {{- raise_exception('Unexpected message role.') }}
145
+ {%- endif %}
146
+ {%- endfor %}
147
+ {%- if add_generation_prompt %}
148
+ {{- '<|im_start|>assistant\n' }}
149
+ {%- if enable_thinking is defined and enable_thinking is false %}
150
+ {{- '<think>\n\n</think>\n\n' }}
151
+ {%- else %}
152
+ {{- '<think>\n' }}
153
+ {%- endif %}
154
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3_5MoeForConditionalGeneration"
4
+ ],
5
+ "dtype": "bfloat16",
6
+ "hidden_size": 4096,
7
+ "image_token_id": 248056,
8
+ "model_type": "qwen3_5_moe",
9
+ "text_config": {
10
+ "attention_bias": false,
11
+ "attention_dropout": 0.0,
12
+ "attn_output_gate": true,
13
+ "bos_token_id": null,
14
+ "dtype": "bfloat16",
15
+ "eos_token_id": 248044,
16
+ "full_attention_interval": 4,
17
+ "head_dim": 256,
18
+ "hidden_act": "silu",
19
+ "hidden_size": 4096,
20
+ "initializer_range": 0.02,
21
+ "layer_types": [
22
+ "linear_attention",
23
+ "linear_attention",
24
+ "linear_attention",
25
+ "full_attention",
26
+ "linear_attention",
27
+ "linear_attention",
28
+ "linear_attention",
29
+ "full_attention",
30
+ "linear_attention",
31
+ "linear_attention",
32
+ "linear_attention",
33
+ "full_attention",
34
+ "linear_attention",
35
+ "linear_attention",
36
+ "linear_attention",
37
+ "full_attention",
38
+ "linear_attention",
39
+ "linear_attention",
40
+ "linear_attention",
41
+ "full_attention",
42
+ "linear_attention",
43
+ "linear_attention",
44
+ "linear_attention",
45
+ "full_attention",
46
+ "linear_attention",
47
+ "linear_attention",
48
+ "linear_attention",
49
+ "full_attention",
50
+ "linear_attention",
51
+ "linear_attention",
52
+ "linear_attention",
53
+ "full_attention",
54
+ "linear_attention",
55
+ "linear_attention",
56
+ "linear_attention",
57
+ "full_attention",
58
+ "linear_attention",
59
+ "linear_attention",
60
+ "linear_attention",
61
+ "full_attention",
62
+ "linear_attention",
63
+ "linear_attention",
64
+ "linear_attention",
65
+ "full_attention",
66
+ "linear_attention",
67
+ "linear_attention",
68
+ "linear_attention",
69
+ "full_attention",
70
+ "linear_attention",
71
+ "linear_attention",
72
+ "linear_attention",
73
+ "full_attention",
74
+ "linear_attention",
75
+ "linear_attention",
76
+ "linear_attention",
77
+ "full_attention",
78
+ "linear_attention",
79
+ "linear_attention",
80
+ "linear_attention",
81
+ "full_attention"
82
+ ],
83
+ "linear_conv_kernel_dim": 4,
84
+ "linear_key_head_dim": 128,
85
+ "linear_num_key_heads": 16,
86
+ "linear_num_value_heads": 64,
87
+ "linear_value_head_dim": 128,
88
+ "mamba_ssm_dtype": "float32",
89
+ "max_position_embeddings": 262144,
90
+ "mlp_only_layers": [],
91
+ "model_type": "qwen3_5_moe_text",
92
+ "moe_intermediate_size": 1024,
93
+ "mtp_num_hidden_layers": 1,
94
+ "mtp_use_dedicated_embeddings": false,
95
+ "num_attention_heads": 32,
96
+ "num_experts": 512,
97
+ "num_experts_per_tok": 10,
98
+ "num_hidden_layers": 60,
99
+ "num_key_value_heads": 2,
100
+ "output_router_logits": false,
101
+ "pad_token_id": null,
102
+ "partial_rotary_factor": 0.25,
103
+ "rms_norm_eps": 1e-06,
104
+ "rope_parameters": {
105
+ "mrope_interleaved": true,
106
+ "mrope_section": [
107
+ 11,
108
+ 11,
109
+ 10
110
+ ],
111
+ "partial_rotary_factor": 0.25,
112
+ "rope_theta": 10000000,
113
+ "rope_type": "default"
114
+ },
115
+ "router_aux_loss_coef": 0.001,
116
+ "shared_expert_intermediate_size": 1024,
117
+ "tie_word_embeddings": false,
118
+ "use_cache": true,
119
+ "vocab_size": 248320
120
+ },
121
+ "tie_word_embeddings": false,
122
+ "transformers_version": "5.8.1",
123
+ "video_token_id": 248057,
124
+ "vision_config": {
125
+ "deepstack_visual_indexes": [],
126
+ "depth": 27,
127
+ "dtype": "bfloat16",
128
+ "hidden_act": "gelu_pytorch_tanh",
129
+ "hidden_size": 1152,
130
+ "in_channels": 3,
131
+ "initializer_range": 0.02,
132
+ "intermediate_size": 4304,
133
+ "model_type": "qwen3_5_moe",
134
+ "num_heads": 16,
135
+ "num_position_embeddings": 2304,
136
+ "out_hidden_size": 4096,
137
+ "patch_size": 16,
138
+ "spatial_merge_size": 2,
139
+ "temporal_patch_size": 2
140
+ },
141
+ "vision_end_token_id": 248054,
142
+ "vision_start_token_id": 248053,
143
+ "quantization_config": {
144
+ "quant_method": "exl3",
145
+ "version": "0.0.43",
146
+ "bits": 3.04,
147
+ "head_bits": 8,
148
+ "calibration": {
149
+ "rows": 250,
150
+ "cols": 2048
151
+ },
152
+ "out_scales": "always",
153
+ "codebook": "mcg"
154
+ }
155
+ }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 248044,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 248046,
6
+ 248044
7
+ ],
8
+ "pad_token_id": 248044,
9
+ "temperature": 0.6,
10
+ "top_k": 20,
11
+ "top_p": 0.95,
12
+ "transformers_version": "4.57.0.dev0"
13
+ }
model-00001-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d46a2f389733f589938bba29121ef13ffafc07ebebb3546b4b592654761bf52f
3
+ size 2034237561
model-00002-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c38b6a6315fb065996c80aac7927fec3275daf32524e38bd20e982d78b55624
3
+ size 2519209563
model-00003-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed711b57fd5cd8c96e2ee638a4776eccd9e0880ef7d2086cbb7cc1a8c264c992
3
+ size 2519209563
model-00005-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acc17677ce45f535e047371c0816cbeb75231f25bb144903c768b59f7df6aef4
3
+ size 2510200842
model-00006-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aacfc9df78ef2059720442d3a36100b9e85bb9ee554b737c763ac50f8f6e47ad
3
+ size 2519209563
model-00009-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01595bdadbaf087295859a116c24f7712a5eca5a56239d5002cd492b8c1e0b80
3
+ size 2510200842
model-00012-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb11d758414e1d13983b6fa8e7c1a840113505fb370a6677e04c209ed58d0d8c
3
+ size 2519215741
model-00015-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34df0f971b6a5f04dacecf4868a584ea02d3a5c07d6e080994dd2d0a1533bbe3
3
+ size 2519215741
model-00016-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d7bdb21da252721d8fcb4ecedab5454ff2ffdbd36ba2a26f7ea6a20625476ed
3
+ size 2519215741
model-00019-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:607f040e2f2c4fc57f2c8452e0762cec8104e173dc1d5a8022eef5c7ef73e4a4
3
+ size 2519215741
model-00021-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7badaa42deb683a2ac15958b9a5e425a88dcaa21c7d9af851b369e61db1bf665
3
+ size 2510207020
model-00022-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edeb94c00142d92ca155db2742a4ad7db4155fc091a9c69be6537518d896d255
3
+ size 2519215741
model-00024-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31f3e5f0f273fe45ff831aacad37faf2e9f2ef0eb59199169515574208cad003
3
+ size 2519215741
model-00025-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0f9ae8c65e1e5315f89bb028ea88442fcb3c36a8d1fd766ea3a4f7d6effe052
3
+ size 2510207020
model-00027-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92d519a60d165989d0e0e4ba5eee1ea9ac56e53f8c75ed2c4bf212ef7fe1d462
3
+ size 2519215741
model-00028-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46b74d79fea10e1cf029669a4c96995750957cea0ef4dc20dc97bfb98d25ca5e
3
+ size 2519215741
model-00029-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa9c176aa312ad715ca00e53ad0ab811e080350b8aa0afd90825105e93c29ed0
3
+ size 2510207020
model-00030-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21d0c3875ab5552229eef542bd58466e65c0effe0dc259deef83d0aa1609d9ca
3
+ size 2519215741
model-00031-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08b218eb78f1263f932e445ac5616624d8467b74f262e93bd1102d01e0d5b7c6
3
+ size 2519215741
model-00032-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a90c47006873ebb795172e59b6d4c19de36c8d2227b246c2dfb5642a696e9725
3
+ size 2519215741
model-00033-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fa72b11ad8a698d348d4ecb4143a4b31e7aa05b398a0ab9444b78b326c7a3be
3
+ size 2510207020
model-00045-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:091e392f8483edcd19165d4a9fdd5452d2c392b1071baa03a9fb6d7865917bbe
3
+ size 2510207020
model-00046-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8db14022f3b5e4d88f8de339f93742024253d47882435be87d2f91dc1b6bf27e
3
+ size 2519215741
model-00047-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1797d617791d8c207f17bef27dac15e920bd7aa490fb00985c64e794d6eed22d
3
+ size 2519215741
model-00048-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12822d3520477892e3cd54cfe5597ecb1d5145d4cffc0b3498e4c0d9058cea79
3
+ size 2519215741
model-00049-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba773fd0a839e3eee69b01f4cdfa7aedd1cb3d553788100dbc193265bfdda88e
3
+ size 2510207020
model-00051-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5cf816441c8604d4eccfa42f7261c1b438be6e47a5d4f3209186a9c8119719b
3
+ size 2519215741
model-00052-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccf30175298f83bbe9acef48e7bb67bafb10dd4e5920b12a9b5f5dcaa7271290
3
+ size 2519215741
model-00053-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14ac0a0791909be63404bf18124c29d6984c059cec176db145111addd1ed924b
3
+ size 2510207020
model-00054-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df8a4f9913cb1ce3e50ab2abf7c4d0cc10ceab1a872e004751935688b753eb7e
3
+ size 2519215741
model-00055-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6648115cb571de076576c3d26e1175fd39e93d3ae903c8ae3b1153ea4c5adce5
3
+ size 2519215741
model-00057-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1eb20a5dc057556dd243e60d42503c3da03fbffc8164e1d8bb1b08158340d4d
3
+ size 2510207020
model-00058-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62de56b4f912cf865865d19b7e73238e6da9eae4a7ffd3cab95e33e95ac9652a
3
+ size 2519215741
model-00059-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a383b1f69de8cd0dd98e1fc8f5311dea102bc8d2d102d09b4ef2c6f5166afe1a
3
+ size 2519215741
model-00060-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d21ffdbb6ec292fa59e9b2233bc7b34bc8b89fc72ca98a854e3dfb947c4765e3
3
+ size 2519215741
model-00061-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5193c6e284faed1aa387d5d56cbbcec67df58238f6081dc9e23753d5ffe8fcb
3
+ size 4292512235
model-00062-of-00062.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45a2461d0fff65890c532d5cf638a98a1cb6aa8304f48bee11a05a094097045a
3
+ size 147385595
model.safetensors.index.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58fd7b1e8a16f8e3e9cdbd6e265240be2ee59097b1068f99e1fa49dd2a090e17
3
+ size 40039839
preprocessor_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "size": {
3
+ "longest_edge": 16777216,
4
+ "shortest_edge": 65536
5
+ },
6
+ "patch_size": 16,
7
+ "temporal_patch_size": 2,
8
+ "merge_size": 2,
9
+ "image_mean": [
10
+ 0.5,
11
+ 0.5,
12
+ 0.5
13
+ ],
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "processor_class": "Qwen3VLProcessor",
20
+ "image_processor_type": "Qwen2VLImageProcessorFast"
21
+ }
processor_config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_processor": {
3
+ "do_convert_rgb": true,
4
+ "do_normalize": true,
5
+ "do_rescale": true,
6
+ "do_resize": true,
7
+ "image_mean": [
8
+ 0.5,
9
+ 0.5,
10
+ 0.5
11
+ ],
12
+ "image_processor_type": "Qwen2VLImageProcessor",
13
+ "image_std": [
14
+ 0.5,
15
+ 0.5,
16
+ 0.5
17
+ ],
18
+ "merge_size": 2,
19
+ "patch_size": 16,
20
+ "resample": 3,
21
+ "rescale_factor": 0.00392156862745098,
22
+ "size": {
23
+ "longest_edge": 16777216,
24
+ "shortest_edge": 65536
25
+ },
26
+ "temporal_patch_size": 2
27
+ },
28
+ "processor_class": "Qwen3VLProcessor",
29
+ "video_processor": {
30
+ "do_convert_rgb": true,
31
+ "do_normalize": true,
32
+ "do_rescale": true,
33
+ "do_resize": true,
34
+ "do_sample_frames": true,
35
+ "fps": 2,
36
+ "image_mean": [
37
+ 0.5,
38
+ 0.5,
39
+ 0.5
40
+ ],
41
+ "image_std": [
42
+ 0.5,
43
+ 0.5,
44
+ 0.5
45
+ ],
46
+ "max_frames": 768,
47
+ "merge_size": 2,
48
+ "min_frames": 4,
49
+ "patch_size": 16,
50
+ "resample": 3,
51
+ "rescale_factor": 0.00392156862745098,
52
+ "return_metadata": false,
53
+ "size": {
54
+ "longest_edge": 25165824,
55
+ "shortest_edge": 4096
56
+ },
57
+ "temporal_patch_size": 2,
58
+ "video_processor_type": "Qwen3VLVideoProcessor"
59
+ }
60
+ }
quantization_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d13843abc48f35f578efb66b978ae0495be9231c0189d7f09ad944f44f435f67
3
+ size 120618873
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06b9509352d2af50381ab2247e083b80d32d5c0aba91c272ca9ff729b6a0e523
3
+ size 19989325
tokenizer_config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "audio_bos_token": "<|audio_start|>",
4
+ "audio_eos_token": "<|audio_end|>",
5
+ "audio_token": "<|audio_pad|>",
6
+ "backend": "tokenizers",
7
+ "bos_token": null,
8
+ "clean_up_tokenization_spaces": false,
9
+ "eos_token": "<|im_end|>",
10
+ "errors": "replace",
11
+ "image_token": "<|image_pad|>",
12
+ "is_local": true,
13
+ "local_files_only": false,
14
+ "model_max_length": 262144,
15
+ "model_specific_special_tokens": {
16
+ "audio_bos_token": "<|audio_start|>",
17
+ "audio_eos_token": "<|audio_end|>",
18
+ "audio_token": "<|audio_pad|>",
19
+ "image_token": "<|image_pad|>",
20
+ "video_token": "<|video_pad|>",
21
+ "vision_bos_token": "<|vision_start|>",
22
+ "vision_eos_token": "<|vision_end|>"
23
+ },
24
+ "pad_token": "<|endoftext|>",
25
+ "pretokenize_regex": "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?[\\p{L}\\p{M}]+|\\p{N}| ?[^\\s\\p{L}\\p{M}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+",
26
+ "processor_class": "Qwen3VLProcessor",
27
+ "split_special_tokens": false,
28
+ "tokenizer_class": "Qwen2Tokenizer",
29
+ "unk_token": null,
30
+ "video_token": "<|video_pad|>",
31
+ "vision_bos_token": "<|vision_start|>",
32
+ "vision_eos_token": "<|vision_end|>"
33
+ }
video_preprocessor_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "size": {
3
+ "longest_edge": 25165824,
4
+ "shortest_edge": 4096
5
+ },
6
+ "patch_size": 16,
7
+ "temporal_patch_size": 2,
8
+ "merge_size": 2,
9
+ "image_mean": [
10
+ 0.5,
11
+ 0.5,
12
+ 0.5
13
+ ],
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "processor_class": "Qwen3VLProcessor",
20
+ "video_processor_type": "Qwen3VLVideoProcessor"
21
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff