sophosympatheia committed on
Commit
21ae5d1
·
verified ·
1 Parent(s): 60012a9

initial upload

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,766 @@
1
  ---
2
- license: apache-2.0
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ base_model: []
3
+ library_name: transformers
4
+ tags:
5
+ - mergekit
6
+ - merge
7
+
8
  ---
9
+ # Glisten-31B-v0.1.4
10
+
11
+ This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
12
+
13
+ ## Merge Details
14
+ ### Merge Method
15
+
16
+ This model was merged using the [DELLA](https://arxiv.org/abs/2406.11617) merge method, with /home/llm/mergequant/models/BASE/gemma-4-31B-it as the base model.
17
+
18
+ ### Models Merged
19
+
20
+ The following models were included in the merge:
21
+ * /home/llm/mergequant/models/BASE/Artemis-31B-v1h
22
+ * /home/llm/mergequant/models/BASE/zerofata-G4-MeroMero-31B
23
+ * /home/llm/mergequant/models/BASE/gemma-4-Ortenzya-The-Creative-Wordsmith-31B-it-uncensored-heretic
24
+
25
+ ### Configuration
26
+
27
+ The following YAML configuration was used to produce this model:
28
+
29
+ ```yaml
30
+ models:
31
+ - model: /home/llm/mergequant/models/BASE/gemma-4-31B-it
32
+ # base model — no parameters needed
33
+
34
+ - model: /home/llm/mergequant/models/BASE/gemma-4-Ortenzya-The-Creative-Wordsmith-31B-it-uncensored-heretic
35
+ parameters:
36
+ weight:
37
+ # ── Attn spike suppression — surgical ───────────────────
38
+ # L5, L7, L12, L15 anomalously high in attn KL.
39
+ # Near-zero to prevent early context binding disruption.
40
+ - filter: "layers.5.self_attn"
41
+ value: 0.03
42
+ - filter: "layers.7.self_attn"
43
+ value: 0.03
44
+ - filter: "layers.12.self_attn"
45
+ value: 0.03 # L12: 0.187 KL — primary offender
46
+ - filter: "layers.15.self_attn"
47
+ value: 0.03
48
+
49
+ # ── Dead zone 48-59: excluded ────────────────────────────
50
+ # Zero KL divergence from base — nothing to contribute.
51
+ - filter: "layers.48.self_attn"
52
+ value: 0.00
53
+ - filter: "layers.48.mlp"
54
+ value: 0.00
55
+ - filter: "layers.49.self_attn"
56
+ value: 0.00
57
+ - filter: "layers.49.mlp"
58
+ value: 0.00
59
+ - filter: "layers.50.self_attn"
60
+ value: 0.00
61
+ - filter: "layers.50.mlp"
62
+ value: 0.00
63
+ - filter: "layers.51.self_attn"
64
+ value: 0.00
65
+ - filter: "layers.51.mlp"
66
+ value: 0.00
67
+ - filter: "layers.52.self_attn"
68
+ value: 0.00
69
+ - filter: "layers.52.mlp"
70
+ value: 0.00
71
+ - filter: "layers.53.self_attn"
72
+ value: 0.00
73
+ - filter: "layers.53.mlp"
74
+ value: 0.00
75
+ - filter: "layers.54.self_attn"
76
+ value: 0.00
77
+ - filter: "layers.54.mlp"
78
+ value: 0.00
79
+ - filter: "layers.55.self_attn"
80
+ value: 0.00
81
+ - filter: "layers.55.mlp"
82
+ value: 0.00
83
+ - filter: "layers.56.self_attn"
84
+ value: 0.00
85
+ - filter: "layers.56.mlp"
86
+ value: 0.00
87
+ - filter: "layers.57.self_attn"
88
+ value: 0.00
89
+ - filter: "layers.57.mlp"
90
+ value: 0.00
91
+ - filter: "layers.58.self_attn"
92
+ value: 0.00
93
+ - filter: "layers.58.mlp"
94
+ value: 0.00
95
+ - filter: "layers.59.self_attn"
96
+ value: 0.00
97
+ - filter: "layers.59.mlp"
98
+ value: 0.00
99
+
100
+ # ── MeroMero inflection zone 26-35 ──────────────────────
101
+ # Ortenzya minimized — protecting MeroMero's detail zone.
102
+ # Scaled down proportionally to hit 1.05 budget.
103
+ - filter: "layers.26.self_attn"
104
+ value: 0.09
105
+ - filter: "layers.27.self_attn"
106
+ value: 0.09
107
+ - filter: "layers.28.self_attn"
108
+ value: 0.09
109
+ - filter: "layers.29.self_attn"
110
+ value: 0.09
111
+ - filter: "layers.30.self_attn"
112
+ value: 0.09
113
+ - filter: "layers.31.self_attn"
114
+ value: 0.09
115
+ - filter: "layers.32.self_attn"
116
+ value: 0.09
117
+ - filter: "layers.33.self_attn"
118
+ value: 0.09
119
+ - filter: "layers.34.self_attn"
120
+ value: 0.09
121
+ - filter: "layers.35.self_attn"
122
+ value: 0.09
123
+ - filter: "layers.26.mlp"
124
+ value: 0.13
125
+ - filter: "layers.27.mlp"
126
+ value: 0.13
127
+ - filter: "layers.28.mlp"
128
+ value: 0.13
129
+ - filter: "layers.29.mlp"
130
+ value: 0.13
131
+ - filter: "layers.30.mlp"
132
+ value: 0.13
133
+ - filter: "layers.31.mlp"
134
+ value: 0.13
135
+ - filter: "layers.32.mlp"
136
+ value: 0.13
137
+ - filter: "layers.33.mlp"
138
+ value: 0.13
139
+ - filter: "layers.34.mlp"
140
+ value: 0.13
141
+ - filter: "layers.35.mlp"
142
+ value: 0.13
143
+
144
+ # ── Active zone 36-43 ───────────────────────────────────
145
+ # Ortenzya's genuine creative signal zone.
146
+ # Scaled to budget — preserves relative contribution
147
+ # while keeping total sum at 1.05.
148
+ - filter: "layers.36.self_attn"
149
+ value: 0.11
150
+ - filter: "layers.37.self_attn"
151
+ value: 0.11
152
+ - filter: "layers.38.self_attn"
153
+ value: 0.11
154
+ - filter: "layers.39.self_attn"
155
+ value: 0.11
156
+ - filter: "layers.40.self_attn"
157
+ value: 0.11
158
+ - filter: "layers.41.self_attn"
159
+ value: 0.11
160
+ - filter: "layers.42.self_attn"
161
+ value: 0.11
162
+ - filter: "layers.43.self_attn"
163
+ value: 0.11
164
+ - filter: "layers.36.mlp"
165
+ value: 0.23
166
+ - filter: "layers.37.mlp"
167
+ value: 0.23
168
+ - filter: "layers.38.mlp"
169
+ value: 0.23
170
+ - filter: "layers.39.mlp"
171
+ value: 0.23
172
+ - filter: "layers.40.mlp"
173
+ value: 0.23
174
+ - filter: "layers.41.mlp"
175
+ value: 0.23
176
+ - filter: "layers.42.mlp"
177
+ value: 0.23
178
+ - filter: "layers.43.mlp"
179
+ value: 0.23
180
+
181
+ # ── Active zone 44-47: Ortenzya MLP peak ────────────────
182
+ # Highest MLP KL in the model (up to 0.098 at L47).
183
+ # Slightly higher than 36-43 to respect the peak,
184
+ # still scaled to budget.
185
+ - filter: "layers.44.self_attn"
186
+ value: 0.11
187
+ - filter: "layers.45.self_attn"
188
+ value: 0.11
189
+ - filter: "layers.46.self_attn"
190
+ value: 0.11
191
+ - filter: "layers.47.self_attn"
192
+ value: 0.11
193
+ - filter: "layers.44.mlp"
194
+ value: 0.26
195
+ - filter: "layers.45.mlp"
196
+ value: 0.26
197
+ - filter: "layers.46.mlp"
198
+ value: 0.26
199
+ - filter: "layers.47.mlp"
200
+ value: 0.26
201
+
202
+ # ── Zone-wide fallbacks ──────────────────────────────────
203
+ # Covers early layers 0-25 (excluding spike layers above).
204
+ # Scaled to budget alongside MeroMero and Artemis early values.
205
+ - filter: "self_attn"
206
+ value: 0.14
207
+ - filter: "mlp"
208
+ value: 0.26
209
+ - value: 0.25
210
+ density: 0.80
211
+ epsilon: 0.099
212
+
213
+ - model: /home/llm/mergequant/models/BASE/zerofata-G4-MeroMero-31B
214
+ parameters:
215
+ weight:
216
+ # ── Early layers 0-25 ───────────────────────────────────
217
+ # Scaled down from 0.60/0.55 to hit budget alongside
218
+ # Artemis and Ortenzya early contributions.
219
+ - filter: "layers.0.self_attn"
220
+ value: 0.57
221
+ - filter: "layers.1.self_attn"
222
+ value: 0.57
223
+ - filter: "layers.2.self_attn"
224
+ value: 0.57
225
+ - filter: "layers.3.self_attn"
226
+ value: 0.57
227
+ - filter: "layers.4.self_attn"
228
+ value: 0.57
229
+ - filter: "layers.5.self_attn"
230
+ value: 0.57
231
+ - filter: "layers.6.self_attn"
232
+ value: 0.57
233
+ - filter: "layers.7.self_attn"
234
+ value: 0.57
235
+ - filter: "layers.8.self_attn"
236
+ value: 0.57
237
+ - filter: "layers.9.self_attn"
238
+ value: 0.57
239
+ - filter: "layers.10.self_attn"
240
+ value: 0.57
241
+ - filter: "layers.11.self_attn"
242
+ value: 0.57
243
+ - filter: "layers.12.self_attn"
244
+ value: 0.57
245
+ - filter: "layers.13.self_attn"
246
+ value: 0.57
247
+ - filter: "layers.14.self_attn"
248
+ value: 0.57
249
+ - filter: "layers.15.self_attn"
250
+ value: 0.57
251
+ - filter: "layers.16.self_attn"
252
+ value: 0.57
253
+ - filter: "layers.17.self_attn"
254
+ value: 0.57
255
+ - filter: "layers.18.self_attn"
256
+ value: 0.57
257
+ - filter: "layers.19.self_attn"
258
+ value: 0.57
259
+ - filter: "layers.20.self_attn"
260
+ value: 0.57
261
+ - filter: "layers.21.self_attn"
262
+ value: 0.57
263
+ - filter: "layers.22.self_attn"
264
+ value: 0.57
265
+ - filter: "layers.23.self_attn"
266
+ value: 0.57
267
+ - filter: "layers.24.self_attn"
268
+ value: 0.57
269
+ - filter: "layers.25.self_attn"
270
+ value: 0.57
271
+ - filter: "layers.0.mlp"
272
+ value: 0.48
273
+ - filter: "layers.1.mlp"
274
+ value: 0.48
275
+ - filter: "layers.2.mlp"
276
+ value: 0.48
277
+ - filter: "layers.3.mlp"
278
+ value: 0.48
279
+ - filter: "layers.4.mlp"
280
+ value: 0.48
281
+ - filter: "layers.5.mlp"
282
+ value: 0.48
283
+ - filter: "layers.6.mlp"
284
+ value: 0.48
285
+ - filter: "layers.7.mlp"
286
+ value: 0.48
287
+ - filter: "layers.8.mlp"
288
+ value: 0.48
289
+ - filter: "layers.9.mlp"
290
+ value: 0.48
291
+ - filter: "layers.10.mlp"
292
+ value: 0.48
293
+ - filter: "layers.11.mlp"
294
+ value: 0.48
295
+ - filter: "layers.12.mlp"
296
+ value: 0.48
297
+ - filter: "layers.13.mlp"
298
+ value: 0.48
299
+ - filter: "layers.14.mlp"
300
+ value: 0.48
301
+ - filter: "layers.15.mlp"
302
+ value: 0.48
303
+ - filter: "layers.16.mlp"
304
+ value: 0.48
305
+ - filter: "layers.17.mlp"
306
+ value: 0.48
307
+ - filter: "layers.18.mlp"
308
+ value: 0.48
309
+ - filter: "layers.19.mlp"
310
+ value: 0.48
311
+ - filter: "layers.20.mlp"
312
+ value: 0.48
313
+ - filter: "layers.21.mlp"
314
+ value: 0.48
315
+ - filter: "layers.22.mlp"
316
+ value: 0.48
317
+ - filter: "layers.23.mlp"
318
+ value: 0.48
319
+ - filter: "layers.24.mlp"
320
+ value: 0.48
321
+ - filter: "layers.25.mlp"
322
+ value: 0.48
323
+
324
+ # ── MeroMero inflection zone 26-35 ──────────────────────
325
+ # MeroMero's most important zone — scaled down minimally
326
+ # to preserve dominance while hitting budget.
327
+ - filter: "layers.26.self_attn"
328
+ value: 0.73
329
+ - filter: "layers.27.self_attn"
330
+ value: 0.73
331
+ - filter: "layers.28.self_attn"
332
+ value: 0.73
333
+ - filter: "layers.29.self_attn"
334
+ value: 0.73
335
+ - filter: "layers.30.self_attn"
336
+ value: 0.73
337
+ - filter: "layers.31.self_attn"
338
+ value: 0.73
339
+ - filter: "layers.32.self_attn"
340
+ value: 0.73
341
+ - filter: "layers.33.self_attn"
342
+ value: 0.73
343
+ - filter: "layers.34.self_attn"
344
+ value: 0.73
345
+ - filter: "layers.35.self_attn"
346
+ value: 0.73
347
+ - filter: "layers.26.mlp"
348
+ value: 0.74
349
+ - filter: "layers.27.mlp"
350
+ value: 0.74
351
+ - filter: "layers.28.mlp"
352
+ value: 0.74
353
+ - filter: "layers.29.mlp"
354
+ value: 0.74
355
+ - filter: "layers.30.mlp"
356
+ value: 0.74
357
+ - filter: "layers.31.mlp"
358
+ value: 0.74
359
+ - filter: "layers.32.mlp"
360
+ value: 0.74
361
+ - filter: "layers.33.mlp"
362
+ value: 0.74
363
+ - filter: "layers.34.mlp"
364
+ value: 0.74
365
+ - filter: "layers.35.mlp"
366
+ value: 0.74
367
+
368
+ # ── Sustained active zone 36-50 ─────────────────────────
369
+ # MeroMero attn leads for structural coherence.
370
+ # MLP pulled back to give Artemis room in this zone.
371
+ - filter: "layers.36.self_attn"
372
+ value: 0.63
373
+ - filter: "layers.37.self_attn"
374
+ value: 0.63
375
+ - filter: "layers.38.self_attn"
376
+ value: 0.63
377
+ - filter: "layers.39.self_attn"
378
+ value: 0.63
379
+ - filter: "layers.40.self_attn"
380
+ value: 0.63
381
+ - filter: "layers.41.self_attn"
382
+ value: 0.63
383
+ - filter: "layers.42.self_attn"
384
+ value: 0.63
385
+ - filter: "layers.43.self_attn"
386
+ value: 0.63
387
+ - filter: "layers.44.self_attn"
388
+ value: 0.63
389
+ - filter: "layers.45.self_attn"
390
+ value: 0.63
391
+ - filter: "layers.46.self_attn"
392
+ value: 0.63
393
+ - filter: "layers.47.self_attn"
394
+ value: 0.63
395
+ - filter: "layers.48.self_attn"
396
+ value: 0.70
397
+ - filter: "layers.49.self_attn"
398
+ value: 0.70
399
+ - filter: "layers.50.self_attn"
400
+ value: 0.70
401
+ - filter: "layers.36.mlp"
402
+ value: 0.31
403
+ - filter: "layers.37.mlp"
404
+ value: 0.31
405
+ - filter: "layers.38.mlp"
406
+ value: 0.31
407
+ - filter: "layers.39.mlp"
408
+ value: 0.31
409
+ - filter: "layers.40.mlp"
410
+ value: 0.31
411
+ - filter: "layers.41.mlp"
412
+ value: 0.31
413
+ - filter: "layers.42.mlp"
414
+ value: 0.31
415
+ - filter: "layers.43.mlp"
416
+ value: 0.31
417
+ - filter: "layers.44.mlp"
418
+ value: 0.30
419
+ - filter: "layers.45.mlp"
420
+ value: 0.30
421
+ - filter: "layers.46.mlp"
422
+ value: 0.30
423
+ - filter: "layers.47.mlp"
424
+ value: 0.30
425
+ - filter: "layers.48.mlp"
426
+ value: 0.40
427
+ - filter: "layers.49.mlp"
428
+ value: 0.40
429
+ - filter: "layers.50.mlp"
430
+ value: 0.40
431
+
432
+ # ── Output zone 51-59 ───────────────────────────────────
433
+ # Ortenzya is dead here — only MeroMero and Artemis.
434
+ # Budget already at 1.05 with previous two-model values.
435
+ - filter: "layers.51.self_attn"
436
+ value: 0.65
437
+ - filter: "layers.52.self_attn"
438
+ value: 0.65
439
+ - filter: "layers.53.self_attn"
440
+ value: 0.65
441
+ - filter: "layers.54.self_attn"
442
+ value: 0.65
443
+ - filter: "layers.55.self_attn"
444
+ value: 0.65
445
+ - filter: "layers.56.self_attn"
446
+ value: 0.65
447
+ - filter: "layers.57.self_attn"
448
+ value: 0.65
449
+ - filter: "layers.58.self_attn"
450
+ value: 0.65
451
+ - filter: "layers.59.self_attn"
452
+ value: 0.65
453
+ - filter: "layers.51.mlp"
454
+ value: 0.50
455
+ - filter: "layers.52.mlp"
456
+ value: 0.50
457
+ - filter: "layers.53.mlp"
458
+ value: 0.50
459
+ - filter: "layers.54.mlp"
460
+ value: 0.50
461
+ - filter: "layers.55.mlp"
462
+ value: 0.50
463
+ - filter: "layers.56.mlp"
464
+ value: 0.50
465
+ - filter: "layers.57.mlp"
466
+ value: 0.50
467
+ - filter: "layers.58.mlp"
468
+ value: 0.50
469
+ - filter: "layers.59.mlp"
470
+ value: 0.50
471
+
472
+ # ── Zone-wide fallbacks ─────────────────────────────────
473
+ - filter: "self_attn"
474
+ value: 0.57
475
+ - filter: "mlp"
476
+ value: 0.48
477
+ - value: 0.50
478
+ density: 0.95
479
+ epsilon: 0.099
480
+
481
+ - model: /home/llm/mergequant/models/BASE/Artemis-31B-v1h
482
+ parameters:
483
+ weight:
484
+ # ── Early layers 0-25 ───────────────────────────────────
485
+ # Scaled down from 0.35 to hit budget alongside
486
+ # MeroMero and Ortenzya early contributions.
487
+ - filter: "layers.0.self_attn"
488
+ value: 0.33
489
+ - filter: "layers.1.self_attn"
490
+ value: 0.33
491
+ - filter: "layers.2.self_attn"
492
+ value: 0.33
493
+ - filter: "layers.3.self_attn"
494
+ value: 0.33
495
+ - filter: "layers.4.self_attn"
496
+ value: 0.33
497
+ - filter: "layers.5.self_attn"
498
+ value: 0.33
499
+ - filter: "layers.6.self_attn"
500
+ value: 0.33
501
+ - filter: "layers.7.self_attn"
502
+ value: 0.33
503
+ - filter: "layers.8.self_attn"
504
+ value: 0.33
505
+ - filter: "layers.9.self_attn"
506
+ value: 0.33
507
+ - filter: "layers.10.self_attn"
508
+ value: 0.33
509
+ - filter: "layers.11.self_attn"
510
+ value: 0.33
511
+ - filter: "layers.12.self_attn"
512
+ value: 0.33
513
+ - filter: "layers.13.self_attn"
514
+ value: 0.33
515
+ - filter: "layers.14.self_attn"
516
+ value: 0.33
517
+ - filter: "layers.15.self_attn"
518
+ value: 0.33
519
+ - filter: "layers.16.self_attn"
520
+ value: 0.33
521
+ - filter: "layers.17.self_attn"
522
+ value: 0.33
523
+ - filter: "layers.18.self_attn"
524
+ value: 0.33
525
+ - filter: "layers.19.self_attn"
526
+ value: 0.33
527
+ - filter: "layers.20.self_attn"
528
+ value: 0.33
529
+ - filter: "layers.21.self_attn"
530
+ value: 0.33
531
+ - filter: "layers.22.self_attn"
532
+ value: 0.33
533
+ - filter: "layers.23.self_attn"
534
+ value: 0.33
535
+ - filter: "layers.24.self_attn"
536
+ value: 0.33
537
+ - filter: "layers.25.self_attn"
538
+ value: 0.33
539
+ - filter: "layers.0.mlp"
540
+ value: 0.31
541
+ - filter: "layers.1.mlp"
542
+ value: 0.31
543
+ - filter: "layers.2.mlp"
544
+ value: 0.31
545
+ - filter: "layers.3.mlp"
546
+ value: 0.31
547
+ - filter: "layers.4.mlp"
548
+ value: 0.31
549
+ - filter: "layers.5.mlp"
550
+ value: 0.31
551
+ - filter: "layers.6.mlp"
552
+ value: 0.31
553
+ - filter: "layers.7.mlp"
554
+ value: 0.31
555
+ - filter: "layers.8.mlp"
556
+ value: 0.31
557
+ - filter: "layers.9.mlp"
558
+ value: 0.31
559
+ - filter: "layers.10.mlp"
560
+ value: 0.31
561
+ - filter: "layers.11.mlp"
562
+ value: 0.31
563
+ - filter: "layers.12.mlp"
564
+ value: 0.31
565
+ - filter: "layers.13.mlp"
566
+ value: 0.31
567
+ - filter: "layers.14.mlp"
568
+ value: 0.31
569
+ - filter: "layers.15.mlp"
570
+ value: 0.31
571
+ - filter: "layers.16.mlp"
572
+ value: 0.31
573
+ - filter: "layers.17.mlp"
574
+ value: 0.31
575
+ - filter: "layers.18.mlp"
576
+ value: 0.31
577
+ - filter: "layers.19.mlp"
578
+ value: 0.31
579
+ - filter: "layers.20.mlp"
580
+ value: 0.31
581
+ - filter: "layers.21.mlp"
582
+ value: 0.31
583
+ - filter: "layers.22.mlp"
584
+ value: 0.31
585
+ - filter: "layers.23.mlp"
586
+ value: 0.31
587
+ - filter: "layers.24.mlp"
588
+ value: 0.31
589
+ - filter: "layers.25.mlp"
590
+ value: 0.31
591
+
592
+ # ── MeroMero inflection zone 26-35 ──────────────────────
593
+ # Artemis minimized to protect MeroMero's detail zone.
594
+ # Scaled proportionally to budget.
595
+ - filter: "layers.26.self_attn"
596
+ value: 0.23
597
+ - filter: "layers.27.self_attn"
598
+ value: 0.23
599
+ - filter: "layers.28.self_attn"
600
+ value: 0.23
601
+ - filter: "layers.29.self_attn"
602
+ value: 0.23
603
+ - filter: "layers.30.self_attn"
604
+ value: 0.23
605
+ - filter: "layers.31.self_attn"
606
+ value: 0.23
607
+ - filter: "layers.32.self_attn"
608
+ value: 0.23
609
+ - filter: "layers.33.self_attn"
610
+ value: 0.23
611
+ - filter: "layers.34.self_attn"
612
+ value: 0.23
613
+ - filter: "layers.35.self_attn"
614
+ value: 0.23
615
+ - filter: "layers.26.mlp"
616
+ value: 0.18
617
+ - filter: "layers.27.mlp"
618
+ value: 0.18
619
+ - filter: "layers.28.mlp"
620
+ value: 0.18
621
+ - filter: "layers.29.mlp"
622
+ value: 0.18
623
+ - filter: "layers.30.mlp"
624
+ value: 0.18
625
+ - filter: "layers.31.mlp"
626
+ value: 0.18
627
+ - filter: "layers.32.mlp"
628
+ value: 0.18
629
+ - filter: "layers.33.mlp"
630
+ value: 0.18
631
+ - filter: "layers.34.mlp"
632
+ value: 0.18
633
+ - filter: "layers.35.mlp"
634
+ value: 0.18
635
+
636
+ # ── Sustained active zone 36-47 ─────────────────────────
637
+ # Artemis MLP leads for creative voice.
638
+ # Scaled to budget — still dominant over MeroMero MLP here.
639
+ - filter: "layers.36.self_attn"
640
+ value: 0.31
641
+ - filter: "layers.37.self_attn"
642
+ value: 0.31
643
+ - filter: "layers.38.self_attn"
644
+ value: 0.31
645
+ - filter: "layers.39.self_attn"
646
+ value: 0.31
647
+ - filter: "layers.40.self_attn"
648
+ value: 0.31
649
+ - filter: "layers.41.self_attn"
650
+ value: 0.31
651
+ - filter: "layers.42.self_attn"
652
+ value: 0.31
653
+ - filter: "layers.43.self_attn"
654
+ value: 0.31
655
+ - filter: "layers.44.self_attn"
656
+ value: 0.31
657
+ - filter: "layers.45.self_attn"
658
+ value: 0.31
659
+ - filter: "layers.46.self_attn"
660
+ value: 0.31
661
+ - filter: "layers.47.self_attn"
662
+ value: 0.31
663
+ - filter: "layers.36.mlp"
664
+ value: 0.51
665
+ - filter: "layers.37.mlp"
666
+ value: 0.51
667
+ - filter: "layers.38.mlp"
668
+ value: 0.51
669
+ - filter: "layers.39.mlp"
670
+ value: 0.51
671
+ - filter: "layers.40.mlp"
672
+ value: 0.51
673
+ - filter: "layers.41.mlp"
674
+ value: 0.51
675
+ - filter: "layers.42.mlp"
676
+ value: 0.51
677
+ - filter: "layers.43.mlp"
678
+ value: 0.51
679
+ - filter: "layers.44.mlp"
680
+ value: 0.49
681
+ - filter: "layers.45.mlp"
682
+ value: 0.49
683
+ - filter: "layers.46.mlp"
684
+ value: 0.49
685
+ - filter: "layers.47.mlp"
686
+ value: 0.49
687
+
688
+ # ── Active zone 48-50 ───────────────────────────────────
689
+ # Ortenzya dead here — back to two-model budget of 1.05.
690
+ - filter: "layers.48.self_attn"
691
+ value: 0.35
692
+ - filter: "layers.49.self_attn"
693
+ value: 0.35
694
+ - filter: "layers.50.self_attn"
695
+ value: 0.35
696
+ - filter: "layers.48.mlp"
697
+ value: 0.65
698
+ - filter: "layers.49.mlp"
699
+ value: 0.65
700
+ - filter: "layers.50.mlp"
701
+ value: 0.65
702
+
703
+ # ── Output zone 51-59 ───────────────────────────────────
704
+ # Ortenzya dead — two-model budget.
705
+ # Artemis MLP nudged down from 0.60 to 0.55
706
+ # to bring output sum to 1.05.
707
+ - filter: "layers.51.self_attn"
708
+ value: 0.40
709
+ - filter: "layers.52.self_attn"
710
+ value: 0.40
711
+ - filter: "layers.53.self_attn"
712
+ value: 0.40
713
+ - filter: "layers.54.self_attn"
714
+ value: 0.40
715
+ - filter: "layers.55.self_attn"
716
+ value: 0.40
717
+ - filter: "layers.56.self_attn"
718
+ value: 0.40
719
+ - filter: "layers.57.self_attn"
720
+ value: 0.40
721
+ - filter: "layers.58.self_attn"
722
+ value: 0.40
723
+ - filter: "layers.59.self_attn"
724
+ value: 0.40
725
+ - filter: "layers.51.mlp"
726
+ value: 0.55
727
+ - filter: "layers.52.mlp"
728
+ value: 0.55
729
+ - filter: "layers.53.mlp"
730
+ value: 0.55
731
+ - filter: "layers.54.mlp"
732
+ value: 0.55
733
+ - filter: "layers.55.mlp"
734
+ value: 0.55
735
+ - filter: "layers.56.mlp"
736
+ value: 0.55
737
+ - filter: "layers.57.mlp"
738
+ value: 0.55
739
+ - filter: "layers.58.mlp"
740
+ value: 0.55
741
+ - filter: "layers.59.mlp"
742
+ value: 0.55
743
+
744
+ # ── Zone-wide fallbacks ─────────────────────────────────
745
+ - filter: "self_attn"
746
+ value: 0.33
747
+ - filter: "mlp"
748
+ value: 0.31
749
+ - value: 0.50
750
+ density: 0.85
751
+ epsilon: 0.099
752
+
753
+ merge_method: della
754
+ base_model: /home/llm/mergequant/models/BASE/gemma-4-31B-it
755
+
756
+ parameters:
757
+ lambda: 1.0
758
+ normalize: false
759
+
760
+ tokenizer:
761
+ source: base
762
+ chat_template: auto
763
+ dtype: float32
764
+ out_dtype: bfloat16
765
+
766
+ ```
chat_template.jinja ADDED
@@ -0,0 +1,354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- macro format_parameters(properties, required, filter_keys=false) -%}{#-
    format_parameters: render a mapping of parameter schemas as a comma-separated
    list of `name:{...}` entries, iterating the mapping in key order (dictsort).

    Arguments:
      properties  - mapping of parameter name to its schema mapping.
      required    - accepted for call-site symmetry; it is not read anywhere in
                    this macro body (per-entry `required` lists come from
                    value['required'] instead).
      filter_keys - when true, keys listed in standard_keys are skipped.

    For each entry the fields are emitted in this order, each only when present:
    description, then enum (STRING) or items (ARRAY), nullable, then properties
    and required (OBJECT), and finally the upper-cased type, which also closes
    the entry's brace. String values are wrapped in the <|"|> delimiter.
    ns.found_first drives the comma between entries; add_comma drives the comma
    between fields inside one entry.
-#}
2
+ {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}
3
+ {%- set ns = namespace(found_first=false) -%}
4
+ {%- for key, value in properties | dictsort -%}
5
+ {%- set add_comma = false -%}
6
+ {%- if not filter_keys or key not in standard_keys -%}{#- with filter_keys set, schema keywords are not treated as parameter names -#}
7
+ {%- if ns.found_first %},{% endif -%}
8
+ {%- set ns.found_first = true -%}
9
+ {{ key }}:{
10
+ {%- if value['description'] -%}
11
+ description:<|"|>{{ value['description'] }}<|"|>
12
+ {%- set add_comma = true -%}
13
+ {%- endif -%}
14
+ {%- if value['type'] | upper == 'STRING' -%}
15
+ {%- if value['enum'] -%}
16
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
17
+ enum:{{ format_argument(value['enum']) }}
18
+ {%- endif -%}
19
+ {%- elif value['type'] | upper == 'ARRAY' -%}
20
+ {%- if value['items'] is mapping and value['items'] -%}
21
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
22
+ items:{
23
+ {%- set ns_items = namespace(found_first=false) -%}
24
+ {%- for item_key, item_value in value['items'] | dictsort -%}
25
+ {%- if item_value is not none -%}
26
+ {%- if ns_items.found_first %},{% endif -%}
27
+ {%- set ns_items.found_first = true -%}
28
+ {%- if item_key == 'properties' -%}
29
+ properties:{
30
+ {%- if item_value is mapping -%}
31
+ {{- format_parameters(item_value, value['items']['required'] | default([])) -}}
32
+ {%- endif -%}
33
+ }
34
+ {%- elif item_key == 'required' -%}
35
+ required:[
36
+ {%- for req_item in item_value -%}
37
+ <|"|>{{- req_item -}}<|"|>
38
+ {%- if not loop.last %},{% endif -%}
39
+ {%- endfor -%}
40
+ ]
41
+ {%- elif item_key == 'type' -%}
42
+ {%- if item_value is string -%}
43
+ type:{{ format_argument(item_value | upper) }}
44
+ {%- else -%}
45
+ type:{{ format_argument(item_value | map('upper') | list) }}
46
+ {%- endif -%}
47
+ {%- else -%}
48
+ {{ item_key }}:{{ format_argument(item_value) }}
49
+ {%- endif -%}
50
+ {%- endif -%}
51
+ {%- endfor -%}
52
+ }
53
+ {%- endif -%}
54
+ {%- endif -%}
55
+ {%- if value['nullable'] %}
56
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
57
+ nullable:true
58
+ {%- endif -%}
59
+ {%- if value['type'] | upper == 'OBJECT' -%}
60
+ {%- if value['properties'] is defined and value['properties'] is mapping -%}
61
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
62
+ properties:{
63
+ {{- format_parameters(value['properties'], value['required'] | default([])) -}}
64
+ }
65
+ {%- elif value is mapping -%}{#- OBJECT without a `properties` mapping: recurse over the schema itself, skipping standard_keys -#}
66
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
67
+ properties:{
68
+ {{- format_parameters(value, value['required'] | default([]), filter_keys=true) -}}
69
+ }
70
+ {%- endif -%}
71
+ {%- if value['required'] -%}
72
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
73
+ required:[
74
+ {%- for item in value['required'] | default([]) -%}
75
+ <|"|>{{- item -}}<|"|>
76
+ {%- if not loop.last %},{% endif -%}
77
+ {%- endfor -%}
78
+ ]
79
+ {%- endif -%}
80
+ {%- endif -%}
81
+ {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}
82
+ type:<|"|>{{ value['type'] | upper }}<|"|>}
83
+ {%- endif -%}
84
+ {%- endfor -%}
85
+ {%- endmacro -%}
86
+ {%- macro format_function_declaration(tool_data) -%}{#-
    format_function_declaration: render one tool definition as
    `declaration:NAME{description:...,parameters:{...},response:{...}}`.

    Reads tool_data['function'] for `name`, `description`, `parameters` and,
    when the key exists, `response`. Parameter properties are rendered through
    format_parameters; required names and type strings are wrapped in the <|"|>
    delimiter, and types are upper-cased.
-#}
87
+ declaration:{{- tool_data['function']['name'] -}}{description:<|"|>{{- tool_data['function']['description'] -}}<|"|>
88
+ {%- set params = tool_data['function']['parameters'] -%}
89
+ {%- if params -%}
90
+ ,parameters:{
91
+ {%- if params['properties'] -%}
92
+ properties:{ {{- format_parameters(params['properties'], params['required']) -}} },
93
+ {%- endif -%}
94
+ {%- if params['required'] -%}
95
+ required:[
96
+ {%- for item in params['required'] -%}
97
+ <|"|>{{- item -}}<|"|>
98
+ {{- ',' if not loop.last -}}
99
+ {%- endfor -%}
100
+ ],
101
+ {%- endif -%}
102
+ {%- if params['type'] -%}{#- NOTE(review): the brace opened by `parameters:{` is closed only inside this branch, so a schema without `type` leaves it unbalanced; confirm this is intended -#}
103
+ type:<|"|>{{- params['type'] | upper -}}<|"|>}
104
+ {%- endif -%}
105
+ {%- endif -%}
106
+ {%- if 'response' in tool_data['function'] -%}
107
+ {%- set response_declaration = tool_data['function']['response'] -%}
108
+ ,response:{
109
+ {%- if response_declaration['description'] -%}
110
+ description:<|"|>{{- response_declaration['description'] -}}<|"|>,
111
+ {%- endif -%}
112
+ {%- if response_declaration['type'] | upper == 'OBJECT' -%}{#- NOTE(review): the brace opened by `response:{` is closed only when the type is OBJECT; confirm other types never occur -#}
113
+ type:<|"|>{{- response_declaration['type'] | upper -}}<|"|>}
114
+ {%- endif -%}
115
+ {%- endif -%}
116
+ }
117
+ {%- endmacro -%}
118
+ {%- macro format_argument(argument, escape_keys=True) -%}{#-
    format_argument: recursively serialise a value into the template's compact
    notation.

      string   - wrapped in the <|"|> delimiter
      boolean  - emitted as `true` or `false`
      mapping  - `{key:value,...}` in key order (dictsort); keys are wrapped in
                 the <|"|> delimiter only when escape_keys is true
      sequence - `[item,...]`
      other    - emitted unchanged

    escape_keys is passed down unchanged to nested values.
    NOTE(review): the string and mapping branches are tested before `sequence`;
    Jinja's `sequence` test presumably also matches those types, so confirm
    before reordering the branches.
-#}
119
+ {%- if argument is string -%}
120
+ {{- '<|"|>' + argument + '<|"|>' -}}
121
+ {%- elif argument is boolean -%}
122
+ {{- 'true' if argument else 'false' -}}
123
+ {%- elif argument is mapping -%}
124
+ {{- '{' -}}
125
+ {%- set ns = namespace(found_first=false) -%}
126
+ {%- for key, value in argument | dictsort -%}
127
+ {%- if ns.found_first %},{% endif -%}
128
+ {%- set ns.found_first = true -%}
129
+ {%- if escape_keys -%}
130
+ {{- '<|"|>' + key + '<|"|>' -}}
131
+ {%- else -%}
132
+ {{- key -}}
133
+ {%- endif -%}
134
+ :{{- format_argument(value, escape_keys=escape_keys) -}}
135
+ {%- endfor -%}
136
+ {{- '}' -}}
137
+ {%- elif argument is sequence -%}
138
+ {{- '[' -}}
139
+ {%- for item in argument -%}
140
+ {{- format_argument(item, escape_keys=escape_keys) -}}
141
+ {%- if not loop.last %},{% endif -%}
142
+ {%- endfor -%}
143
+ {{- ']' -}}
144
+ {%- else -%}
145
+ {{- argument -}}
146
+ {%- endif -%}
147
+ {%- endmacro -%}
148
+ {%- macro strip_thinking(text) -%}{#-
    strip_thinking: return `text` with every `<|channel>` ... `<channel|>` span
    removed, then trimmed of surrounding whitespace.

    The text is split on the closing marker `<channel|>`; from each piece that
    contains the opening marker `<|channel>` only the part before the first
    opening marker is kept. As a consequence, an opening marker with no closing
    marker drops everything after it in that piece.
-#}
149
+ {%- set ns = namespace(result='') -%}
150
+ {%- for part in text.split('<channel|>') -%}
151
+ {%- if '<|channel>' in part -%}
152
+ {%- set ns.result = ns.result + part.split('<|channel>')[0] -%}
153
+ {%- else -%}
154
+ {%- set ns.result = ns.result + part -%}
155
+ {%- endif -%}
156
+ {%- endfor -%}
157
+ {{- ns.result | trim -}}
158
+ {%- endmacro -%}
159
+
160
+ {%- macro format_tool_response_block(tool_name, response) -%}{#-
    format_tool_response_block: wrap a tool result between the `<|tool_response>`
    and `<tool_response|>` markers as `response:TOOL_NAME{...}`.

    A mapping response is emitted as `key:value` pairs in key order (dictsort),
    with keys written bare and values rendered by format_argument with
    escape_keys=False. Any other response is emitted as a single `value:` field.
    tool_name is joined with `+`, so it is assumed to be a string.
-#}
161
+ {{- '<|tool_response>' -}}
162
+ {%- if response is mapping -%}
163
+ {{- 'response:' + tool_name + '{' -}}
164
+ {%- for key, value in response | dictsort -%}
165
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
166
+ {%- if not loop.last %},{% endif -%}
167
+ {%- endfor -%}
168
+ {{- '}' -}}
169
+ {%- else -%}
170
+ {{- 'response:' + tool_name + '{value:' + format_argument(response, escape_keys=False) + '}' -}}
171
+ {%- endif -%}
172
+ {{- '<tool_response|>' -}}
173
+ {%- endmacro -%}
174
+
175
+ {%- set ns = namespace(prev_message_type=None) -%}{#- ns.prev_message_type records the kind of the last thing emitted; it is read again when closing turns and for the generation prompt. -#}
176
+ {%- set loop_messages = messages -%}{#- Narrowed to messages[1:] below once a leading system/developer message has been consumed. -#}
177
+ {{- bos_token -}}
178
+ {#- Handle System/Tool Definitions Block: one system turn is opened when thinking is enabled, tools are supplied, or the first message is a system/developer message -#}
179
+ {%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
180
+ {{- '<|turn>system\n' -}}
181
+ {#- Inject Thinking token at the very top of the FIRST system turn -#}
182
+ {%- if enable_thinking is defined and enable_thinking -%}
183
+ {{- '<|think|>\n' -}}
184
+ {%- set ns.prev_message_type = 'think' -%}
185
+ {%- endif -%}
186
+ {%- if messages[0]['role'] in ['system', 'developer'] -%}
187
+ {%- if messages[0]['content'] is string -%}
188
+ {{- messages[0]['content'] | trim -}}
189
+ {%- elif messages[0]['content'] is sequence -%}{#- Content-parts form: each item's text is trimmed and followed by one space; items are assumed to carry a 'text' key. -#}
190
+ {%- for item in messages[0]['content'] -%}
191
+ {{- item['text'] | trim + ' '-}}
192
+ {%- endfor -%}
193
+ {%- endif -%}
194
+ {%- set loop_messages = messages[1:] -%}
195
+ {%- endif -%}
196
+ {%- if tools -%}{#- Each tool declaration is wrapped in '<|tool>' ... '<tool|>'; format_function_declaration is defined outside this excerpt. -#}
197
+ {%- for tool in tools %}
198
+ {{- '<|tool>' -}}
199
+ {{- format_function_declaration(tool) | trim -}}
200
+ {{- '<tool|>' -}}
201
+ {%- endfor %}
202
+ {%- set ns.prev_message_type = 'tool' -%}
203
+ {%- endif -%}
204
+ {{- '<turn|>\n' -}}
205
+ {%- endif %}
206
+
207
+ {#- Pre-scan: remember the index of the last user message; reasoning is only replayed for assistant messages that come after it -#}
208
+ {%- set ns_turn = namespace(last_user_idx=-1) -%}
209
+ {%- for candidate in loop_messages -%}
210
+ {%- if candidate['role'] == 'user' -%}
211
+ {%- set ns_turn.last_user_idx = loop.index0 -%}
212
+ {%- endif -%}
213
+ {%- endfor -%}
214
+
215
+ {#- Loop through messages. role:tool messages are never rendered on their own: they are folded into the preceding assistant turn by the forward scan below. -#}
216
+ {%- for message in loop_messages -%}
217
+ {%- if message['role'] != 'tool' -%}
218
+ {%- set ns.prev_message_type = None -%}
219
+ {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
220
+ {#- Detect continuation: suppress duplicate <|turn>model when previous non-tool message was also assistant -#}
221
+ {%- set prev_nt = namespace(role=None, found=false) -%}
222
+ {%- if loop.index0 > 0 -%}
223
+ {%- for j in range(loop.index0 - 1, -1, -1) -%}
224
+ {%- if not prev_nt.found -%}
225
+ {%- if loop_messages[j]['role'] != 'tool' -%}
226
+ {%- set prev_nt.role = loop_messages[j]['role'] -%}
227
+ {%- set prev_nt.found = true -%}
228
+ {%- endif -%}
229
+ {%- endif -%}
230
+ {%- endfor -%}
231
+ {%- endif -%}
232
+ {%- set continue_same_model_turn = (role == 'model' and prev_nt.role == 'assistant') -%}
233
+ {%- if not continue_same_model_turn -%}
234
+ {{- '<|turn>' + role + '\n' }}
235
+ {%- endif -%}
236
+
237
+ {#- Render reasoning/reasoning_content as thinking channel, only for tool-calling messages that come after the last user message -#}
238
+ {%- set thinking_text = message.get('reasoning') or message.get('reasoning_content') -%}
239
+ {%- if thinking_text and loop.index0 > ns_turn.last_user_idx and message.get('tool_calls') -%}
240
+ {{- '<|channel>thought\n' + thinking_text + '\n<channel|>' -}}
241
+ {%- endif -%}
242
+
243
+ {%- if message['tool_calls'] -%}{#- One '<|tool_call>call:NAME{...}<tool_call|>' per call; mapping arguments are emitted in dictsort order, string arguments verbatim. -#}
244
+ {%- for tool_call in message['tool_calls'] -%}
245
+ {%- set function = tool_call['function'] -%}
246
+ {{- '<|tool_call>call:' + function['name'] + '{' -}}
247
+ {%- if function['arguments'] is mapping -%}
248
+ {%- set ns_args = namespace(found_first=false) -%}
249
+ {%- for key, value in function['arguments'] | dictsort -%}
250
+ {%- if ns_args.found_first %},{% endif -%}
251
+ {%- set ns_args.found_first = true -%}
252
+ {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
253
+ {%- endfor -%}
254
+ {%- elif function['arguments'] is string -%}
255
+ {{- function['arguments'] -}}
256
+ {%- endif -%}
257
+ {{- '}<tool_call|>' -}}
258
+ {%- endfor -%}
259
+ {%- set ns.prev_message_type = 'tool_call' -%}
260
+ {%- endif -%}
261
+
262
+ {%- set ns_tr_out = namespace(flag=false) -%}{#- flag becomes true once at least one tool response has been emitted for this message. -#}
263
+ {%- if message.get('tool_responses') -%}
264
+ {#- Legacy: tool_responses embedded on the assistant message (Google/Gemma native) -#}
265
+ {%- for tool_response in message['tool_responses'] -%}
266
+ {{- format_tool_response_block(tool_response['name'] | default('unknown'), tool_response['response']) -}}
267
+ {%- set ns_tr_out.flag = true -%}
268
+ {%- set ns.prev_message_type = 'tool_response' -%}
269
+ {%- endfor -%}
270
+ {%- elif message.get('tool_calls') -%}
271
+ {#- OpenAI Chat Completions: forward-scan consecutive role:tool messages; the scan stops at the first non-tool message -#}
272
+ {%- set ns_tool_scan = namespace(stopped=false) -%}
273
+ {%- for k in range(loop.index0 + 1, loop_messages | length) -%}
274
+ {%- if ns_tool_scan.stopped -%}
275
+ {%- elif loop_messages[k]['role'] != 'tool' -%}
276
+ {%- set ns_tool_scan.stopped = true -%}
277
+ {%- else -%}
278
+ {%- set follow = loop_messages[k] -%}
279
+ {#- Resolve tool_call_id to function name. Fallback is the tool message's own name, else 'unknown'; default(..., true) is required because dict.get returns None (not undefined) for a missing key -#}
280
+ {%- set ns_tname = namespace(name=follow.get('name') | default('unknown', true)) -%}
281
+ {%- for tc in message['tool_calls'] -%}
282
+ {%- if tc.get('id') == follow.get('tool_call_id') -%}
283
+ {%- set ns_tname.name = tc['function']['name'] -%}
284
+ {%- endif -%}
285
+ {%- endfor -%}
286
+ {#- Handle content as string or content-parts array; for arrays only the 'text' parts are concatenated -#}
287
+ {%- set tool_body = follow.get('content') -%}
288
+ {%- if tool_body is string -%}
289
+ {{- format_tool_response_block(ns_tname.name, tool_body) -}}
290
+ {%- elif tool_body is sequence and tool_body is not string -%}
291
+ {%- set ns_txt = namespace(s='') -%}
292
+ {%- for part in tool_body -%}
293
+ {%- if part.get('type') == 'text' -%}
294
+ {%- set ns_txt.s = ns_txt.s + (part.get('text') | default('', true)) -%}
295
+ {%- endif -%}
296
+ {%- endfor -%}
297
+ {{- format_tool_response_block(ns_tname.name, ns_txt.s) -}}
298
+ {%- else -%}
299
+ {{- format_tool_response_block(ns_tname.name, tool_body) -}}
300
+ {%- endif -%}
301
+ {%- set ns_tr_out.flag = true -%}
302
+ {%- set ns.prev_message_type = 'tool_response' -%}
303
+ {%- endif -%}
304
+ {%- endfor -%}
305
+ {%- endif -%}
306
+
307
+ {%- set captured_content -%}
308
+ {%- if message['content'] is string -%}
309
+ {%- if role == 'model' -%}
310
+ {{- strip_thinking(message['content']) -}}
311
+ {%- else -%}
312
+ {{- message['content'] | trim -}}
313
+ {%- endif -%}
314
+ {%- elif message['content'] is sequence -%}
315
+ {%- for item in message['content'] -%}
316
+ {%- if item['type'] == 'text' -%}
317
+ {%- if role == 'model' -%}
318
+ {{- strip_thinking(item['text']) -}}
319
+ {%- else -%}
320
+ {{- item['text'] | trim -}}
321
+ {%- endif -%}
322
+ {%- elif item['type'] == 'image' -%}
323
+ {{- '<|image|>' -}}
324
+ {%- set ns.prev_message_type = 'image' -%}
325
+ {%- elif item['type'] == 'audio' -%}
326
+ {{- '<|audio|>' -}}
327
+ {%- set ns.prev_message_type = 'audio' -%}
328
+ {%- elif item['type'] == 'video' -%}
329
+ {{- '<|video|>' -}}
330
+ {%- set ns.prev_message_type = 'video' -%}
331
+ {%- endif -%}
332
+ {%- endfor -%}
333
+ {%- endif -%}
334
+ {%- endset -%}
335
+
336
+ {{- captured_content -}}
337
+ {%- set has_content = captured_content | trim | length > 0 -%}
338
+
339
+ {%- if ns.prev_message_type == 'tool_call' and not ns_tr_out.flag -%}{#- Tool call with no response emitted: output '<|tool_response>' instead of closing the turn. -#}
340
+ {{- '<|tool_response>' -}}
341
+ {%- elif not (ns_tr_out.flag and not has_content) -%}{#- Close the turn unless tool responses were emitted and no content followed them. -#}
342
+ {{- '<turn|>\n' -}}
343
+ {%- endif -%}
344
+ {%- endif -%}
345
+ {%- endfor -%}
346
+
347
+ {%- if add_generation_prompt -%}{#- Open a model turn unless the transcript ended on a tool call or tool response. -#}
347
+ {%- if ns.prev_message_type not in ['tool_response', 'tool_call'] -%}
348
+ {{- '<|turn>model\n' -}}
349
+ {%- if not (enable_thinking | default(false)) -%}{#- Thinking disabled or unset: pre-fill an empty thought channel. -#}
350
+ {{- '<|channel>thought\n<channel|>' -}}
351
+ {%- endif -%}
352
+ {%- endif -%}
353
+ {%- endif -%}
config.json ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Gemma4ForConditionalGeneration"
4
+ ],
5
+ "audio_config": null,
6
+ "audio_token_id": 258881,
7
+ "boa_token_id": 256000,
8
+ "boi_token_id": 255999,
9
+ "dtype": "bfloat16",
10
+ "eoa_token_id": 258883,
11
+ "eoa_token_index": 258883,
12
+ "eoi_token_id": 258882,
13
+ "eos_token_id": [
14
+ 1,
15
+ 106
16
+ ],
17
+ "image_token_id": 258880,
18
+ "initializer_range": 0.02,
19
+ "model_type": "gemma4",
20
+ "text_config": {
21
+ "attention_bias": false,
22
+ "attention_dropout": 0.0,
23
+ "attention_k_eq_v": true,
24
+ "bos_token_id": 2,
25
+ "dtype": "bfloat16",
26
+ "enable_moe_block": false,
27
+ "eos_token_id": 1,
28
+ "expert_intermediate_size": null,
29
+ "final_logit_softcapping": 30.0,
30
+ "global_head_dim": 512,
31
+ "head_dim": 256,
32
+ "hidden_activation": "gelu_pytorch_tanh",
33
+ "hidden_size": 5376,
34
+ "hidden_size_per_layer_input": 0,
35
+ "initializer_range": 0.02,
36
+ "intermediate_size": 21504,
37
+ "layer_types": [
38
+ "sliding_attention",
39
+ "sliding_attention",
40
+ "sliding_attention",
41
+ "sliding_attention",
42
+ "sliding_attention",
43
+ "full_attention",
44
+ "sliding_attention",
45
+ "sliding_attention",
46
+ "sliding_attention",
47
+ "sliding_attention",
48
+ "sliding_attention",
49
+ "full_attention",
50
+ "sliding_attention",
51
+ "sliding_attention",
52
+ "sliding_attention",
53
+ "sliding_attention",
54
+ "sliding_attention",
55
+ "full_attention",
56
+ "sliding_attention",
57
+ "sliding_attention",
58
+ "sliding_attention",
59
+ "sliding_attention",
60
+ "sliding_attention",
61
+ "full_attention",
62
+ "sliding_attention",
63
+ "sliding_attention",
64
+ "sliding_attention",
65
+ "sliding_attention",
66
+ "sliding_attention",
67
+ "full_attention",
68
+ "sliding_attention",
69
+ "sliding_attention",
70
+ "sliding_attention",
71
+ "sliding_attention",
72
+ "sliding_attention",
73
+ "full_attention",
74
+ "sliding_attention",
75
+ "sliding_attention",
76
+ "sliding_attention",
77
+ "sliding_attention",
78
+ "sliding_attention",
79
+ "full_attention",
80
+ "sliding_attention",
81
+ "sliding_attention",
82
+ "sliding_attention",
83
+ "sliding_attention",
84
+ "sliding_attention",
85
+ "full_attention",
86
+ "sliding_attention",
87
+ "sliding_attention",
88
+ "sliding_attention",
89
+ "sliding_attention",
90
+ "sliding_attention",
91
+ "full_attention",
92
+ "sliding_attention",
93
+ "sliding_attention",
94
+ "sliding_attention",
95
+ "sliding_attention",
96
+ "sliding_attention",
97
+ "full_attention"
98
+ ],
99
+ "max_position_embeddings": 262144,
100
+ "model_type": "gemma4_text",
101
+ "moe_intermediate_size": null,
102
+ "num_attention_heads": 32,
103
+ "num_experts": null,
104
+ "num_global_key_value_heads": 4,
105
+ "num_hidden_layers": 60,
106
+ "num_key_value_heads": 16,
107
+ "num_kv_shared_layers": 0,
108
+ "pad_token_id": 0,
109
+ "rms_norm_eps": 1e-06,
110
+ "rope_parameters": {
111
+ "full_attention": {
112
+ "partial_rotary_factor": 0.25,
113
+ "rope_theta": 1000000.0,
114
+ "rope_type": "proportional"
115
+ },
116
+ "sliding_attention": {
117
+ "rope_theta": 10000.0,
118
+ "rope_type": "default"
119
+ }
120
+ },
121
+ "sliding_window": 1024,
122
+ "tie_word_embeddings": true,
123
+ "top_k_experts": null,
124
+ "use_bidirectional_attention": "vision",
125
+ "use_cache": true,
126
+ "use_double_wide_mlp": false,
127
+ "vocab_size": 262144,
128
+ "vocab_size_per_layer_input": 262144
129
+ },
130
+ "tie_word_embeddings": true,
131
+ "transformers_version": "5.7.0",
132
+ "video_token_id": 258884,
133
+ "vision_config": {
134
+ "_name_or_path": "",
135
+ "architectures": null,
136
+ "attention_bias": false,
137
+ "attention_dropout": 0.0,
138
+ "chunk_size_feed_forward": 0,
139
+ "default_output_length": 280,
140
+ "dtype": "bfloat16",
141
+ "global_head_dim": 72,
142
+ "head_dim": 72,
143
+ "hidden_activation": "gelu_pytorch_tanh",
144
+ "hidden_size": 1152,
145
+ "id2label": {
146
+ "0": "LABEL_0",
147
+ "1": "LABEL_1"
148
+ },
149
+ "initializer_range": 0.02,
150
+ "intermediate_size": 4304,
151
+ "is_encoder_decoder": false,
152
+ "label2id": {
153
+ "LABEL_0": 0,
154
+ "LABEL_1": 1
155
+ },
156
+ "max_position_embeddings": 131072,
157
+ "model_type": "gemma4_vision",
158
+ "num_attention_heads": 16,
159
+ "num_hidden_layers": 27,
160
+ "num_key_value_heads": 16,
161
+ "output_attentions": false,
162
+ "output_hidden_states": false,
163
+ "patch_size": 16,
164
+ "pooling_kernel_size": 3,
165
+ "position_embedding_size": 10240,
166
+ "problem_type": null,
167
+ "return_dict": true,
168
+ "rms_norm_eps": 1e-06,
169
+ "rope_parameters": {
170
+ "rope_theta": 100.0,
171
+ "rope_type": "default"
172
+ },
173
+ "standardize": true,
174
+ "use_clipped_linears": false
175
+ },
176
+ "vision_soft_tokens_per_image": 280
177
+ }
mergekit_config.yml ADDED
@@ -0,0 +1,735 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ models:
2
+ - model: google/gemma-4-31B-it
3
+ # base model — no parameters needed
4
+
5
+ - model: llmfan46/gemma-4-Ortenzya-The-Creative-Wordsmith-31B-it-uncensored-heretic
6
+ parameters:
7
+ weight:
8
+ # ── Attn spike suppression — surgical ───────────────────
9
+ # L5, L7, L12, L15 anomalously high in attn KL.
10
+ # Near-zero to prevent early context binding disruption.
11
+ - filter: "layers.5.self_attn"
12
+ value: 0.03
13
+ - filter: "layers.7.self_attn"
14
+ value: 0.03
15
+ - filter: "layers.12.self_attn"
16
+ value: 0.03 # L12: 0.187 KL — primary offender
17
+ - filter: "layers.15.self_attn"
18
+ value: 0.03
19
+
20
+ # ── Dead zone 48-59: excluded ────────────────────────────
21
+ # Zero KL divergence from base — nothing to contribute.
22
+ - filter: "layers.48.self_attn"
23
+ value: 0.00
24
+ - filter: "layers.48.mlp"
25
+ value: 0.00
26
+ - filter: "layers.49.self_attn"
27
+ value: 0.00
28
+ - filter: "layers.49.mlp"
29
+ value: 0.00
30
+ - filter: "layers.50.self_attn"
31
+ value: 0.00
32
+ - filter: "layers.50.mlp"
33
+ value: 0.00
34
+ - filter: "layers.51.self_attn"
35
+ value: 0.00
36
+ - filter: "layers.51.mlp"
37
+ value: 0.00
38
+ - filter: "layers.52.self_attn"
39
+ value: 0.00
40
+ - filter: "layers.52.mlp"
41
+ value: 0.00
42
+ - filter: "layers.53.self_attn"
43
+ value: 0.00
44
+ - filter: "layers.53.mlp"
45
+ value: 0.00
46
+ - filter: "layers.54.self_attn"
47
+ value: 0.00
48
+ - filter: "layers.54.mlp"
49
+ value: 0.00
50
+ - filter: "layers.55.self_attn"
51
+ value: 0.00
52
+ - filter: "layers.55.mlp"
53
+ value: 0.00
54
+ - filter: "layers.56.self_attn"
55
+ value: 0.00
56
+ - filter: "layers.56.mlp"
57
+ value: 0.00
58
+ - filter: "layers.57.self_attn"
59
+ value: 0.00
60
+ - filter: "layers.57.mlp"
61
+ value: 0.00
62
+ - filter: "layers.58.self_attn"
63
+ value: 0.00
64
+ - filter: "layers.58.mlp"
65
+ value: 0.00
66
+ - filter: "layers.59.self_attn"
67
+ value: 0.00
68
+ - filter: "layers.59.mlp"
69
+ value: 0.00
70
+
71
+ # ── MeroMero inflection zone 26-35 ──────────────────────
72
+ # Ortenzya minimized — protecting MeroMero's detail zone.
73
+ # Scaled down proportionally to hit 1.05 budget.
74
+ - filter: "layers.26.self_attn"
75
+ value: 0.09
76
+ - filter: "layers.27.self_attn"
77
+ value: 0.09
78
+ - filter: "layers.28.self_attn"
79
+ value: 0.09
80
+ - filter: "layers.29.self_attn"
81
+ value: 0.09
82
+ - filter: "layers.30.self_attn"
83
+ value: 0.09
84
+ - filter: "layers.31.self_attn"
85
+ value: 0.09
86
+ - filter: "layers.32.self_attn"
87
+ value: 0.09
88
+ - filter: "layers.33.self_attn"
89
+ value: 0.09
90
+ - filter: "layers.34.self_attn"
91
+ value: 0.09
92
+ - filter: "layers.35.self_attn"
93
+ value: 0.09
94
+ - filter: "layers.26.mlp"
95
+ value: 0.13
96
+ - filter: "layers.27.mlp"
97
+ value: 0.13
98
+ - filter: "layers.28.mlp"
99
+ value: 0.13
100
+ - filter: "layers.29.mlp"
101
+ value: 0.13
102
+ - filter: "layers.30.mlp"
103
+ value: 0.13
104
+ - filter: "layers.31.mlp"
105
+ value: 0.13
106
+ - filter: "layers.32.mlp"
107
+ value: 0.13
108
+ - filter: "layers.33.mlp"
109
+ value: 0.13
110
+ - filter: "layers.34.mlp"
111
+ value: 0.13
112
+ - filter: "layers.35.mlp"
113
+ value: 0.13
114
+
115
+ # ── Active zone 36-43 ───────────────────────────────────
116
+ # Ortenzya's genuine creative signal zone.
117
+ # Scaled to budget — preserves relative contribution
118
+ # while keeping total sum at 1.05.
119
+ - filter: "layers.36.self_attn"
120
+ value: 0.11
121
+ - filter: "layers.37.self_attn"
122
+ value: 0.11
123
+ - filter: "layers.38.self_attn"
124
+ value: 0.11
125
+ - filter: "layers.39.self_attn"
126
+ value: 0.11
127
+ - filter: "layers.40.self_attn"
128
+ value: 0.11
129
+ - filter: "layers.41.self_attn"
130
+ value: 0.11
131
+ - filter: "layers.42.self_attn"
132
+ value: 0.11
133
+ - filter: "layers.43.self_attn"
134
+ value: 0.11
135
+ - filter: "layers.36.mlp"
136
+ value: 0.23
137
+ - filter: "layers.37.mlp"
138
+ value: 0.23
139
+ - filter: "layers.38.mlp"
140
+ value: 0.23
141
+ - filter: "layers.39.mlp"
142
+ value: 0.23
143
+ - filter: "layers.40.mlp"
144
+ value: 0.23
145
+ - filter: "layers.41.mlp"
146
+ value: 0.23
147
+ - filter: "layers.42.mlp"
148
+ value: 0.23
149
+ - filter: "layers.43.mlp"
150
+ value: 0.23
151
+
152
+ # ── Active zone 44-47: Ortenzya MLP peak ────────────────
153
+ # Highest MLP KL in the model (up to 0.098 at L47).
154
+ # Slightly higher than 36-43 to respect the peak,
155
+ # still scaled to budget.
156
+ - filter: "layers.44.self_attn"
157
+ value: 0.11
158
+ - filter: "layers.45.self_attn"
159
+ value: 0.11
160
+ - filter: "layers.46.self_attn"
161
+ value: 0.11
162
+ - filter: "layers.47.self_attn"
163
+ value: 0.11
164
+ - filter: "layers.44.mlp"
165
+ value: 0.26
166
+ - filter: "layers.45.mlp"
167
+ value: 0.26
168
+ - filter: "layers.46.mlp"
169
+ value: 0.26
170
+ - filter: "layers.47.mlp"
171
+ value: 0.26
172
+
173
+ # ── Zone-wide fallbacks ──────────────────────────────────
174
+ # Covers early layers 0-25 (excluding spike layers above).
175
+ # Scaled to budget alongside MeroMero and Artemis early values (self_attn: 0.14 + 0.57 + 0.33 = 1.04; mlp: 0.26 + 0.48 + 0.31 = 1.05).
176
+ - filter: "self_attn"
177
+ value: 0.14
178
+ - filter: "mlp"
179
+ value: 0.26
180
+ - value: 0.25
181
+ density: 0.80
182
+ epsilon: 0.099
183
+
184
+ - model: zerofata/G4-MeroMero-31B
185
+ parameters:
186
+ weight:
187
+ # ── Early layers 0-25 ───────────────────────────────────
188
+ # Scaled down from 0.60/0.55 to hit budget alongside
189
+ # Artemis and Ortenzya early contributions.
190
+ - filter: "layers.0.self_attn"
191
+ value: 0.57
192
+ - filter: "layers.1.self_attn"
193
+ value: 0.57
194
+ - filter: "layers.2.self_attn"
195
+ value: 0.57
196
+ - filter: "layers.3.self_attn"
197
+ value: 0.57
198
+ - filter: "layers.4.self_attn"
199
+ value: 0.57
200
+ - filter: "layers.5.self_attn"
201
+ value: 0.57
202
+ - filter: "layers.6.self_attn"
203
+ value: 0.57
204
+ - filter: "layers.7.self_attn"
205
+ value: 0.57
206
+ - filter: "layers.8.self_attn"
207
+ value: 0.57
208
+ - filter: "layers.9.self_attn"
209
+ value: 0.57
210
+ - filter: "layers.10.self_attn"
211
+ value: 0.57
212
+ - filter: "layers.11.self_attn"
213
+ value: 0.57
214
+ - filter: "layers.12.self_attn"
215
+ value: 0.57
216
+ - filter: "layers.13.self_attn"
217
+ value: 0.57
218
+ - filter: "layers.14.self_attn"
219
+ value: 0.57
220
+ - filter: "layers.15.self_attn"
221
+ value: 0.57
222
+ - filter: "layers.16.self_attn"
223
+ value: 0.57
224
+ - filter: "layers.17.self_attn"
225
+ value: 0.57
226
+ - filter: "layers.18.self_attn"
227
+ value: 0.57
228
+ - filter: "layers.19.self_attn"
229
+ value: 0.57
230
+ - filter: "layers.20.self_attn"
231
+ value: 0.57
232
+ - filter: "layers.21.self_attn"
233
+ value: 0.57
234
+ - filter: "layers.22.self_attn"
235
+ value: 0.57
236
+ - filter: "layers.23.self_attn"
237
+ value: 0.57
238
+ - filter: "layers.24.self_attn"
239
+ value: 0.57
240
+ - filter: "layers.25.self_attn"
241
+ value: 0.57
242
+ - filter: "layers.0.mlp"
243
+ value: 0.48
244
+ - filter: "layers.1.mlp"
245
+ value: 0.48
246
+ - filter: "layers.2.mlp"
247
+ value: 0.48
248
+ - filter: "layers.3.mlp"
249
+ value: 0.48
250
+ - filter: "layers.4.mlp"
251
+ value: 0.48
252
+ - filter: "layers.5.mlp"
253
+ value: 0.48
254
+ - filter: "layers.6.mlp"
255
+ value: 0.48
256
+ - filter: "layers.7.mlp"
257
+ value: 0.48
258
+ - filter: "layers.8.mlp"
259
+ value: 0.48
260
+ - filter: "layers.9.mlp"
261
+ value: 0.48
262
+ - filter: "layers.10.mlp"
263
+ value: 0.48
264
+ - filter: "layers.11.mlp"
265
+ value: 0.48
266
+ - filter: "layers.12.mlp"
267
+ value: 0.48
268
+ - filter: "layers.13.mlp"
269
+ value: 0.48
270
+ - filter: "layers.14.mlp"
271
+ value: 0.48
272
+ - filter: "layers.15.mlp"
273
+ value: 0.48
274
+ - filter: "layers.16.mlp"
275
+ value: 0.48
276
+ - filter: "layers.17.mlp"
277
+ value: 0.48
278
+ - filter: "layers.18.mlp"
279
+ value: 0.48
280
+ - filter: "layers.19.mlp"
281
+ value: 0.48
282
+ - filter: "layers.20.mlp"
283
+ value: 0.48
284
+ - filter: "layers.21.mlp"
285
+ value: 0.48
286
+ - filter: "layers.22.mlp"
287
+ value: 0.48
288
+ - filter: "layers.23.mlp"
289
+ value: 0.48
290
+ - filter: "layers.24.mlp"
291
+ value: 0.48
292
+ - filter: "layers.25.mlp"
293
+ value: 0.48
294
+
295
+ # ── MeroMero inflection zone 26-35 ──────────────────────
296
+ # MeroMero's most important zone — scaled down minimally
297
+ # to preserve dominance while hitting budget.
298
+ - filter: "layers.26.self_attn"
299
+ value: 0.73
300
+ - filter: "layers.27.self_attn"
301
+ value: 0.73
302
+ - filter: "layers.28.self_attn"
303
+ value: 0.73
304
+ - filter: "layers.29.self_attn"
305
+ value: 0.73
306
+ - filter: "layers.30.self_attn"
307
+ value: 0.73
308
+ - filter: "layers.31.self_attn"
309
+ value: 0.73
310
+ - filter: "layers.32.self_attn"
311
+ value: 0.73
312
+ - filter: "layers.33.self_attn"
313
+ value: 0.73
314
+ - filter: "layers.34.self_attn"
315
+ value: 0.73
316
+ - filter: "layers.35.self_attn"
317
+ value: 0.73
318
+ - filter: "layers.26.mlp"
319
+ value: 0.74
320
+ - filter: "layers.27.mlp"
321
+ value: 0.74
322
+ - filter: "layers.28.mlp"
323
+ value: 0.74
324
+ - filter: "layers.29.mlp"
325
+ value: 0.74
326
+ - filter: "layers.30.mlp"
327
+ value: 0.74
328
+ - filter: "layers.31.mlp"
329
+ value: 0.74
330
+ - filter: "layers.32.mlp"
331
+ value: 0.74
332
+ - filter: "layers.33.mlp"
333
+ value: 0.74
334
+ - filter: "layers.34.mlp"
335
+ value: 0.74
336
+ - filter: "layers.35.mlp"
337
+ value: 0.74
338
+
339
+ # ── Sustained active zone 36-50 ─────────────────────────
340
+ # MeroMero attn leads for structural coherence.
341
+ # MLP pulled back to give Artemis room in this zone.
342
+ - filter: "layers.36.self_attn"
343
+ value: 0.63
344
+ - filter: "layers.37.self_attn"
345
+ value: 0.63
346
+ - filter: "layers.38.self_attn"
347
+ value: 0.63
348
+ - filter: "layers.39.self_attn"
349
+ value: 0.63
350
+ - filter: "layers.40.self_attn"
351
+ value: 0.63
352
+ - filter: "layers.41.self_attn"
353
+ value: 0.63
354
+ - filter: "layers.42.self_attn"
355
+ value: 0.63
356
+ - filter: "layers.43.self_attn"
357
+ value: 0.63
358
+ - filter: "layers.44.self_attn"
359
+ value: 0.63
360
+ - filter: "layers.45.self_attn"
361
+ value: 0.63
362
+ - filter: "layers.46.self_attn"
363
+ value: 0.63
364
+ - filter: "layers.47.self_attn"
365
+ value: 0.63
366
+ - filter: "layers.48.self_attn"
367
+ value: 0.70
368
+ - filter: "layers.49.self_attn"
369
+ value: 0.70
370
+ - filter: "layers.50.self_attn"
371
+ value: 0.70
372
+ - filter: "layers.36.mlp"
373
+ value: 0.31
374
+ - filter: "layers.37.mlp"
375
+ value: 0.31
376
+ - filter: "layers.38.mlp"
377
+ value: 0.31
378
+ - filter: "layers.39.mlp"
379
+ value: 0.31
380
+ - filter: "layers.40.mlp"
381
+ value: 0.31
382
+ - filter: "layers.41.mlp"
383
+ value: 0.31
384
+ - filter: "layers.42.mlp"
385
+ value: 0.31
386
+ - filter: "layers.43.mlp"
387
+ value: 0.31
388
+ - filter: "layers.44.mlp"
389
+ value: 0.30
390
+ - filter: "layers.45.mlp"
391
+ value: 0.30
392
+ - filter: "layers.46.mlp"
393
+ value: 0.30
394
+ - filter: "layers.47.mlp"
395
+ value: 0.30
396
+ - filter: "layers.48.mlp"
397
+ value: 0.40
398
+ - filter: "layers.49.mlp"
399
+ value: 0.40
400
+ - filter: "layers.50.mlp"
401
+ value: 0.40
402
+
403
+ # ── Output zone 51-59 ───────────────────────────────────
404
+ # Ortenzya is dead here — only MeroMero and Artemis.
405
+ # Budget already at 1.05 with previous two-model values.
406
+ - filter: "layers.51.self_attn"
407
+ value: 0.65
408
+ - filter: "layers.52.self_attn"
409
+ value: 0.65
410
+ - filter: "layers.53.self_attn"
411
+ value: 0.65
412
+ - filter: "layers.54.self_attn"
413
+ value: 0.65
414
+ - filter: "layers.55.self_attn"
415
+ value: 0.65
416
+ - filter: "layers.56.self_attn"
417
+ value: 0.65
418
+ - filter: "layers.57.self_attn"
419
+ value: 0.65
420
+ - filter: "layers.58.self_attn"
421
+ value: 0.65
422
+ - filter: "layers.59.self_attn"
423
+ value: 0.65
424
+ - filter: "layers.51.mlp"
425
+ value: 0.50
426
+ - filter: "layers.52.mlp"
427
+ value: 0.50
428
+ - filter: "layers.53.mlp"
429
+ value: 0.50
430
+ - filter: "layers.54.mlp"
431
+ value: 0.50
432
+ - filter: "layers.55.mlp"
433
+ value: 0.50
434
+ - filter: "layers.56.mlp"
435
+ value: 0.50
436
+ - filter: "layers.57.mlp"
437
+ value: 0.50
438
+ - filter: "layers.58.mlp"
439
+ value: 0.50
440
+ - filter: "layers.59.mlp"
441
+ value: 0.50
442
+
443
+ # ── Zone-wide fallbacks ─────────────────────────────────
444
+ - filter: "self_attn"
445
+ value: 0.57
446
+ - filter: "mlp"
447
+ value: 0.48
448
+ - value: 0.50
449
+ density: 0.95
450
+ epsilon: 0.099
451
+
452
+ - model: BeaverAI/Artemis-31B-v1h-GGUF # Q8 GGUF weights converted back to fp16 for merging
453
+ parameters:
454
+ weight:
455
+ # ── Early layers 0-25 ───────────────────────────────────
456
+ # Scaled down from 0.35 to hit budget alongside
457
+ # MeroMero and Ortenzya early contributions.
458
+ - filter: "layers.0.self_attn"
459
+ value: 0.33
460
+ - filter: "layers.1.self_attn"
461
+ value: 0.33
462
+ - filter: "layers.2.self_attn"
463
+ value: 0.33
464
+ - filter: "layers.3.self_attn"
465
+ value: 0.33
466
+ - filter: "layers.4.self_attn"
467
+ value: 0.33
468
+ - filter: "layers.5.self_attn"
469
+ value: 0.33
470
+ - filter: "layers.6.self_attn"
471
+ value: 0.33
472
+ - filter: "layers.7.self_attn"
473
+ value: 0.33
474
+ - filter: "layers.8.self_attn"
475
+ value: 0.33
476
+ - filter: "layers.9.self_attn"
477
+ value: 0.33
478
+ - filter: "layers.10.self_attn"
479
+ value: 0.33
480
+ - filter: "layers.11.self_attn"
481
+ value: 0.33
482
+ - filter: "layers.12.self_attn"
483
+ value: 0.33
484
+ - filter: "layers.13.self_attn"
485
+ value: 0.33
486
+ - filter: "layers.14.self_attn"
487
+ value: 0.33
488
+ - filter: "layers.15.self_attn"
489
+ value: 0.33
490
+ - filter: "layers.16.self_attn"
491
+ value: 0.33
492
+ - filter: "layers.17.self_attn"
493
+ value: 0.33
494
+ - filter: "layers.18.self_attn"
495
+ value: 0.33
496
+ - filter: "layers.19.self_attn"
497
+ value: 0.33
498
+ - filter: "layers.20.self_attn"
499
+ value: 0.33
500
+ - filter: "layers.21.self_attn"
501
+ value: 0.33
502
+ - filter: "layers.22.self_attn"
503
+ value: 0.33
504
+ - filter: "layers.23.self_attn"
505
+ value: 0.33
506
+ - filter: "layers.24.self_attn"
507
+ value: 0.33
508
+ - filter: "layers.25.self_attn"
509
+ value: 0.33
510
+ - filter: "layers.0.mlp"
511
+ value: 0.31
512
+ - filter: "layers.1.mlp"
513
+ value: 0.31
514
+ - filter: "layers.2.mlp"
515
+ value: 0.31
516
+ - filter: "layers.3.mlp"
517
+ value: 0.31
518
+ - filter: "layers.4.mlp"
519
+ value: 0.31
520
+ - filter: "layers.5.mlp"
521
+ value: 0.31
522
+ - filter: "layers.6.mlp"
523
+ value: 0.31
524
+ - filter: "layers.7.mlp"
525
+ value: 0.31
526
+ - filter: "layers.8.mlp"
527
+ value: 0.31
528
+ - filter: "layers.9.mlp"
529
+ value: 0.31
530
+ - filter: "layers.10.mlp"
531
+ value: 0.31
532
+ - filter: "layers.11.mlp"
533
+ value: 0.31
534
+ - filter: "layers.12.mlp"
535
+ value: 0.31
536
+ - filter: "layers.13.mlp"
537
+ value: 0.31
538
+ - filter: "layers.14.mlp"
539
+ value: 0.31
540
+ - filter: "layers.15.mlp"
541
+ value: 0.31
542
+ - filter: "layers.16.mlp"
543
+ value: 0.31
544
+ - filter: "layers.17.mlp"
545
+ value: 0.31
546
+ - filter: "layers.18.mlp"
547
+ value: 0.31
548
+ - filter: "layers.19.mlp"
549
+ value: 0.31
550
+ - filter: "layers.20.mlp"
551
+ value: 0.31
552
+ - filter: "layers.21.mlp"
553
+ value: 0.31
554
+ - filter: "layers.22.mlp"
555
+ value: 0.31
556
+ - filter: "layers.23.mlp"
557
+ value: 0.31
558
+ - filter: "layers.24.mlp"
559
+ value: 0.31
560
+ - filter: "layers.25.mlp"
561
+ value: 0.31
562
+
563
+ # ── MeroMero inflection zone 26-35 ──────────────────────
564
+ # Artemis minimized to protect MeroMero's detail zone.
565
+ # Scaled proportionally to budget.
566
+ - filter: "layers.26.self_attn"
567
+ value: 0.23
568
+ - filter: "layers.27.self_attn"
569
+ value: 0.23
570
+ - filter: "layers.28.self_attn"
571
+ value: 0.23
572
+ - filter: "layers.29.self_attn"
573
+ value: 0.23
574
+ - filter: "layers.30.self_attn"
575
+ value: 0.23
576
+ - filter: "layers.31.self_attn"
577
+ value: 0.23
578
+ - filter: "layers.32.self_attn"
579
+ value: 0.23
580
+ - filter: "layers.33.self_attn"
581
+ value: 0.23
582
+ - filter: "layers.34.self_attn"
583
+ value: 0.23
584
+ - filter: "layers.35.self_attn"
585
+ value: 0.23
586
+ - filter: "layers.26.mlp"
587
+ value: 0.18
588
+ - filter: "layers.27.mlp"
589
+ value: 0.18
590
+ - filter: "layers.28.mlp"
591
+ value: 0.18
592
+ - filter: "layers.29.mlp"
593
+ value: 0.18
594
+ - filter: "layers.30.mlp"
595
+ value: 0.18
596
+ - filter: "layers.31.mlp"
597
+ value: 0.18
598
+ - filter: "layers.32.mlp"
599
+ value: 0.18
600
+ - filter: "layers.33.mlp"
601
+ value: 0.18
602
+ - filter: "layers.34.mlp"
603
+ value: 0.18
604
+ - filter: "layers.35.mlp"
605
+ value: 0.18
606
+
607
+ # ── Sustained active zone 36-47 ─────────────────────────
608
+ # Artemis MLP leads for creative voice.
609
+ # Scaled to budget — still dominant over MeroMero MLP here.
610
+ - filter: "layers.36.self_attn"
611
+ value: 0.31
612
+ - filter: "layers.37.self_attn"
613
+ value: 0.31
614
+ - filter: "layers.38.self_attn"
615
+ value: 0.31
616
+ - filter: "layers.39.self_attn"
617
+ value: 0.31
618
+ - filter: "layers.40.self_attn"
619
+ value: 0.31
620
+ - filter: "layers.41.self_attn"
621
+ value: 0.31
622
+ - filter: "layers.42.self_attn"
623
+ value: 0.31
624
+ - filter: "layers.43.self_attn"
625
+ value: 0.31
626
+ - filter: "layers.44.self_attn"
627
+ value: 0.31
628
+ - filter: "layers.45.self_attn"
629
+ value: 0.31
630
+ - filter: "layers.46.self_attn"
631
+ value: 0.31
632
+ - filter: "layers.47.self_attn"
633
+ value: 0.31
634
+ - filter: "layers.36.mlp"
635
+ value: 0.51
636
+ - filter: "layers.37.mlp"
637
+ value: 0.51
638
+ - filter: "layers.38.mlp"
639
+ value: 0.51
640
+ - filter: "layers.39.mlp"
641
+ value: 0.51
642
+ - filter: "layers.40.mlp"
643
+ value: 0.51
644
+ - filter: "layers.41.mlp"
645
+ value: 0.51
646
+ - filter: "layers.42.mlp"
647
+ value: 0.51
648
+ - filter: "layers.43.mlp"
649
+ value: 0.51
650
+ - filter: "layers.44.mlp"
651
+ value: 0.49
652
+ - filter: "layers.45.mlp"
653
+ value: 0.49
654
+ - filter: "layers.46.mlp"
655
+ value: 0.49
656
+ - filter: "layers.47.mlp"
657
+ value: 0.49
658
+
659
+ # ── Active zone 48-50 ───────────────────────────────────
660
+ # Ortenzya dead here — back to two-model budget of 1.05.
661
+ - filter: "layers.48.self_attn"
662
+ value: 0.35
663
+ - filter: "layers.49.self_attn"
664
+ value: 0.35
665
+ - filter: "layers.50.self_attn"
666
+ value: 0.35
667
+ - filter: "layers.48.mlp"
668
+ value: 0.65
669
+ - filter: "layers.49.mlp"
670
+ value: 0.65
671
+ - filter: "layers.50.mlp"
672
+ value: 0.65
673
+
674
+ # ── Output zone 51-59 ───────────────────────────────────
675
+ # Ortenzya dead — two-model budget.
676
+ # Artemis MLP nudged down from 0.60 to 0.55
677
+ # to bring output sum to 1.05.
678
+ - filter: "layers.51.self_attn"
679
+ value: 0.40
680
+ - filter: "layers.52.self_attn"
681
+ value: 0.40
682
+ - filter: "layers.53.self_attn"
683
+ value: 0.40
684
+ - filter: "layers.54.self_attn"
685
+ value: 0.40
686
+ - filter: "layers.55.self_attn"
687
+ value: 0.40
688
+ - filter: "layers.56.self_attn"
689
+ value: 0.40
690
+ - filter: "layers.57.self_attn"
691
+ value: 0.40
692
+ - filter: "layers.58.self_attn"
693
+ value: 0.40
694
+ - filter: "layers.59.self_attn"
695
+ value: 0.40
696
+ - filter: "layers.51.mlp"
697
+ value: 0.55
698
+ - filter: "layers.52.mlp"
699
+ value: 0.55
700
+ - filter: "layers.53.mlp"
701
+ value: 0.55
702
+ - filter: "layers.54.mlp"
703
+ value: 0.55
704
+ - filter: "layers.55.mlp"
705
+ value: 0.55
706
+ - filter: "layers.56.mlp"
707
+ value: 0.55
708
+ - filter: "layers.57.mlp"
709
+ value: 0.55
710
+ - filter: "layers.58.mlp"
711
+ value: 0.55
712
+ - filter: "layers.59.mlp"
713
+ value: 0.55
714
+
715
+ # ── Zone-wide fallbacks ─────────────────────────────────
716
+ - filter: "self_attn"
717
+ value: 0.33
718
+ - filter: "mlp"
719
+ value: 0.31
720
+ - value: 0.50
721
+ density: 0.85
722
+ epsilon: 0.099
723
+
724
+ merge_method: della
725
+ base_model: google/gemma-4-31B-it
726
+
727
+ parameters:
728
+ lambda: 1.0
729
+ normalize: false
730
+
731
+ tokenizer:
732
+ source: base
733
+ chat_template: auto
734
+ dtype: float32
735
+ out_dtype: bfloat16
model-00001-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e4672923855f191f80ae0e39895175e9b0d70976f67f15fc6eaf4a50b15d103
3
+ size 4965634630
model-00002-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c57b84bd9c266686525b4a8fbbdf18160d2bc042cf454e65a336734f469cf848
3
+ size 4899701578
model-00003-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1e183bbe369b38e10931d1c0712f69a243522f234825fe4c1a43c30f6a13299
3
+ size 4899701570
model-00004-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc7b99c0098e6e10c6bc618c2bfc16bdd88097261e442168ddf130c227d99261
3
+ size 4899701562
model-00005-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2afff00d6f99acfcfe79acc5f13753d49f7509f4098fb6a630f1797491a7464
3
+ size 4789600202
model-00006-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8be606649b3302f96bc66a4d278ad9292b2d57b4ccbafdb09aef15ca67df560f
3
+ size 4899701562
model-00007-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84c7710bb9c7ad26dcf0680882e1a9804b3c0761ea2f6688dee200ae43961b70
3
+ size 4899701578
model-00008-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d6e09041f92097e9094bdba35fb2721b3c5a41d00441d8b0a75a2300e426281
3
+ size 4899701562
model-00009-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e29fb3b282c5ef2d0237afe9dd6f979765f0fd26d636c45ea5d429ed9544164
3
+ size 4789600202
model-00010-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b0e951023a2ccb64c97018273fd5c9028ee9f221bfe6abd71175702348c9ea9
3
+ size 4778591802
model-00011-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87436e9da22d011e9636626d377756e281ba6a0f1bce72b55888970561993622
3
+ size 4899701578
model-00012-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb7908dd180e8bb2c146ff23407131b54142d7f5d2a356691af48059c19fa50d
3
+ size 4899701562
model-00013-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f3c2bfe63ab129d4d053e34f5b08082266673f964993a1ff04fd7154e089007
3
+ size 4012910652
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
processor_config.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audio_ms_per_token": 40,
3
+ "audio_seq_length": 750,
4
+ "feature_extractor": {
5
+ "dither": 0.0,
6
+ "feature_extractor_type": "Gemma4AudioFeatureExtractor",
7
+ "feature_size": 128,
8
+ "fft_length": 512,
9
+ "fft_overdrive": false,
10
+ "frame_length": 320,
11
+ "hop_length": 160,
12
+ "input_scale_factor": 1.0,
13
+ "max_frequency": 8000.0,
14
+ "mel_floor": 0.001,
15
+ "min_frequency": 0.0,
16
+ "padding_side": "right",
17
+ "padding_value": 0.0,
18
+ "per_bin_mean": null,
19
+ "per_bin_stddev": null,
20
+ "preemphasis": 0.0,
21
+ "preemphasis_htk_flavor": true,
22
+ "return_attention_mask": true,
23
+ "sampling_rate": 16000
24
+ },
25
+ "image_processor": {
26
+ "do_convert_rgb": true,
27
+ "do_normalize": false,
28
+ "do_rescale": true,
29
+ "do_resize": true,
30
+ "image_mean": [
31
+ 0.0,
32
+ 0.0,
33
+ 0.0
34
+ ],
35
+ "image_processor_type": "Gemma4ImageProcessor",
36
+ "image_seq_length": 280,
37
+ "image_std": [
38
+ 1.0,
39
+ 1.0,
40
+ 1.0
41
+ ],
42
+ "max_soft_tokens": 280,
43
+ "patch_size": 16,
44
+ "pooling_kernel_size": 3,
45
+ "resample": 3,
46
+ "rescale_factor": 0.00392156862745098
47
+ },
48
+ "image_seq_length": 280,
49
+ "processor_class": "Gemma4Processor",
50
+ "video_processor": {
51
+ "do_convert_rgb": true,
52
+ "do_normalize": true,
53
+ "do_rescale": true,
54
+ "do_resize": true,
55
+ "do_sample_frames": true,
56
+ "image_mean": [
57
+ 0.0,
58
+ 0.0,
59
+ 0.0
60
+ ],
61
+ "image_std": [
62
+ 1.0,
63
+ 1.0,
64
+ 1.0
65
+ ],
66
+ "max_soft_tokens": 70,
67
+ "num_frames": 32,
68
+ "patch_size": 16,
69
+ "pooling_kernel_size": 3,
70
+ "resample": 3,
71
+ "rescale_factor": 0.00392156862745098,
72
+ "return_metadata": false,
73
+ "video_processor_type": "Gemma4VideoProcessor"
74
+ }
75
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc8d3a0ce36466ccc1278bf987df5f71db1719b9ca6b4118264f45cb627bfe0f
3
+ size 32169626
tokenizer_config.json ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "audio_token": "<|audio|>",
3
+ "backend": "tokenizers",
4
+ "boa_token": "<|audio>",
5
+ "boi_token": "<|image>",
6
+ "bos_token": "<bos>",
7
+ "eoa_token": "<audio|>",
8
+ "eoc_token": "<channel|>",
9
+ "eoi_token": "<image|>",
10
+ "eos_token": "<eos>",
11
+ "eot_token": "<turn|>",
12
+ "escape_token": "<|\"|>",
13
+ "etc_token": "<tool_call|>",
14
+ "etd_token": "<tool|>",
15
+ "etr_token": "<tool_response|>",
16
+ "extra_special_tokens": [
17
+ "<|video|>"
18
+ ],
19
+ "image_token": "<|image|>",
20
+ "is_local": true,
21
+ "local_files_only": false,
22
+ "mask_token": "<mask>",
23
+ "model_max_length": 1000000000000000019884624838656,
24
+ "model_specific_special_tokens": {
25
+ "audio_token": "<|audio|>",
26
+ "boa_token": "<|audio>",
27
+ "boi_token": "<|image>",
28
+ "eoa_token": "<audio|>",
29
+ "eoc_token": "<channel|>",
30
+ "eoi_token": "<image|>",
31
+ "eot_token": "<turn|>",
32
+ "escape_token": "<|\"|>",
33
+ "etc_token": "<tool_call|>",
34
+ "etd_token": "<tool|>",
35
+ "etr_token": "<tool_response|>",
36
+ "image_token": "<|image|>",
37
+ "soc_token": "<|channel>",
38
+ "sot_token": "<|turn>",
39
+ "stc_token": "<|tool_call>",
40
+ "std_token": "<|tool>",
41
+ "str_token": "<|tool_response>",
42
+ "think_token": "<|think|>"
43
+ },
44
+ "pad_token": "<pad>",
45
+ "padding_side": "left",
46
+ "processor_class": "Gemma4Processor",
47
+ "response_schema": {
48
+ "properties": {
49
+ "content": {
50
+ "type": "string"
51
+ },
52
+ "role": {
53
+ "const": "assistant"
54
+ },
55
+ "thinking": {
56
+ "type": "string"
57
+ },
58
+ "tool_calls": {
59
+ "items": {
60
+ "properties": {
61
+ "function": {
62
+ "properties": {
63
+ "arguments": {
64
+ "additionalProperties": {},
65
+ "type": "object",
66
+ "x-parser": "gemma4-tool-call"
67
+ },
68
+ "name": {
69
+ "type": "string"
70
+ }
71
+ },
72
+ "type": "object",
73
+ "x-regex": "call\\:(?P<name>\\w+)(?P<arguments>\\{.*\\})"
74
+ },
75
+ "type": {
76
+ "const": "function"
77
+ }
78
+ },
79
+ "type": "object"
80
+ },
81
+ "type": "array",
82
+ "x-regex-iterator": "<\\|tool_call>(.*?)<tool_call\\|>"
83
+ }
84
+ },
85
+ "type": "object",
86
+ "x-regex": "(\\<\\|channel\\>thought\\n(?P<thinking>.*?)\\<channel\\|\\>)?(?P<tool_calls>\\<\\|tool_call\\>.*\\<tool_call\\|\\>)?(?P<content>(?:(?!\\<turn\\|\\>)(?!\\<\\|tool_response\\>).)+)?(?:\\<turn\\|\\>|\\<\\|tool_response\\>)?"
87
+ },
88
+ "soc_token": "<|channel>",
89
+ "sot_token": "<|turn>",
90
+ "stc_token": "<|tool_call>",
91
+ "std_token": "<|tool>",
92
+ "str_token": "<|tool_response>",
93
+ "think_token": "<|think|>",
94
+ "tokenizer_class": "GemmaTokenizer",
95
+ "unk_token": "<unk>"
96
+ }