fix: chat template — null handling, reasoning preservation, turn-tag balance, input validation

#35
Files changed (1) hide show
  1. chat_template.jinja +61 -40
chat_template.jinja CHANGED
@@ -116,7 +116,9 @@
116
  }
117
  {%- endmacro -%}
118
  {%- macro format_argument(argument, escape_keys=True) -%}
119
- {%- if argument is string -%}
 
 
120
  {{- '<|"|>' + argument + '<|"|>' -}}
121
  {%- elif argument is boolean -%}
122
  {{- 'true' if argument else 'false' -}}
@@ -172,18 +174,21 @@
172
  {{- '<tool_response|>' -}}
173
  {%- endmacro -%}
174
 
175
- {%- set ns = namespace(prev_message_type=None) -%}
 
176
  {%- set loop_messages = messages -%}
 
 
177
  {{- bos_token -}}
178
  {#- Handle System/Tool Definitions Block -#}
179
- {%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}
180
  {{- '<|turn>system\n' -}}
181
  {#- Inject Thinking token at the very top of the FIRST system turn -#}
182
- {%- if enable_thinking is defined and enable_thinking -%}
183
  {{- '<|think|>\n' -}}
184
  {%- set ns.prev_message_type = 'think' -%}
185
  {%- endif -%}
186
- {%- if messages[0]['role'] in ['system', 'developer'] -%}
187
  {%- if messages[0]['content'] is string -%}
188
  {{- messages[0]['content'] | trim -}}
189
  {%- elif messages[0]['content'] is sequence -%}
@@ -217,31 +222,22 @@
217
  {%- if message['role'] != 'tool' -%}
218
  {%- set ns.prev_message_type = None -%}
219
  {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
220
- {#- Detect continuation: suppress duplicate <|turn>model when previous non-tool message was also assistant -#}
221
- {%- set prev_nt = namespace(role=None, found=false) -%}
222
- {%- if loop.index0 > 0 -%}
223
- {%- for j in range(loop.index0 - 1, -1, -1) -%}
224
- {%- if not prev_nt.found -%}
225
- {%- if loop_messages[j]['role'] != 'tool' -%}
226
- {%- set prev_nt.role = loop_messages[j]['role'] -%}
227
- {%- set prev_nt.found = true -%}
228
- {%- endif -%}
229
- {%- endif -%}
230
- {%- endfor -%}
231
- {%- endif -%}
232
- {%- set continue_same_model_turn = (role == 'model' and prev_nt.role == 'assistant') -%}
233
  {%- if not continue_same_model_turn -%}
234
  {{- '<|turn>' + role + '\n' }}
 
235
  {%- endif -%}
236
 
237
  {#- Render reasoning/reasoning_content as thinking channel -#}
238
  {%- set thinking_text = message.get('reasoning') or message.get('reasoning_content') -%}
239
- {%- if thinking_text and loop.index0 > ns_turn.last_user_idx and message.get('tool_calls') -%}
 
240
  {{- '<|channel>thought\n' + thinking_text + '\n<channel|>' -}}
241
  {%- endif -%}
242
 
243
- {%- if message['tool_calls'] -%}
244
- {%- for tool_call in message['tool_calls'] -%}
245
  {%- set function = tool_call['function'] -%}
246
  {{- '<|tool_call>call:' + function['name'] + '{' -}}
247
  {%- if function['arguments'] is mapping -%}
@@ -251,8 +247,13 @@
251
  {%- set ns_args.found_first = true -%}
252
  {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
253
  {%- endfor -%}
254
- {%- elif function['arguments'] is string -%}
255
- {{- function['arguments'] -}}
 
 
 
 
 
256
  {%- endif -%}
257
  {{- '}<tool_call|>' -}}
258
  {%- endfor -%}
@@ -262,8 +263,8 @@
262
  {%- set ns_tr_out = namespace(flag=false) -%}
263
  {%- if message.get('tool_responses') -%}
264
  {#- Legacy: tool_responses embedded on the assistant message (Google/Gemma native) -#}
265
- {%- for tool_response in message['tool_responses'] -%}
266
- {{- format_tool_response_block(tool_response['name'] | default('unknown'), tool_response['response']) -}}
267
  {%- set ns_tr_out.flag = true -%}
268
  {%- set ns.prev_message_type = 'tool_response' -%}
269
  {%- endfor -%}
@@ -277,8 +278,8 @@
277
  {%- else -%}
278
  {%- set follow = loop_messages[k] -%}
279
  {#- Resolve tool_call_id to function name -#}
280
- {%- set ns_tname = namespace(name=follow.get('name') | default('unknown')) -%}
281
- {%- for tc in message['tool_calls'] -%}
282
  {%- if tc.get('id') == follow.get('tool_call_id') -%}
283
  {%- set ns_tname.name = tc['function']['name'] -%}
284
  {%- endif -%}
@@ -296,9 +297,9 @@
296
  {%- endfor -%}
297
  {{- format_tool_response_block(ns_tname.name, ns_txt.s) -}}
298
  {%- for part in tool_body -%}
299
- {%- if part.get('type') == 'image' -%}
300
  {{- '<|image|>' -}}
301
- {%- elif part.get('type') == 'audio' -%}
302
  {{- '<|audio|>' -}}
303
  {%- elif part.get('type') == 'video' -%}
304
  {{- '<|video|>' -}}
@@ -314,29 +315,26 @@
314
  {%- endif -%}
315
 
316
  {%- set captured_content -%}
317
- {%- if message['content'] is string -%}
318
  {%- if role == 'model' -%}
319
  {{- strip_thinking(message['content']) -}}
320
  {%- else -%}
321
  {{- message['content'] | trim -}}
322
  {%- endif -%}
323
- {%- elif message['content'] is sequence -%}
324
  {%- for item in message['content'] -%}
325
- {%- if item['type'] == 'text' -%}
326
  {%- if role == 'model' -%}
327
  {{- strip_thinking(item['text']) -}}
328
  {%- else -%}
329
  {{- item['text'] | trim -}}
330
  {%- endif -%}
331
- {%- elif item['type'] == 'image' -%}
332
  {{- '<|image|>' -}}
333
- {%- set ns.prev_message_type = 'image' -%}
334
- {%- elif item['type'] == 'audio' -%}
335
  {{- '<|audio|>' -}}
336
- {%- set ns.prev_message_type = 'audio' -%}
337
- {%- elif item['type'] == 'video' -%}
338
  {{- '<|video|>' -}}
339
- {%- set ns.prev_message_type = 'video' -%}
340
  {%- endif -%}
341
  {%- endfor -%}
342
  {%- endif -%}
@@ -345,19 +343,42 @@
345
  {{- captured_content -}}
346
  {%- set has_content = captured_content | trim | length > 0 -%}
347
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
348
  {%- if ns.prev_message_type == 'tool_call' and not ns_tr_out.flag -%}
349
  {{- '<|tool_response>' -}}
 
350
  {%- elif not (ns_tr_out.flag and not has_content) -%}
351
  {{- '<turn|>\n' -}}
352
  {%- endif -%}
 
 
 
353
  {%- endif -%}
354
  {%- endfor -%}
355
 
356
  {%- if add_generation_prompt -%}
357
  {%- if ns.prev_message_type != 'tool_response' and ns.prev_message_type != 'tool_call' -%}
358
  {{- '<|turn>model\n' -}}
359
- {%- if not enable_thinking | default(false) -%}
360
  {{- '<|channel>thought\n<channel|>' -}}
361
  {%- endif -%}
 
 
362
  {%- endif -%}
363
- {%- endif -%}
 
116
  }
117
  {%- endmacro -%}
118
  {%- macro format_argument(argument, escape_keys=True) -%}
119
+ {%- if argument is none -%}
120
+ {{- 'null' -}}
121
+ {%- elif argument is string -%}
122
  {{- '<|"|>' + argument + '<|"|>' -}}
123
  {%- elif argument is boolean -%}
124
  {{- 'true' if argument else 'false' -}}
 
174
  {{- '<tool_response|>' -}}
175
  {%- endmacro -%}
176
 
177
+ {#- ===== SETUP ===== -#}
178
+ {%- set ns = namespace(prev_message_type=None, prev_non_tool_role=None) -%}
179
  {%- set loop_messages = messages -%}
180
+ {%- set enable_thinking = enable_thinking | default(false) -%}
181
+ {%- set preserve_thinking = preserve_thinking | default(false) -%}
182
  {{- bos_token -}}
183
  {#- Handle System/Tool Definitions Block -#}
184
+ {%- if enable_thinking or tools or (messages and messages[0]['role'] in ['system', 'developer']) -%}
185
  {{- '<|turn>system\n' -}}
186
  {#- Inject Thinking token at the very top of the FIRST system turn -#}
187
+ {%- if enable_thinking -%}
188
  {{- '<|think|>\n' -}}
189
  {%- set ns.prev_message_type = 'think' -%}
190
  {%- endif -%}
191
+ {%- if messages and messages[0]['role'] in ['system', 'developer'] -%}
192
  {%- if messages[0]['content'] is string -%}
193
  {{- messages[0]['content'] | trim -}}
194
  {%- elif messages[0]['content'] is sequence -%}
 
222
  {%- if message['role'] != 'tool' -%}
223
  {%- set ns.prev_message_type = None -%}
224
  {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}
225
+ {#- Detect continuation from tracked state in O(1) instead of with an O(n) backward scan -#}
226
+ {%- set continue_same_model_turn = (role == 'model' and ns.prev_non_tool_role == 'assistant') -%}
 
 
 
 
 
 
 
 
 
 
 
227
  {%- if not continue_same_model_turn -%}
228
  {{- '<|turn>' + role + '\n' }}
229
+
230
  {%- endif -%}
231
 
232
  {#- Render reasoning/reasoning_content as thinking channel -#}
233
  {%- set thinking_text = message.get('reasoning') or message.get('reasoning_content') -%}
234
+ {%- set thinking_gate = (loop.index0 > ns_turn.last_user_idx) or (preserve_thinking and message.get('tool_calls')) -%}
235
+ {%- if thinking_text and thinking_gate -%}
236
  {{- '<|channel>thought\n' + thinking_text + '\n<channel|>' -}}
237
  {%- endif -%}
238
 
239
+ {%- if message.get('tool_calls') -%}
240
+ {%- for tool_call in message.get('tool_calls') -%}
241
  {%- set function = tool_call['function'] -%}
242
  {{- '<|tool_call>call:' + function['name'] + '{' -}}
243
  {%- if function['arguments'] is mapping -%}
 
247
  {%- set ns_args.found_first = true -%}
248
  {{- key -}}:{{- format_argument(value, escape_keys=False) -}}
249
  {%- endfor -%}
250
+ {%- elif function['arguments'] is none -%}
251
+ {%- else -%}
252
+ {{- raise_exception(
253
+ "chat_template: tool_calls[].function.arguments must be a "
254
+ "JSON object (mapping), not a string. Deserialize arguments "
255
+ "before passing to the template."
256
+ ) -}}
257
  {%- endif -%}
258
  {{- '}<tool_call|>' -}}
259
  {%- endfor -%}
 
263
  {%- set ns_tr_out = namespace(flag=false) -%}
264
  {%- if message.get('tool_responses') -%}
265
  {#- Legacy: tool_responses embedded on the assistant message (Google/Gemma native) -#}
266
+ {%- for tool_response in message.get('tool_responses') -%}
267
+ {{- format_tool_response_block(tool_response['name'] | default('unknown', true), tool_response['response']) -}}
268
  {%- set ns_tr_out.flag = true -%}
269
  {%- set ns.prev_message_type = 'tool_response' -%}
270
  {%- endfor -%}
 
278
  {%- else -%}
279
  {%- set follow = loop_messages[k] -%}
280
  {#- Resolve tool_call_id to function name -#}
281
+ {%- set ns_tname = namespace(name=follow.get('name') or 'unknown') -%}
282
+ {%- for tc in message.get('tool_calls') -%}
283
  {%- if tc.get('id') == follow.get('tool_call_id') -%}
284
  {%- set ns_tname.name = tc['function']['name'] -%}
285
  {%- endif -%}
 
297
  {%- endfor -%}
298
  {{- format_tool_response_block(ns_tname.name, ns_txt.s) -}}
299
  {%- for part in tool_body -%}
300
+ {%- if part.get('type') in ['image', 'image_url'] -%}
301
  {{- '<|image|>' -}}
302
+ {%- elif part.get('type') in ['audio', 'input_audio'] -%}
303
  {{- '<|audio|>' -}}
304
  {%- elif part.get('type') == 'video' -%}
305
  {{- '<|video|>' -}}
 
315
  {%- endif -%}
316
 
317
  {%- set captured_content -%}
318
+ {%- if message.get('content') is string -%}
319
  {%- if role == 'model' -%}
320
  {{- strip_thinking(message['content']) -}}
321
  {%- else -%}
322
  {{- message['content'] | trim -}}
323
  {%- endif -%}
324
+ {%- elif message.get('content') is sequence -%}
325
  {%- for item in message['content'] -%}
326
+ {%- if item.get('type') == 'text' -%}
327
  {%- if role == 'model' -%}
328
  {{- strip_thinking(item['text']) -}}
329
  {%- else -%}
330
  {{- item['text'] | trim -}}
331
  {%- endif -%}
332
+ {%- elif item.get('type') in ['image', 'image_url'] -%}
333
  {{- '<|image|>' -}}
334
+ {%- elif item.get('type') in ['audio', 'input_audio'] -%}
 
335
  {{- '<|audio|>' -}}
336
+ {%- elif item.get('type') == 'video' -%}
 
337
  {{- '<|video|>' -}}
 
338
  {%- endif -%}
339
  {%- endfor -%}
340
  {%- endif -%}
 
343
  {{- captured_content -}}
344
  {%- set has_content = captured_content | trim | length > 0 -%}
345
 
346
+ {#- Forward-scan: find next non-tool message role for continuation detection -#}
347
+ {%- set next_nt = namespace(role=None, found=false) -%}
348
+ {%- for j in range(loop.index0 + 1, loop_messages | length) -%}
349
+ {%- if not next_nt.found -%}
350
+ {%- if loop_messages[j]['role'] != 'tool' -%}
351
+ {%- set next_nt.role = loop_messages[j]['role'] -%}
352
+ {%- set next_nt.found = true -%}
353
+ {%- endif -%}
354
+ {%- endif -%}
355
+ {%- endfor -%}
356
+
357
+ {%- set continues_into_next = (
358
+ role == 'model'
359
+ and next_nt.role == 'assistant'
360
+ and (not message.get('tool_calls') or ns_tr_out.flag)
361
+ ) -%}
362
+
363
  {%- if ns.prev_message_type == 'tool_call' and not ns_tr_out.flag -%}
364
  {{- '<|tool_response>' -}}
365
+ {%- elif continues_into_next -%}
366
  {%- elif not (ns_tr_out.flag and not has_content) -%}
367
  {{- '<turn|>\n' -}}
368
  {%- endif -%}
369
+
370
+ {#- Track previous non-tool role for next iteration (avoids O(n) backward scan) -#}
371
+ {%- set ns.prev_non_tool_role = message['role'] -%}
372
  {%- endif -%}
373
  {%- endfor -%}
374
 
375
  {%- if add_generation_prompt -%}
376
  {%- if ns.prev_message_type != 'tool_response' and ns.prev_message_type != 'tool_call' -%}
377
  {{- '<|turn>model\n' -}}
378
+ {%- if not enable_thinking -%}
379
  {{- '<|channel>thought\n<channel|>' -}}
380
  {%- endif -%}
381
+ {%- elif ns.prev_message_type == 'tool_response' and enable_thinking -%}
382
+ {{- '<|channel>thought\n' -}}
383
  {%- endif -%}
384
+ {%- endif -%}