diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..0cb9a4e534706488d6812d01208b830de4790dcd 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,35 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +onnx/audio_encoder.onnx_data filter=lfs diff=lfs merge=lfs -text +onnx/audio_encoder_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text +onnx/audio_encoder_q4.onnx_data filter=lfs diff=lfs merge=lfs -text +onnx/audio_encoder_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text +onnx/audio_encoder_quantized.onnx_data filter=lfs diff=lfs merge=lfs -text +onnx/decoder_model_merged.onnx_data filter=lfs diff=lfs merge=lfs -text +onnx/decoder_model_merged.onnx_data_1 filter=lfs diff=lfs merge=lfs -text +onnx/decoder_model_merged.onnx_data_2 filter=lfs diff=lfs merge=lfs -text +onnx/decoder_model_merged.onnx_data_3 filter=lfs diff=lfs merge=lfs -text +onnx/decoder_model_merged.onnx_data_4 filter=lfs diff=lfs merge=lfs -text +onnx/decoder_model_merged_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text +onnx/decoder_model_merged_fp16.onnx_data_1 filter=lfs diff=lfs merge=lfs -text +onnx/decoder_model_merged_fp16.onnx_data_2 filter=lfs diff=lfs merge=lfs -text +onnx/decoder_model_merged_q4.onnx_data filter=lfs diff=lfs merge=lfs -text +onnx/decoder_model_merged_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text +onnx/decoder_model_merged_quantized.onnx_data filter=lfs diff=lfs merge=lfs -text +onnx/decoder_model_merged_quantized.onnx_data_1 filter=lfs diff=lfs merge=lfs -text +onnx/embed_tokens.onnx_data filter=lfs diff=lfs merge=lfs -text +onnx/embed_tokens.onnx_data_1 filter=lfs diff=lfs merge=lfs -text +onnx/embed_tokens_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text +onnx/embed_tokens_fp16.onnx_data_1 filter=lfs diff=lfs merge=lfs -text +onnx/embed_tokens_q4.onnx_data filter=lfs diff=lfs 
merge=lfs -text +onnx/embed_tokens_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text +onnx/embed_tokens_quantized.onnx_data filter=lfs diff=lfs merge=lfs -text +onnx/embed_tokens_quantized.onnx_data_1 filter=lfs diff=lfs merge=lfs -text +onnx/embed_tokens_quantized.onnx_data_2 filter=lfs diff=lfs merge=lfs -text +onnx/vision_encoder.onnx_data filter=lfs diff=lfs merge=lfs -text +onnx/vision_encoder_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text +onnx/vision_encoder_q4.onnx_data filter=lfs diff=lfs merge=lfs -text +onnx/vision_encoder_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text +onnx/vision_encoder_quantized.onnx_data filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..afb1d517bedb410d5bb32df4300d17f6e5888e2a --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,263 @@ +{%- macro format_parameters(properties, required) -%} + {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%} + {%- set ns = namespace(found_first=false) -%} + {%- for key, value in properties | dictsort -%} + {%- set add_comma = false -%} + {%- if key not in standard_keys -%} + {%- if ns.found_first %},{% endif -%} + {%- set ns.found_first = true -%} + {{ key }}:{ + {%- if value['description'] -%} + description:<|"|>{{ value['description'] }}<|"|> + {%- set add_comma = true -%} + {%- endif -%} + {%- if value['nullable'] %} + {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%} + nullable:true + {%- endif -%} + {%- if value['type'] | upper == 'STRING' -%} + {%- if value['enum'] -%} + {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%} + enum:{{ format_argument(value['enum']) }} + {%- endif -%} + {%- elif value['type'] | upper == 'OBJECT' -%} + ,properties:{ + {%- if value['properties'] is defined and value['properties'] is mapping -%} + {{- 
format_parameters(value['properties'], value['required'] | default([])) -}} + {%- elif value is mapping -%} + {{- format_parameters(value, value['required'] | default([])) -}} + {%- endif -%} + } + {%- if value['required'] -%} + ,required:[ + {%- for item in value['required'] | default([]) -%} + <|"|>{{- item -}}<|"|> + {%- if not loop.last %},{% endif -%} + {%- endfor -%} + ] + {%- endif -%} + {%- elif value['type'] | upper == 'ARRAY' -%} + {%- if value['items'] is mapping and value['items'] -%} + ,items:{ + {%- set ns_items = namespace(found_first=false) -%} + {%- for item_key, item_value in value['items'] | dictsort -%} + {%- if item_value is not none -%} + {%- if ns_items.found_first %},{% endif -%} + {%- set ns_items.found_first = true -%} + {%- if item_key == 'properties' -%} + properties:{ + {%- if item_value is mapping -%} + {{- format_parameters(item_value, value['items']['required'] | default([])) -}} + {%- endif -%} + } + {%- elif item_key == 'required' -%} + required:[ + {%- for req_item in item_value -%} + <|"|>{{- req_item -}}<|"|> + {%- if not loop.last %},{% endif -%} + {%- endfor -%} + ] + {%- elif item_key == 'type' -%} + {%- if item_value is string -%} + type:{{ format_argument(item_value | upper) }} + {%- else -%} + type:{{ format_argument(item_value | map('upper') | list) }} + {%- endif -%} + {%- else -%} + {{ item_key }}:{{ format_argument(item_value) }} + {%- endif -%} + {%- endif -%} + {%- endfor -%} + } + {%- endif -%} + {%- endif -%} + {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%} + type:<|"|>{{ value['type'] | upper }}<|"|>} + {%- endif -%} + {%- endfor -%} +{%- endmacro -%} +{%- macro format_function_declaration(tool_data) -%} + declaration:{{- tool_data['function']['name'] -}}{description:<|"|>{{- tool_data['function']['description'] -}}<|"|> + {%- set params = tool_data['function']['parameters'] -%} + {%- if params -%} + ,parameters:{ + {%- if params['properties'] -%} + properties:{ {{- 
format_parameters(params['properties'], params['required']) -}} }, + {%- endif -%} + {%- if params['required'] -%} + required:[ + {%- for item in params['required'] -%} + <|"|>{{- item -}}<|"|> + {{- ',' if not loop.last -}} + {%- endfor -%} + ], + {%- endif -%} + {%- if params['type'] -%} + type:<|"|>{{- params['type'] | upper -}}<|"|>} + {%- endif -%} + {%- endif -%} + {%- if 'response' in tool_data['function'] -%} + {%- set response_declaration = tool_data['function']['response'] -%} + ,response:{ + {%- if response_declaration['description'] -%} + description:<|"|>{{- response_declaration['description'] -}}<|"|>, + {%- endif -%} + {%- if response_declaration['type'] | upper == 'OBJECT' -%} + type:<|"|>{{- response_declaration['type'] | upper -}}<|"|>} + {%- endif -%} + {%- endif -%} + } +{%- endmacro -%} +{%- macro format_argument(argument, escape_keys=True) -%} + {%- if argument is string -%} + {{- '<|"|>' + argument + '<|"|>' -}} + {%- elif argument is boolean -%} + {{- 'true' if argument else 'false' -}} + {%- elif argument is mapping -%} + {{- '{' -}} + {%- set ns = namespace(found_first=false) -%} + {%- for key, value in argument | dictsort -%} + {%- if ns.found_first %},{% endif -%} + {%- set ns.found_first = true -%} + {%- if escape_keys -%} + {{- '<|"|>' + key + '<|"|>' -}} + {%- else -%} + {{- key -}} + {%- endif -%} + :{{- format_argument(value, escape_keys=escape_keys) -}} + {%- endfor -%} + {{- '}' -}} + {%- elif argument is sequence -%} + {{- '[' -}} + {%- for item in argument -%} + {{- format_argument(item, escape_keys=escape_keys) -}} + {%- if not loop.last %},{% endif -%} + {%- endfor -%} + {{- ']' -}} + {%- else -%} + {{- argument -}} + {%- endif -%} +{%- endmacro -%} +{%- macro strip_thinking(text) -%} + {%- set ns = namespace(result='') -%} + {%- for part in text.split('<channel|>') -%} + {%- if '<|channel>' in part -%} + {%- set ns.result = ns.result + part.split('<|channel>')[0] -%} + {%- else -%} + {%- set ns.result = ns.result + part -%} + {%- endif 
-%} + {%- endfor -%} + {{- ns.result | trim -}} +{%- endmacro -%} + +{%- set ns = namespace(prev_message_type=None) -%} +{%- set loop_messages = messages -%} +{{ bos_token }} +{#- Handle System/Tool Definitions Block -#} +{%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%} + {{- '<|turn>system\n' -}} + + {#- Inject Thinking token at the very top of the FIRST system turn -#} + {%- if enable_thinking is defined and enable_thinking -%} + {{- '<|think|>' -}} + {%- set ns.prev_message_type = 'think' -%} + {%- endif -%} + + {%- if messages[0]['role'] in ['system', 'developer'] -%} + {{- messages[0]['content'] | trim -}} + {%- set loop_messages = messages[1:] -%} + {%- endif -%} + + {%- if tools -%} + {%- for tool in tools %} + {{- '<|tool>' -}} + {{- format_function_declaration(tool) | trim -}} + {{- '<tool|>' -}} + {%- endfor %} + {%- set ns.prev_message_type = 'tool' -%} + {%- endif -%} + + {{- '<turn|>\n' -}} +{%- endif %} + +{#- Loop through messages -#} +{%- for message in loop_messages -%} + {%- set ns.prev_message_type = None -%} + {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%} + {{- '<|turn>' + role + '\n' }} + + {%- if message['tool_calls'] -%} + {%- for tool_call in message['tool_calls'] -%} + {%- set function = tool_call['function'] -%} + {{- '<|tool_call>call:' + function['name'] + '{' -}} + {%- if function['arguments'] is mapping -%} + {%- set ns_args = namespace(found_first=false) -%} + {%- for key, value in function['arguments'] | dictsort -%} + {%- if ns_args.found_first %},{% endif -%} + {%- set ns_args.found_first = true -%} + {{- key -}}:{{- format_argument(value, escape_keys=False) -}} + {%- endfor -%} + {%- elif function['arguments'] is string -%} + {{- function['arguments'] -}} + {%- endif -%} + {{- '}<tool_call|>' -}} + {%- endfor -%} + {%- set ns.prev_message_type = 'tool_call' -%} + {%- endif -%} + + {%- if message['tool_responses'] -%} + {#- Tool Response handling -#} + 
{%- for tool_response in message['tool_responses'] -%} + {{- '<|tool_response>' -}} + {%- if tool_response['response'] is mapping -%} + {{- 'response:' + tool_response['name'] | default('unknown') + '{' -}} + {%- for key, value in tool_response['response'] | dictsort -%} + {{- key -}}:{{- format_argument(value, escape_keys=False) -}} + {%- if not loop.last %},{% endif -%} + {%- endfor -%} + {{- '}' -}} + {%- else -%} + {{- 'response:' + tool_response['name'] | default('unknown') + '{value:' + format_argument(tool_response['response'], escape_keys=False) + '}' -}} + {%- endif -%} + {{- '<tool_response|>' -}} + {%- endfor -%} + {%- set ns.prev_message_type = 'tool_response' -%} + {%- endif -%} + + {%- if message['content'] is string -%} + {%- if role == 'model' -%} + {{- strip_thinking(message['content']) -}} + {%- else -%} + {{- message['content'] | trim -}} + {%- endif -%} + {%- elif message['content'] is sequence -%} + {%- for item in message['content'] -%} + {%- if item['type'] == 'text' -%} + {%- if role == 'model' -%} + {{- strip_thinking(item['text']) -}} + {%- else -%} + {{- item['text'] | trim -}} + {%- endif -%} + {%- elif item['type'] == 'image' -%} + {{- '\n\n<|image|>\n\n' -}} + {%- set ns.prev_message_type = 'image' -%} + {%- elif item['type'] == 'audio' -%} + {{- '<|audio|>' -}} + {%- set ns.prev_message_type = 'audio' -%} + {%- elif item['type'] == 'video' -%} + {{- '\n\n<|video|>\n\n' -}} + {%- set ns.prev_message_type = 'video' -%} + {%- endif -%} + {%- endfor -%} + {%- endif -%} + + {%- if not (message['tool_responses'] and not message['content']) -%} + {{- '<turn|>\n' -}} + {%- endif -%} +{%- endfor -%} + +{%- if add_generation_prompt -%} + {%- if ns.prev_message_type != 'tool_response' -%} + {{- '<|turn>model\n' -}} + {%- endif -%} +{%- endif -%} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..a7f7623b5229c8498655847bd9cdeea34e5017f6 --- /dev/null +++ b/config.json @@ -0,0 +1,210 @@ 
+{ + "architectures": [ + "Gemma4ForConditionalGeneration" + ], + "audio_config": { + "_name_or_path": "", + "architectures": null, + "attention_chunk_size": 12, + "attention_context_left": 13, + "attention_context_right": 0, + "attention_invalid_logits_value": -1000000000.0, + "attention_logit_cap": 50.0, + "chunk_size_feed_forward": 0, + "conv_kernel_size": 5, + "dtype": "bfloat16", + "gradient_clipping": 10000000000.0, + "hidden_act": "silu", + "hidden_size": 1024, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1" + }, + "initializer_range": 0.02, + "is_encoder_decoder": false, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + "model_type": "gemma4_audio", + "num_attention_heads": 8, + "num_hidden_layers": 12, + "output_attentions": false, + "output_hidden_states": false, + "output_proj_dims": 1536, + "problem_type": null, + "residual_weight": 0.5, + "return_dict": true, + "rms_norm_eps": 1e-06, + "subsampling_conv_channels": [ + 128, + 32 + ], + "use_clipped_linears": true + }, + "audio_token_id": 258881, + "boa_token_id": 256000, + "boi_token_id": 255999, + "dtype": "bfloat16", + "eoa_token_id": 258883, + "eoa_token_index": 258883, + "eoi_token_id": 258882, + "eos_token_id": [ + 1, + 106 + ], + "image_token_id": 258880, + "initializer_range": 0.02, + "model_type": "gemma4", + "text_config": { + "attention_bias": false, + "attention_dropout": 0.0, + "attention_k_eq_v": false, + "bos_token_id": 2, + "dtype": "bfloat16", + "enable_moe_block": false, + "eos_token_id": 1, + "expert_intermediate_size": null, + "final_logit_softcapping": 30.0, + "global_head_dim": 512, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 1536, + "hidden_size_per_layer_input": 256, + "initializer_range": 0.02, + "intermediate_size": 6144, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + 
"sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 131072, + "model_type": "gemma4_text", + "num_attention_heads": 8, + "num_experts": null, + "num_global_key_value_heads": null, + "num_hidden_layers": 35, + "num_key_value_heads": 1, + "num_kv_shared_layers": 20, + "pad_token_id": 0, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "full_attention": { + "partial_rotary_factor": 0.25, + "rope_theta": 1000000.0, + "rope_type": "proportional" + }, + "sliding_attention": { + "rope_theta": 10000.0, + "rope_type": "default" + } + }, + "sliding_window": 512, + "tie_word_embeddings": true, + "top_k_experts": null, + "use_bidirectional_attention": null, + "use_cache": true, + "use_double_wide_mlp": true, + "vocab_size": 262144, + "vocab_size_per_layer_input": 262144 + }, + "tie_word_embeddings": true, + "transformers_version": "5.5.0.dev0", + "video_token_id": 258884, + "vision_config": { + "_name_or_path": "", + "architectures": null, + "attention_bias": false, + "attention_dropout": 0.0, + "chunk_size_feed_forward": 0, + "default_output_length": 280, + "dtype": "bfloat16", + "global_head_dim": 64, + "head_dim": 64, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 768, + "id2label": { + "0": "LABEL_0", + "1": "LABEL_1" + }, + "initializer_range": 0.02, + "intermediate_size": 3072, + "is_encoder_decoder": false, + "label2id": { + "LABEL_0": 0, + "LABEL_1": 1 + }, + 
"max_position_embeddings": 131072, + "model_type": "gemma4_vision", + "num_attention_heads": 12, + "num_hidden_layers": 16, + "num_key_value_heads": 12, + "output_attentions": false, + "output_hidden_states": false, + "patch_size": 16, + "pooling_kernel_size": 3, + "position_embedding_size": 10240, + "problem_type": null, + "return_dict": true, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 100.0, + "rope_type": "default" + }, + "standardize": false, + "use_clipped_linears": true + }, + "vision_soft_tokens_per_image": 280, + "transformers.js_config": { + "use_external_data_format": { + "audio_encoder": 1, + "vision_encoder": 1, + "decoder_model_merged.onnx": 5, + "decoder_model_merged_fp16.onnx": 3, + "decoder_model_merged_quantized.onnx": 2, + "decoder_model_merged_q4.onnx": 1, + "decoder_model_merged_q4f16.onnx": 1, + "embed_tokens.onnx": 2, + "embed_tokens_fp16.onnx": 2, + "embed_tokens_quantized.onnx": 3, + "embed_tokens_q4.onnx": 1, + "embed_tokens_q4f16.onnx": 1 + }, + "kv_cache_dtype": { + "q4f16": "float16", + "fp16": "float16" + } + } +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b2b0ab11eaf5317ad648bb48ce64b110532d661a --- /dev/null +++ b/generation_config.json @@ -0,0 +1,15 @@ +{ + "bos_token_id": 2, + "do_sample": true, + "eos_token_id": [ + 1, + 106, + 50 + ], + "pad_token_id": 0, + "temperature": 1.0, + "top_k": 64, + "top_p": 0.95, + "transformers_version": "5.5.0.dev0", + "trust_remote_code": false +} diff --git a/onnx/audio_encoder.onnx b/onnx/audio_encoder.onnx new file mode 100644 index 0000000000000000000000000000000000000000..5a6a93f06f1ec72e102451e33c6b5bb60f8b16ee --- /dev/null +++ b/onnx/audio_encoder.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01e8bdf202be608abe55ed3f1f180b897a73387b5c76f8801c9023c6dbc525b0 +size 220280 diff --git a/onnx/audio_encoder.onnx_data 
b/onnx/audio_encoder.onnx_data new file mode 100644 index 0000000000000000000000000000000000000000..114cf5ce5997bdbff6cbca049b001c09461be4c5 --- /dev/null +++ b/onnx/audio_encoder.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd9fc0d5a06df02638d681ce15c51b84ba94fa7824e0e25effbeba4f02b9ba3c +size 1179042304 diff --git a/onnx/audio_encoder_fp16.onnx b/onnx/audio_encoder_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e00e3d2dc161b1996425d4c7c63c4a35110ff34a --- /dev/null +++ b/onnx/audio_encoder_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f40fa9ce8da3996822d064762ffcbac24f44721e72f62d870dbd66619099ebf9 +size 219310 diff --git a/onnx/audio_encoder_fp16.onnx_data b/onnx/audio_encoder_fp16.onnx_data new file mode 100644 index 0000000000000000000000000000000000000000..260b2ef2eddd1f96c678ce227e6837dd8e1f312d --- /dev/null +++ b/onnx/audio_encoder_fp16.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb81ececc02d80405b8455f3f6206daedfcf1f3ea3f3286d838a62f83747f931 +size 589840640 diff --git a/onnx/audio_encoder_q4.onnx b/onnx/audio_encoder_q4.onnx new file mode 100644 index 0000000000000000000000000000000000000000..412b17e1338fbc3cc2b7091a91ffcfe35767c092 --- /dev/null +++ b/onnx/audio_encoder_q4.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8a1d1f7821ef93809155448e4e441aadf14a4a159da5e6ce9ca7468f80b44dc +size 261229 diff --git a/onnx/audio_encoder_q4.onnx_data b/onnx/audio_encoder_q4.onnx_data new file mode 100644 index 0000000000000000000000000000000000000000..346b165646bc8b44d8bd2e094da66b806e0d57ed --- /dev/null +++ b/onnx/audio_encoder_q4.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae533f4fd4103b2f76b0d60082bf0de8567c2225d65dfe34461662b20d4da320 +size 190083584 diff --git a/onnx/audio_encoder_q4f16.onnx b/onnx/audio_encoder_q4f16.onnx new file mode 100644 index 
0000000000000000000000000000000000000000..1a1d19d72bf23bc787c67dee5c26ef4a5d71eb7a --- /dev/null +++ b/onnx/audio_encoder_q4f16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e0deb22791685c792d4b8e089deef9670fa4a4cecde434213d6a742e58fc3fa +size 260446 diff --git a/onnx/audio_encoder_q4f16.onnx_data b/onnx/audio_encoder_q4f16.onnx_data new file mode 100644 index 0000000000000000000000000000000000000000..3c76e4d7cc2e6524c583241e74198c6b9ae20d9a --- /dev/null +++ b/onnx/audio_encoder_q4f16.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df58e61a00bafa9449ee5fd52895ce952f158bbdd1fe38df8a68f48f36842e62 +size 171258112 diff --git a/onnx/audio_encoder_quantized.onnx b/onnx/audio_encoder_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3d176c274f6b2b95c4c91be114e9c87e59703862 --- /dev/null +++ b/onnx/audio_encoder_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f12a5f8c5db8cd7be763453657b74f458cb591a03cfeb9b9ebc941db182525f0 +size 265016 diff --git a/onnx/audio_encoder_quantized.onnx_data b/onnx/audio_encoder_quantized.onnx_data new file mode 100644 index 0000000000000000000000000000000000000000..57794a5927d78a73f532b252ea5e9c21c0ec057d --- /dev/null +++ b/onnx/audio_encoder_quantized.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33257b54b4bc8f84b873244a649be6aa90847e121bfaa648227081a7f55cc4ae +size 341877248 diff --git a/onnx/decoder_model_merged.onnx b/onnx/decoder_model_merged.onnx new file mode 100644 index 0000000000000000000000000000000000000000..4686f677f8c03fe5d3e63666d31ea774b13211b1 --- /dev/null +++ b/onnx/decoder_model_merged.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7f0448a61b944293f327f0a8742a2d47f226627fffc62084d34010f6e068c49 +size 524763 diff --git a/onnx/decoder_model_merged.onnx_data b/onnx/decoder_model_merged.onnx_data new file mode 100644 index 
0000000000000000000000000000000000000000..52783542a9eb190650a77189e8e2b1028b4581df --- /dev/null +++ b/onnx/decoder_model_merged.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3014f451f610db57a094d1dd67a54b6a5480608ec93bb2273932374bc1cb811f +size 2065537024 diff --git a/onnx/decoder_model_merged.onnx_data_1 b/onnx/decoder_model_merged.onnx_data_1 new file mode 100644 index 0000000000000000000000000000000000000000..2cc82591580f00625a890206f8f7142626e139de --- /dev/null +++ b/onnx/decoder_model_merged.onnx_data_1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cadb0ea679d5ca07a1e1d509bc640edad9776ac793a4faa3eae4c45c214be25 +size 1972701184 diff --git a/onnx/decoder_model_merged.onnx_data_2 b/onnx/decoder_model_merged.onnx_data_2 new file mode 100644 index 0000000000000000000000000000000000000000..344875169bbb4e01f90047020db2f4d41bcff5bd --- /dev/null +++ b/onnx/decoder_model_merged.onnx_data_2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57a949dd5fa1b8477af8871809117d55143f5e693c874e9cc05b452104b92907 +size 2063852544 diff --git a/onnx/decoder_model_merged.onnx_data_3 b/onnx/decoder_model_merged.onnx_data_3 new file mode 100644 index 0000000000000000000000000000000000000000..82f18b403ee7dbe85278848cee91a5c1184ae368 --- /dev/null +++ b/onnx/decoder_model_merged.onnx_data_3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93fa5ec36406f5240a9316847ab8b81ddc8f445b8a2f2cf19024890546660a31 +size 1809004544 diff --git a/onnx/decoder_model_merged.onnx_data_4 b/onnx/decoder_model_merged.onnx_data_4 new file mode 100644 index 0000000000000000000000000000000000000000..dbd40101545e09f3fb15fdbea504acefa3a13c37 --- /dev/null +++ b/onnx/decoder_model_merged.onnx_data_4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4ac4dec680d3a0a0e936f6ae1f95345f6e9fe3f18667595df870abdeb7da776 +size 1610612736 diff --git a/onnx/decoder_model_merged_fp16.onnx 
b/onnx/decoder_model_merged_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..e2bc666ec0007456598ab0ebf93623fe24b9da74 --- /dev/null +++ b/onnx/decoder_model_merged_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34e54181a58d4896c60773ccdb2833ba963114953b7cc0685b34f1c91321e4d4 +size 549636 diff --git a/onnx/decoder_model_merged_fp16.onnx_data b/onnx/decoder_model_merged_fp16.onnx_data new file mode 100644 index 0000000000000000000000000000000000000000..71a42f7f7d605d7e6d065fd53fb77ac7ea3da4af --- /dev/null +++ b/onnx/decoder_model_merged_fp16.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:072088206a214a434615401ce4da3c524fab05e100b716d5e7d5a58164a366ab +size 2094595072 diff --git a/onnx/decoder_model_merged_fp16.onnx_data_1 b/onnx/decoder_model_merged_fp16.onnx_data_1 new file mode 100644 index 0000000000000000000000000000000000000000..ec74c31d79709dbf72a2ff3301cdf08f4f424e74 --- /dev/null +++ b/onnx/decoder_model_merged_fp16.onnx_data_1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ccc497a2f29a82b47b3566038e020a3ce10ce5026aa2cb83550e90f4b54c8f4 +size 1860925440 diff --git a/onnx/decoder_model_merged_fp16.onnx_data_2 b/onnx/decoder_model_merged_fp16.onnx_data_2 new file mode 100644 index 0000000000000000000000000000000000000000..59512058112e7493cbe978c048a6fc76b60216c7 --- /dev/null +++ b/onnx/decoder_model_merged_fp16.onnx_data_2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f35317efd0a159769d6499c327742121a4bc13767b1b3725cfe21a9ab75a430a +size 805306368 diff --git a/onnx/decoder_model_merged_q4.onnx b/onnx/decoder_model_merged_q4.onnx new file mode 100644 index 0000000000000000000000000000000000000000..f0f1894024ebe5df79a8d10c7c3c0d3cd282bfdb --- /dev/null +++ b/onnx/decoder_model_merged_q4.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c6edb929bf342c524728d37efd400285ee71525e8fe64ff996341f78c3e577d2 +size 647599 diff --git a/onnx/decoder_model_merged_q4.onnx_data b/onnx/decoder_model_merged_q4.onnx_data new file mode 100644 index 0000000000000000000000000000000000000000..b8a20d0bdc401a1c973e80117bc1545bcbf89cb4 --- /dev/null +++ b/onnx/decoder_model_merged_q4.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b879fe4b946c9b9ff6acb60f7c5eda3d2c9c4df8625895feb2d1e269002f0345 +size 1864102912 diff --git a/onnx/decoder_model_merged_q4f16.onnx b/onnx/decoder_model_merged_q4f16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..7adc582aa61c3e48cc7ce273d5da80043076fb1a --- /dev/null +++ b/onnx/decoder_model_merged_q4f16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73c0f1fe04f9a3a048fb3319c0671b6cf0346bf33a3a8624c853bcffe01c24a4 +size 673231 diff --git a/onnx/decoder_model_merged_q4f16.onnx_data b/onnx/decoder_model_merged_q4f16.onnx_data new file mode 100644 index 0000000000000000000000000000000000000000..60066fedf33c0457b82a7dbef4f16ec57f46f676 --- /dev/null +++ b/onnx/decoder_model_merged_q4f16.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b27245a7396cb7039a4e4118bd2a8aa35106bae381522edf7c4867b5f22bb10 +size 1519700992 diff --git a/onnx/decoder_model_merged_quantized.onnx b/onnx/decoder_model_merged_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..444b46aeef86472f31f014580517432ba18f9326 --- /dev/null +++ b/onnx/decoder_model_merged_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce3bad4d5330dd3ed1bd588907754c393004114ac4c4f14eac7f228f2d9705ad +size 654818 diff --git a/onnx/decoder_model_merged_quantized.onnx_data b/onnx/decoder_model_merged_quantized.onnx_data new file mode 100644 index 0000000000000000000000000000000000000000..95489f460dc995964e604a698d2c0d82cb1bd504 --- /dev/null +++ 
b/onnx/decoder_model_merged_quantized.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e04502814af3a6086397aa2eff45abc1b564df2eca6a9be049a00f82c1745d2 +size 2094681088 diff --git a/onnx/decoder_model_merged_quantized.onnx_data_1 b/onnx/decoder_model_merged_quantized.onnx_data_1 new file mode 100644 index 0000000000000000000000000000000000000000..801cc0dfa4829fa96beae517981cc4bb04475cf8 --- /dev/null +++ b/onnx/decoder_model_merged_quantized.onnx_data_1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cc09f165ded30fe1e48cd8997f2db7e8f0ebadb1108fa9b1b51201d86a04535 +size 944775168 diff --git a/onnx/embed_tokens.onnx b/onnx/embed_tokens.onnx new file mode 100644 index 0000000000000000000000000000000000000000..b99263b46b991ec4e1c8ec2999f60ef3a0a9b0fe --- /dev/null +++ b/onnx/embed_tokens.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93840f00078b7f22ae3302ca15643d363059214e8a9e32ddd38dce1a6cfa6ece +size 4243 diff --git a/onnx/embed_tokens.onnx_data b/onnx/embed_tokens.onnx_data new file mode 100644 index 0000000000000000000000000000000000000000..9776a6456064228179dd6993fb3401ab4f4acdfb --- /dev/null +++ b/onnx/embed_tokens.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a9d41b71f0b15c47807d3874db443daf67ade7d9750c361c00fcab95ac508c0 +size 1610612736 diff --git a/onnx/embed_tokens.onnx_data_1 b/onnx/embed_tokens.onnx_data_1 new file mode 100644 index 0000000000000000000000000000000000000000..d9f0b3fc3ffbc4ef2b44d075be78657e5fd80760 --- /dev/null +++ b/onnx/embed_tokens.onnx_data_1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd460a24efb42f32cd4c15a2a4ab6e596bb64399b589508960217b3d6d6ba9c3 +size 9395240960 diff --git a/onnx/embed_tokens_fp16.onnx b/onnx/embed_tokens_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..05e0a4e530620143b4cabeca483b1d5fc4599f2d --- /dev/null +++ 
b/onnx/embed_tokens_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cf75108f796accccd4756724f331acac6f464d656d5c0cf1766e9305772abdc +size 4713 diff --git a/onnx/embed_tokens_fp16.onnx_data b/onnx/embed_tokens_fp16.onnx_data new file mode 100644 index 0000000000000000000000000000000000000000..24f8c45edc4e9b8447e8523751942a1ac00dbf23 --- /dev/null +++ b/onnx/embed_tokens_fp16.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc6055646b44b9eca25abe2941612b1f104d56dd2f9b8b54776e328e351d4fd7 +size 805306368 diff --git a/onnx/embed_tokens_fp16.onnx_data_1 b/onnx/embed_tokens_fp16.onnx_data_1 new file mode 100644 index 0000000000000000000000000000000000000000..8e35ab3f9e69d7f7e59f1156b40044e14af027fb --- /dev/null +++ b/onnx/embed_tokens_fp16.onnx_data_1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d7bf38285e3f007b5d28c3cd4d81e0b8f69c2534a53d25fb2952ab9b17b48ca +size 4697620480 diff --git a/onnx/embed_tokens_q4.onnx b/onnx/embed_tokens_q4.onnx new file mode 100644 index 0000000000000000000000000000000000000000..9e1626c51f8eb67484b81246ca86c7e7a97e6c03 --- /dev/null +++ b/onnx/embed_tokens_q4.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d8c8a2bcc30e8ded7f636967c2a58a346116583356dd933720b005fc88079c4 +size 5142 diff --git a/onnx/embed_tokens_q4.onnx_data b/onnx/embed_tokens_q4.onnx_data new file mode 100644 index 0000000000000000000000000000000000000000..4a9f2576309dd5c8773d4f86317c0665f2541965 --- /dev/null +++ b/onnx/embed_tokens_q4.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40fa957d9988b8a0160c8b0eb5c3f781a237627e9f7153f30514a4ffb2e62888 +size 1762656256 diff --git a/onnx/embed_tokens_q4f16.onnx b/onnx/embed_tokens_q4f16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..9a21220623b4686d5296e5b9b290861f08540a50 --- /dev/null +++ b/onnx/embed_tokens_q4f16.onnx @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:d7ca53f6a169471b5699b2f57ee4c7aa2c73732b0152f3909e64b71384444825 +size 5621 diff --git a/onnx/embed_tokens_q4f16.onnx_data b/onnx/embed_tokens_q4f16.onnx_data new file mode 100644 index 0000000000000000000000000000000000000000..c8226442549810cfceccfa4ba8d48165044db38d --- /dev/null +++ b/onnx/embed_tokens_q4f16.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:024b199e6358ed42970f807686add5f9430d7e254ca7ce22fc9c83f015b9c517 +size 1590689792 diff --git a/onnx/embed_tokens_quantized.onnx b/onnx/embed_tokens_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..9b8caad5445902045daf3d5619b473542229b679 --- /dev/null +++ b/onnx/embed_tokens_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06511401d70359582048796351c894adbd452471f2815b0198b93100e82b37e8 +size 5174 diff --git a/onnx/embed_tokens_quantized.onnx_data b/onnx/embed_tokens_quantized.onnx_data new file mode 100644 index 0000000000000000000000000000000000000000..1befc79ac512ef4bc0cd65cd1afa50937849150c --- /dev/null +++ b/onnx/embed_tokens_quantized.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ea1f5dd855f51b685aeeee967481467c148a41ebdd733d58856b407945b46a3 +size 465567744 diff --git a/onnx/embed_tokens_quantized.onnx_data_1 b/onnx/embed_tokens_quantized.onnx_data_1 new file mode 100644 index 0000000000000000000000000000000000000000..a896f815bd8b8c94b30e01c4df4f86e31ff4bba0 --- /dev/null +++ b/onnx/embed_tokens_quantized.onnx_data_1 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:143ff6a1eea191af79be401378bf8970d56ebde33daa6445a156e28acf6a7bc9 +size 2348810240 diff --git a/onnx/embed_tokens_quantized.onnx_data_2 b/onnx/embed_tokens_quantized.onnx_data_2 new file mode 100644 index 0000000000000000000000000000000000000000..39f77572a9046f2b2fcd5a2936f22c5818635553 --- /dev/null +++ 
b/onnx/embed_tokens_quantized.onnx_data_2 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c90b413e9bde49f74c6634e7516b8b3980e207c979ee8af075f342cc1f3eecbe +size 367001600 diff --git a/onnx/vision_encoder.onnx b/onnx/vision_encoder.onnx new file mode 100644 index 0000000000000000000000000000000000000000..d3bb3beb8e110e498c6ab70f05182d80f78a4cef --- /dev/null +++ b/onnx/vision_encoder.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d34fd8510310ae330704c5b3d1571e6a8faa419cd5cd5d709a230fddc366310d +size 161900 diff --git a/onnx/vision_encoder.onnx_data b/onnx/vision_encoder.onnx_data new file mode 100644 index 0000000000000000000000000000000000000000..f39ba0180832163df2bdda5d71930ad5e3f98fb7 --- /dev/null +++ b/onnx/vision_encoder.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0468ff0c7994459072f712da835eada7a9a8eb21f92ca561ab61570ed3ef15b +size 674174144 diff --git a/onnx/vision_encoder_fp16.onnx b/onnx/vision_encoder_fp16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..3d8917f4bd400e2d359f478116bef1ac205f8726 --- /dev/null +++ b/onnx/vision_encoder_fp16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e8a95ce26015f10bf7249c2f36ab593fa630bd8b9fd9cb4deb6c308a7e75712 +size 150672 diff --git a/onnx/vision_encoder_fp16.onnx_data b/onnx/vision_encoder_fp16.onnx_data new file mode 100644 index 0000000000000000000000000000000000000000..be629095d3002cc5aac6badd7d02ef8f3d139acd --- /dev/null +++ b/onnx/vision_encoder_fp16.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c968a2928b5087f6f5e3628fee436088ad5bfbc5d79d97410322c40aab0b867 +size 337088192 diff --git a/onnx/vision_encoder_q4.onnx b/onnx/vision_encoder_q4.onnx new file mode 100644 index 0000000000000000000000000000000000000000..a8928248a2d75ada8f6e391717eb5cdb3edfc4a1 --- /dev/null +++ b/onnx/vision_encoder_q4.onnx @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:8bc90fb094fca9ea9aec7f2ef069e37ee01cb3b454f571bf46843e056b951182 +size 200072 diff --git a/onnx/vision_encoder_q4.onnx_data b/onnx/vision_encoder_q4.onnx_data new file mode 100644 index 0000000000000000000000000000000000000000..62e3cd72ccdf14bcf624828dd25d8d3ac6af78f8 --- /dev/null +++ b/onnx/vision_encoder_q4.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df747f790d1f7f451227124388fef863f09d4fb5a6f7e5052472a4f407cc23b9 +size 112105664 diff --git a/onnx/vision_encoder_q4f16.onnx b/onnx/vision_encoder_q4f16.onnx new file mode 100644 index 0000000000000000000000000000000000000000..fb8bdb1a027b4157a5de4abff2fd4e02fb7c8b8b --- /dev/null +++ b/onnx/vision_encoder_q4f16.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0a4e48e519ade4eeddbb4cdadb812a7251aea871f7fb5f50576615fd3af22a3 +size 189124 diff --git a/onnx/vision_encoder_q4f16.onnx_data b/onnx/vision_encoder_q4f16.onnx_data new file mode 100644 index 0000000000000000000000000000000000000000..3e5a288365720812e5a50c76ae1fd62f1805f506 --- /dev/null +++ b/onnx/vision_encoder_q4f16.onnx_data @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0835071d2c79c105f8e1b549b7f8dd8c9af07fa95f01ead2e7add280602d3c6d +size 99189440 diff --git a/onnx/vision_encoder_quantized.onnx b/onnx/vision_encoder_quantized.onnx new file mode 100644 index 0000000000000000000000000000000000000000..68f0ea369a663ecfd75567369059376ce73fe279 --- /dev/null +++ b/onnx/vision_encoder_quantized.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bae62d0e57f2b665f3e3ba5d0312337eac9860f69e14de88bfdf2e17fa52a8ea +size 203173 diff --git a/onnx/vision_encoder_quantized.onnx_data b/onnx/vision_encoder_quantized.onnx_data new file mode 100644 index 0000000000000000000000000000000000000000..d21386ea6797b71c1e778a61e5741cc8587fa9d5 --- /dev/null +++ b/onnx/vision_encoder_quantized.onnx_data @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:8ea7a1a2eb1f800621ab6772540af3d6988743cb475475c3df086e00389ca707 +size 198376640 diff --git a/preprocessor_config.json b/preprocessor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..6418e09c5fdb500f7ad9e86a7de9de7e60317f34 --- /dev/null +++ b/preprocessor_config.json @@ -0,0 +1,3 @@ +{ + "processor_class": "Gemma4Processor" +} diff --git a/processor_config.json b/processor_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5465974d23e1eca2c46c2809b26c997946ce0d90 --- /dev/null +++ b/processor_config.json @@ -0,0 +1,75 @@ +{ + "audio_ms_per_token": 40, + "audio_seq_length": 750, + "feature_extractor": { + "dither": 0.0, + "feature_extractor_type": "Gemma4AudioFeatureExtractor", + "feature_size": 128, + "fft_length": 512, + "fft_overdrive": false, + "frame_length": 320, + "hop_length": 160, + "input_scale_factor": 1.0, + "max_frequency": 8000.0, + "mel_floor": 0.001, + "min_frequency": 0.0, + "padding_side": "right", + "padding_value": 0.0, + "per_bin_mean": null, + "per_bin_stddev": null, + "preemphasis": 0.0, + "preemphasis_htk_flavor": true, + "return_attention_mask": true, + "sampling_rate": 16000 + }, + "image_processor": { + "do_convert_rgb": true, + "do_normalize": false, + "do_rescale": true, + "do_resize": true, + "image_mean": [ + 0.0, + 0.0, + 0.0 + ], + "image_processor_type": "Gemma4ImageProcessor", + "image_seq_length": 280, + "image_std": [ + 1.0, + 1.0, + 1.0 + ], + "max_soft_tokens": 280, + "patch_size": 16, + "pooling_kernel_size": 3, + "resample": 3, + "rescale_factor": 0.00392156862745098 + }, + "image_seq_length": 280, + "processor_class": "Gemma4Processor", + "video_processor": { + "do_convert_rgb": true, + "do_normalize": true, + "do_rescale": true, + "do_resize": true, + "do_sample_frames": true, + "image_mean": [ + 0.0, + 0.0, + 0.0 + ], + "image_std": [ + 1.0, + 1.0, + 1.0 + ], + "max_soft_tokens": 70, + "num_frames": 32, + 
"patch_size": 16, + "pooling_kernel_size": 3, + "resample": 3, + "rescale_factor": 0.00392156862745098, + "return_metadata": false, + "video_processor_type": "Gemma4VideoProcessor" + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..d781f9b567efe2d2c999ec37ba3ff3a505838c60 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47bd35616c7c782aaca6ccf48c75f3461d5877170984b8836b375107d0a9f566 +size 19439251 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cdac68ad7864b31ecf321916b7f7cb5149e6aeee --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,96 @@ +{ + "audio_token": "<|audio|>", + "backend": "tokenizers", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "bos_token": "", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eos_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "extra_special_tokens": [ + "<|video|>" + ], + "image_token": "<|image|>", + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "model_specific_special_tokens": { + "audio_token": "<|audio|>", + "boa_token": "<|audio>", + "boi_token": "<|image>", + "eoa_token": "", + "eoc_token": "", + "eoi_token": "", + "eot_token": "", + "escape_token": "<|\"|>", + "etc_token": "", + "etd_token": "", + "etr_token": "", + "image_token": "<|image|>", + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>" + }, + "pad_token": "", + "padding_side": "left", + "processor_class": "Gemma4Processor", + "response_schema": { + "properties": { + "role": { + "const": "assistant" + }, + "thinking": { + "type": "string", + "x-regex": "<\\|channel\\>(?:thought\\n)?(.+?)" + }, + 
"tool_calls": { + "items": { + "properties": { + "function": { + "properties": { + "arguments": { + "type": "string", + "x-mapping-regex": { + "(\\{|,)\\s*([a-zA-Z_]\\w+):": "\\1\"\\2\":", + "<\\|\\\"\\|>": "\"" + }, + "x-regex": "call:[^{]+(\\{.*\\})" + }, + "name": { + "type": "string", + "x-regex": "call:([^{]+)" + } + }, + "type": "object" + }, + "type": { + "const": "function" + } + }, + "type": "object" + }, + "type": "array", + "x-regex-iterator": "<\\|tool_call\\>(.*?)" + } + }, + "type": "object" + }, + "soc_token": "<|channel>", + "sot_token": "<|turn>", + "stc_token": "<|tool_call>", + "std_token": "<|tool>", + "str_token": "<|tool_response>", + "think_token": "<|think|>", + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "chat_template": "{%- macro format_parameters(properties, required) -%}\n {%- set standard_keys = ['description', 'type', 'properties', 'required', 'nullable'] -%}\n {%- set ns = namespace(found_first=false) -%}\n {%- for key, value in properties | dictsort -%}\n {%- set add_comma = false -%}\n {%- if key not in standard_keys -%}\n {%- if ns.found_first %},{% endif -%}\n {%- set ns.found_first = true -%}\n {{ key }}:{\n {%- if value['description'] -%}\n description:<|\"|>{{ value['description'] }}<|\"|>\n {%- set add_comma = true -%}\n {%- endif -%}\n {%- if value['nullable'] %}\n {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}\n nullable:true\n {%- endif -%}\n {%- if value['type'] | upper == 'STRING' -%}\n {%- if value['enum'] -%}\n {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}\n enum:{{ format_argument(value['enum']) }}\n {%- endif -%}\n {%- elif value['type'] | upper == 'OBJECT' -%}\n ,properties:{\n {%- if value['properties'] is defined and value['properties'] is mapping -%}\n {{- format_parameters(value['properties'], value['required'] | default([])) -}}\n {%- elif value is mapping -%}\n {{- format_parameters(value, value['required'] | default([])) -}}\n {%- endif 
-%}\n }\n {%- if value['required'] -%}\n ,required:[\n {%- for item in value['required'] | default([]) -%}\n <|\"|>{{- item -}}<|\"|>\n {%- if not loop.last %},{% endif -%}\n {%- endfor -%}\n ]\n {%- endif -%}\n {%- elif value['type'] | upper == 'ARRAY' -%}\n {%- if value['items'] is mapping and value['items'] -%}\n ,items:{\n {%- set ns_items = namespace(found_first=false) -%}\n {%- for item_key, item_value in value['items'] | dictsort -%}\n {%- if item_value is not none -%}\n {%- if ns_items.found_first %},{% endif -%}\n {%- set ns_items.found_first = true -%}\n {%- if item_key == 'properties' -%}\n properties:{\n {%- if item_value is mapping -%}\n {{- format_parameters(item_value, value['items']['required'] | default([])) -}}\n {%- endif -%}\n }\n {%- elif item_key == 'required' -%}\n required:[\n {%- for req_item in item_value -%}\n <|\"|>{{- req_item -}}<|\"|>\n {%- if not loop.last %},{% endif -%}\n {%- endfor -%}\n ]\n {%- elif item_key == 'type' -%}\n {%- if item_value is string -%}\n type:{{ format_argument(item_value | upper) }}\n {%- else -%}\n type:{{ format_argument(item_value | map('upper') | list) }}\n {%- endif -%}\n {%- else -%}\n {{ item_key }}:{{ format_argument(item_value) }}\n {%- endif -%}\n {%- endif -%}\n {%- endfor -%}\n }\n {%- endif -%}\n {%- endif -%}\n {%- if add_comma %},{%- else -%} {%- set add_comma = true -%} {% endif -%}\n type:<|\"|>{{ value['type'] | upper }}<|\"|>}\n {%- endif -%}\n {%- endfor -%}\n{%- endmacro -%}\n{%- macro format_function_declaration(tool_data) -%}\n declaration:{{- tool_data['function']['name'] -}}{description:<|\"|>{{- tool_data['function']['description'] -}}<|\"|>\n {%- set params = tool_data['function']['parameters'] -%}\n {%- if params -%}\n ,parameters:{\n {%- if params['properties'] -%}\n properties:{ {{- format_parameters(params['properties'], params['required']) -}} },\n {%- endif -%}\n {%- if params['required'] -%}\n required:[\n {%- for item in params['required'] -%}\n <|\"|>{{- item -}}<|\"|>\n 
{{- ',' if not loop.last -}}\n {%- endfor -%}\n ],\n {%- endif -%}\n {%- if params['type'] -%}\n type:<|\"|>{{- params['type'] | upper -}}<|\"|>}\n {%- endif -%}\n {%- endif -%}\n {%- if 'response' in tool_data['function'] -%}\n {%- set response_declaration = tool_data['function']['response'] -%}\n ,response:{\n {%- if response_declaration['description'] -%}\n description:<|\"|>{{- response_declaration['description'] -}}<|\"|>,\n {%- endif -%}\n {%- if response_declaration['type'] | upper == 'OBJECT' -%}\n type:<|\"|>{{- response_declaration['type'] | upper -}}<|\"|>}\n {%- endif -%}\n {%- endif -%}\n }\n{%- endmacro -%}\n{%- macro format_argument(argument, escape_keys=True) -%}\n {%- if argument is string -%}\n {{- '<|\"|>' + argument + '<|\"|>' -}}\n {%- elif argument is boolean -%}\n {{- 'true' if argument else 'false' -}}\n {%- elif argument is mapping -%}\n {{- '{' -}}\n {%- set ns = namespace(found_first=false) -%}\n {%- for key, value in argument | dictsort -%}\n {%- if ns.found_first %},{% endif -%}\n {%- set ns.found_first = true -%}\n {%- if escape_keys -%}\n {{- '<|\"|>' + key + '<|\"|>' -}}\n {%- else -%}\n {{- key -}}\n {%- endif -%}\n :{{- format_argument(value, escape_keys=escape_keys) -}}\n {%- endfor -%}\n {{- '}' -}}\n {%- elif argument is sequence -%}\n {{- '[' -}}\n {%- for item in argument -%}\n {{- format_argument(item, escape_keys=escape_keys) -}}\n {%- if not loop.last %},{% endif -%}\n {%- endfor -%}\n {{- ']' -}}\n {%- else -%}\n {{- argument -}}\n {%- endif -%}\n{%- endmacro -%}\n{%- macro strip_thinking(text) -%}\n {%- set ns = namespace(result='') -%}\n {%- for part in text.split('') -%}\n {%- if '<|channel>' in part -%}\n {%- set ns.result = ns.result + part.split('<|channel>')[0] -%}\n {%- else -%}\n {%- set ns.result = ns.result + part -%}\n {%- endif -%}\n {%- endfor -%}\n {{- ns.result | trim -}}\n{%- endmacro -%}\n\n{%- set ns = namespace(prev_message_type=None) -%}\n{%- set loop_messages = messages -%}\n{{ bos_token }}\n{#- 
Handle System/Tool Definitions Block -#}\n{%- if (enable_thinking is defined and enable_thinking) or tools or messages[0]['role'] in ['system', 'developer'] -%}\n {{- '<|turn>system\\n' -}}\n\n {#- Inject Thinking token at the very top of the FIRST system turn -#}\n {%- if enable_thinking is defined and enable_thinking -%}\n {{- '<|think|>' -}}\n {%- set ns.prev_message_type = 'think' -%}\n {%- endif -%}\n\n {%- if messages[0]['role'] in ['system', 'developer'] -%}\n {{- messages[0]['content'] | trim -}}\n {%- set loop_messages = messages[1:] -%}\n {%- endif -%}\n\n {%- if tools -%}\n {%- for tool in tools %}\n {{- '<|tool>' -}}\n {{- format_function_declaration(tool) | trim -}}\n {{- '' -}}\n {%- endfor %}\n {%- set ns.prev_message_type = 'tool' -%}\n {%- endif -%}\n\n {{- '\\n' -}}\n{%- endif %}\n\n{#- Loop through messages -#}\n{%- for message in loop_messages -%}\n {%- set ns.prev_message_type = None -%}\n {%- set role = 'model' if message['role'] == 'assistant' else message['role'] -%}\n {{- '<|turn>' + role + '\\n' }}\n\n {%- if message['tool_calls'] -%}\n {%- for tool_call in message['tool_calls'] -%}\n {%- set function = tool_call['function'] -%}\n {{- '<|tool_call>call:' + function['name'] + '{' -}}\n {%- if function['arguments'] is mapping -%}\n {%- set ns_args = namespace(found_first=false) -%}\n {%- for key, value in function['arguments'] | dictsort -%}\n {%- if ns_args.found_first %},{% endif -%}\n {%- set ns_args.found_first = true -%}\n {{- key -}}:{{- format_argument(value, escape_keys=False) -}}\n {%- endfor -%}\n {%- elif function['arguments'] is string -%}\n {{- function['arguments'] -}}\n {%- endif -%}\n {{- '}' -}}\n {%- endfor -%}\n {%- set ns.prev_message_type = 'tool_call' -%}\n {%- endif -%}\n\n {%- if message['tool_responses'] -%}\n {#- Tool Response handling -#}\n {%- for tool_response in message['tool_responses'] -%}\n {{- '<|tool_response>' -}}\n {%- if tool_response['response'] is mapping -%}\n {{- 'response:' + tool_response['name'] | 
default('unknown') + '{' -}}\n {%- for key, value in tool_response['response'] | dictsort -%}\n {{- key -}}:{{- format_argument(value, escape_keys=False) -}}\n {%- if not loop.last %},{% endif -%}\n {%- endfor -%}\n {{- '}' -}}\n {%- else -%}\n {{- 'response:' + tool_response['name'] | default('unknown') + '{value:' + format_argument(tool_response['response'], escape_keys=False) + '}' -}}\n {%- endif -%}\n {{- '' -}}\n {%- endfor -%}\n {%- set ns.prev_message_type = 'tool_response' -%}\n {%- endif -%}\n\n {%- if message['content'] is string -%}\n {%- if role == 'model' -%}\n {{- strip_thinking(message['content']) -}}\n {%- else -%}\n {{- message['content'] | trim -}}\n {%- endif -%}\n {%- elif message['content'] is sequence -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'text' -%}\n {%- if role == 'model' -%}\n {{- strip_thinking(item['text']) -}}\n {%- else -%}\n {{- item['text'] | trim -}}\n {%- endif -%}\n {%- elif item['type'] == 'image' -%}\n {{- '\\n\\n<|image|>\\n\\n' -}}\n {%- set ns.prev_message_type = 'image' -%}\n {%- elif item['type'] == 'audio' -%}\n {{- '<|audio|>' -}}\n {%- set ns.prev_message_type = 'audio' -%}\n {%- elif item['type'] == 'video' -%}\n {{- '\\n\\n<|video|>\\n\\n' -}}\n {%- set ns.prev_message_type = 'video' -%}\n {%- endif -%}\n {%- endfor -%}\n {%- endif -%}\n\n {%- if not (message['tool_responses'] and not message['content']) -%}\n {{- '\\n' -}}\n {%- endif -%}\n{%- endfor -%}\n\n{%- if add_generation_prompt -%}\n {%- if ns.prev_message_type != 'tool_response' -%}\n {{- '<|turn>model\\n' -}}\n {%- endif -%}\n{%- endif -%}" +} \ No newline at end of file