{ "arch_id": "gemma4-assistant-mtp", "artifacts": { "assistant_dtype": null, "assistant_format": "q8-g64-affine", "assistant_model": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q8-g64-mlx", "assistant_quantization": { "bits": 8, "group_size": 64, "mode": "affine" }, "disk_ok": true, "min_free_gib": 220.0, "observed_free_gib": 630.7444229125977, "target_dtype": null, "target_format": "q8-g64-affine", "target_model": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-q8-g64-mlx", "target_quantization": { "bits": 8, "group_size": 64, "mode": "affine" } }, "backend": "gemma4_assistant", "benchmark": { "draft_block_sizes": [ 3, 4, 5, 6 ], "draft_sampler": { "exactness_note": "Assistant q may differ from target p; MTPLX remains exact because acceptance uses p/q and rejection samples the residual distribution.", "inherits_target_sampler": true, "temperature": null, "top_k": null, "top_p": null }, "max_mode": true, "max_tokens": 1000, "profile": "sustained", "prompt_suite": "mtplx/benchmarks/prompts/flappy.jsonl", "reasoning": "off", "sampler_source": { "do_sample": true, "local_reference": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-q8-g64-mlx/generation_config.json", "name": "official Gemma 4 generation_config.json", "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "seed": 0, "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "blockers": [], "can_run_now": true, "gates": { "generated_tokens": 1000, "longer_lengths_blocked_until_160_passes": true, "median_of_3_min_speedup_vs_ar": 2.0, "min_speedup_vs_ar": 2.0, "mtp_peak_memory_lte_ar_multiplier": 1.18, "mtp_peak_memory_lte_ar_plus_gib": 6 }, "official_sources": { "assistant": "google/gemma-4-31B-it-assistant", "assistant_revision": "cffbbd2cea41ea56a0fa5b0487e0d445121fd204", "target": "google/gemma-4-31B-it", "target_revision": "145dc2508c480a64b47242f160d286cff94a2343" }, "pair": { "assistant_exists": true, "assistant_inspection": { "architecture": "Gemma4AssistantForCausalLM", "architecture_recognized": true, "backbone_hidden_size": 5376, "compatibility": { "arch_id": "gemma4-assistant-mtp", "can_run": false, "exit_code": 3, "message": "Official-style Gemma 4 31B assistant artifact recognized. This is an assistant-backed MTP pair, not a standalone target; MTPLX scaffold is present but QA and the 160-token speed/memory gate are still pending.", "mtp_supported": "recognized", "recognized": true, "recommended_backend": "gemma4_assistant", "recommended_profile": "performance-cold", "runtime_compatibility": "assistant-pair-qa-pending", "runtime_contract": null, "runtime_contract_error": null, "runtime_contract_path": null, "support_level": "architecture-scaffolded-qa-pending", "support_notes": "Assistant-backed scaffold for the official dense Gemma 4 31B pair. It remains QA-pending and is not a public runnable backend until 160-token exactness, speed, and memory gates pass.", "supported": false, "tier": "architecture-compatible-but-unverified", "unsafe_force_required": false, "unverified_model": true }, "config_exists": true, "hidden_size": 1024, "layer_types": [ "sliding_attention", "sliding_attention", "sliding_attention", "full_attention" ], "model_dir": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q8-g64-mlx", "model_files": [ "model.safetensors" ], "model_type": "gemma4_assistant", "mtp": { "exists": false, "expected_tensor_count": 15, "extra_keys": [], "metadata_only": true, "missing_expected_keys": [], "mtp_file": "model.safetensors.index.json::embedded", "passes_tensor_gate": false, "sidecar_format": "bf16", "tensor_count": 0, "tensors": [] }, "mtp_arch": "gemma4-assistant-mtp", "mtp_num_hidden_layers": 0, "mtp_pattern": null, "mtp_supported": "recognized", "num_hidden_layers": 4, "num_kv_shared_layers": 4, "passes_primary_gate": false, "quantization": { "bits": 8, "group_size": 64, "mode": "affine" }, "recommended_backend": "gemma4_assistant", "recommended_profile": "performance-cold", "runtime_compatibility": "assistant-pair-qa-pending", "runtime_contract_path": null, "sidecars": { "preprocessor_config.json": false, "processor_config.json": false, "video_preprocessor_config.json": false }, "source": "local", "support_level": "architecture-scaffolded-qa-pending", "support_notes": "Assistant-backed scaffold for the official dense Gemma 4 31B pair. It remains QA-pending and is not a public runnable backend until 160-token exactness, speed, and memory gates pass.", "unverified_model": true, "use_ordered_embeddings": false, "vocab_size": 262144 }, "assistant_model": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q8-g64-mlx", "pair_error": null, "pair_valid": true, "target_exists": true, "target_inspection": { "architecture": "Gemma4ForConditionalGeneration", "architecture_recognized": false, "backbone_hidden_size": null, "compatibility": { "arch_id": null, "can_run": false, "exit_code": 2, "message": "Model has no MTP head. MTPLX requires an MTP-equipped model.", "mtp_supported": "no", "recognized": false, "recommended_backend": null, "recommended_profile": null, "runtime_compatibility": "unsupported", "runtime_contract": null, "runtime_contract_error": null, "runtime_contract_path": null, "support_level": "unsupported", "support_notes": null, "supported": false, "tier": "no-MTP", "unsafe_force_required": false, "unverified_model": false }, "config_exists": true, "hidden_size": 5376, "layer_types": [ "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention" ], "model_dir": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-q8-g64-mlx", "model_files": [ "model-00001-of-00007.safetensors", "model-00002-of-00007.safetensors", "model-00003-of-00007.safetensors", "model-00004-of-00007.safetensors", "model-00005-of-00007.safetensors", "model-00006-of-00007.safetensors", "model-00007-of-00007.safetensors" ], "model_type": "gemma4_text", "mtp": { "exists": false, "expected_tensor_count": 15, "extra_keys": [], "metadata_only": true, "missing_expected_keys": [], "mtp_file": "model.safetensors.index.json::embedded", "passes_tensor_gate": false, "sidecar_format": "bf16", "tensor_count": 0, "tensors": [] }, "mtp_arch": null, "mtp_num_hidden_layers": 0, "mtp_pattern": null, "mtp_supported": "no", "num_hidden_layers": 60, "num_kv_shared_layers": 0, "passes_primary_gate": false, "quantization": { "bits": 8, "group_size": 64, "mode": "affine" }, "recommended_backend": null, "recommended_profile": null, "runtime_compatibility": "unsupported", "runtime_contract_path": null, "sidecars": { "preprocessor_config.json": false, "processor_config.json": false, "video_preprocessor_config.json": false }, "source": "local", "support_level": "unsupported", "support_notes": null, "unverified_model": false, "use_ordered_embeddings": null, "vocab_size": 262144 }, "target_model": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-q8-g64-mlx" }, "planned_commands": { "assistant_bf16_snapshot": "uv run python -c \"from huggingface_hub import snapshot_download; snapshot_download(repo_id='google/gemma-4-31B-it-assistant', revision='cffbbd2cea41ea56a0fa5b0487e0d445121fd204', repo_type='model', local_dir='/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q8-g64-mlx')\"", "gate": "mtplx bench gemma-mtp --target-model /Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-q8-g64-mlx --assistant-model /Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q8-g64-mlx --profile sustained --max --prompt-suite mtplx/benchmarks/prompts/flappy.jsonl --max-tokens 1000 --temperature 1.0 --top-p 0.95 --top-k 64 --seed 0 --reasoning off --draft-block-sizes 3,4,5,6 --json --output outputs/gemma4/flappy1000-targetq8-assistantq8-sweep.json", "target_flat4_g64": "uv run python -m mlx_lm.convert --hf-path /Users/youssof/Documents/MTPLX/models/.sources/gemma-4-31B-it-145dc2508c48 --mlx-path /Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-q8-g64-mlx --quantize --q-bits 4 --q-group-size 64 --q-mode affine", "target_revision_download": "uv run python -c \"from huggingface_hub import snapshot_download; snapshot_download(repo_id='google/gemma-4-31B-it', revision='145dc2508c480a64b47242f160d286cff94a2343', repo_type='model', local_dir='/Users/youssof/Documents/MTPLX/models/.sources/gemma-4-31B-it-145dc2508c48')\"" }, "qa_pending": true, "results": { "ar": { "active_memory_gib": 31.25938833784312, "cache_memory_gib": 1.949219443835318, "decode_s": 72.81026608298998, "generated_tokens": 1000, "mode": "ar", "peak_memory_gib": 31.318915149196982, "prefill_s": 2.473393041000236, "tok_s": 13.73432695411645, "token_preview": [ 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761 ] }, "ar_confirmation": [ { "active_memory_gib": 31.25938833784312, "cache_memory_gib": 1.949219443835318, "decode_s": 72.81026608298998, "generated_tokens": 1000, "mode": "ar", "peak_memory_gib": 31.318915149196982, "prefill_s": 2.473393041000236, "tok_s": 13.73432695411645, "token_preview": [ 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761 ] }, { "active_memory_gib": 31.749414331279695, "cache_memory_gib": 2.0085750371217728, "confirmation_repeat": 1, "decode_s": 76.81720787500672, "generated_tokens": 1000, "mode": "ar", "peak_memory_gib": 31.80895305145532, "prefill_s": 0.23355866699421313, "tok_s": 13.017916527598246, "token_preview": [ 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761 ] }, { "active_memory_gib": 31.749414331279695, "cache_memory_gib": 2.0085750371217728, "confirmation_repeat": 2, "decode_s": 85.77613120799651, "generated_tokens": 1000, "mode": "ar", "peak_memory_gib": 31.80895305145532, "prefill_s": 0.2485222500108648, "tok_s": 11.658254877165346, "token_preview": [ 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761 ] } ], "best_block_confirmation": [ { "acceptance": 0.9976047904191617, "accepted_drafts": 833, "active_memory_gib": 31.81573427375406, "block_size": 6, "cache_memory_gib": 66.49232691712677, "decode_s": 29.219117750006262, "draft_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "drafted_tokens": 835, "generated_tokens": 1000, "mode": "mtp", "peak_memory_gib": 31.937621283344924, "prefill_s": 0.584632291996968, "row_distribution_evals": 0, "speedup_vs_ar": 2.491870791785778, "target_distribution_modes": { "batched_logits": 167 }, "target_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "telemetry": { "ar_dense_fallback_calls": 0, "decode_dense_fallback_calls": 0, "dense_fallback_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "events": [], "paged_active_array_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "paged_attention_bailouts_by_phase_reason": { "ar_decode": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "decode_verify": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "postcommit": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "prefill": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "unknown": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 } }, "paged_attention_large_q_path": { "dense_forbidden": 0, "large_q_split_sdpa_fallback": 0, "partitioned_paged": 0, "tail_paged": 0, "unknown": 0 }, "postcommit_dense_fallback_calls": 0, "prefill_dense_fallback_calls": 0, "trace_events": false }, "timing_s": { "accept": 0.009512208969681524, "draft": 1.5696202969993465, "rollback": 9.41639591474086e-05, "target_distribution": 27.45037787995534, "target_hidden": 0.0, "verify": 0.17939917097100988 }, "tok_s": 34.22416818179891, "token_preview": [ 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761 ], "verify_calls": 167 }, { "acceptance": 0.9976047904191617, "accepted_drafts": 833, "active_memory_gib": 31.81573427375406, "block_size": 6, "cache_memory_gib": 66.46690577454865, "confirmation_repeat": 1, "decode_s": 30.4154408339964, "draft_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "drafted_tokens": 835, "generated_tokens": 1000, "mode": "mtp", "peak_memory_gib": 31.937621283344924, "prefill_s": 0.26414108400058467, "row_distribution_evals": 0, "speedup_vs_ar": 2.5255990302512874, "target_distribution_modes": { "batched_logits": 167 }, "target_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "telemetry": { "ar_dense_fallback_calls": 0, "decode_dense_fallback_calls": 0, "dense_fallback_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "events": [], "paged_active_array_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "paged_attention_bailouts_by_phase_reason": { "ar_decode": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "decode_verify": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "postcommit": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "prefill": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "unknown": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 } }, "paged_attention_large_q_path": { "dense_forbidden": 0, "large_q_split_sdpa_fallback": 0, "partitioned_paged": 0, "tail_paged": 0, "unknown": 0 }, "postcommit_dense_fallback_calls": 0, "prefill_dense_fallback_calls": 0, "trace_events": false }, "timing_s": { "accept": 0.009377048874739558, "draft": 1.6042620869993698, "rollback": 8.99219885468483e-05, "target_distribution": 28.613301041928935, "target_hidden": 0.0, "verify": 0.1785685370414285 }, "tok_s": 32.87803735799434, "token_preview": [ 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761 ], "verify_calls": 167 }, { "acceptance": 0.9976047904191617, "accepted_drafts": 833, "active_memory_gib": 31.81573427375406, "block_size": 6, "cache_memory_gib": 66.46690577454865, "confirmation_repeat": 2, "decode_s": 30.19647025001177, "draft_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "drafted_tokens": 835, "generated_tokens": 1000, "mode": "mtp", "peak_memory_gib": 31.937621283344924, "prefill_s": 0.23273954199976288, "row_distribution_evals": 0, "speedup_vs_ar": 2.840601252325612, "target_distribution_modes": { "batched_logits": 167 }, "target_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "telemetry": { "ar_dense_fallback_calls": 0, "decode_dense_fallback_calls": 0, "dense_fallback_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "events": [], "paged_active_array_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "paged_attention_bailouts_by_phase_reason": { "ar_decode": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "decode_verify": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "postcommit": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "prefill": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "unknown": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 } }, "paged_attention_large_q_path": { "dense_forbidden": 0, "large_q_split_sdpa_fallback": 0, "partitioned_paged": 0, "tail_paged": 0, "unknown": 0 }, "postcommit_dense_fallback_calls": 0, "prefill_dense_fallback_calls": 0, "trace_events": false }, "timing_s": { "accept": 0.00978036891319789, "draft": 1.6035626278899144, "rollback": 9.504603804089129e-05, "target_distribution": 28.391074630984804, "target_hidden": 0.0, "verify": 0.1814885419298662 }, "tok_s": 33.11645340400705, "token_preview": [ 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761 ], "verify_calls": 167 } ], "best_block_size": 6, "best_speedup": 2.491870791785778, "blockers": [], "draft_sampler": { "exactness_note": "Assistant q may differ from target p; MTPLX remains exact because acceptance uses p/q and rejection samples the residual distribution.", "inherits_target_sampler": true, "temperature": null, "top_k": null, "top_p": null }, "fan_restore": { "after": { "actual_max_rpm": 7823, "actual_min_rpm": 7234, "capacity_max_rpm": 7826, "capacity_min_rpm": 7826, "fans": [ { "actual_rpm": 7234, "max_capacity_rpm": 7826, "mode": "auto", "raw": { "actual_rpm": 7234, "index": 0, "max_rpm": 7826, "min_rpm": 2317, "mode": "auto", "target_rpm": 7245 }, "rpm": 7234, "target_rpm": 7245 }, { "actual_rpm": 7823, "max_capacity_rpm": 7826, "mode": "auto", "raw": { "actual_rpm": 7823, "index": 1, "max_rpm": 7826, "min_rpm": 2317, "mode": "auto", "target_rpm": 7824 }, "rpm": 7823, "target_rpm": 7824 } ], "max_rpm": 7823, "min_rpm": 7234, "ok": true, "raw": { "attempts": [ { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "status" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 7234,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 7245\n },\n {\n \"actual_rpm\" : 7823,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 7824\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.4,\n \"TB0T\" : 33,\n \"TCDX\" : 67.7,\n \"TCHP\" : 60.9,\n \"TCMb\" : 75.5,\n \"TG0B\" : 33,\n \"TG0H\" : 33,\n \"TG0V\" : 33,\n \"TH0x\" : 40.4,\n \"TMVR\" : 61.7,\n \"TPDX\" : 67.1,\n \"TRDX\" : 68.2,\n \"TS0P\" : 69.2,\n \"Tg0j\" : 65.9,\n \"Tm08\" : 65,\n \"Tp04\" : 65.4,\n \"Tp08\" : 65.1,\n \"Tp0C\" : 65.1,\n \"Tp0G\" : 65.6,\n \"Tp0X\" : 65.6\n }\n}" } ], "detection": { "available": true, "clock_anchor_enabled": false, "clock_anchor_policy": "explicit experimental only; never used for product claims", "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.", "selected": { "kind": "thermalforge", "path": "/Users/youssof/.mtplx/bin/thermalforge", "version": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "--version" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "0.1.0" } }, "tools": [ { "kind": "thermalforge", "path": "/Users/youssof/.mtplx/bin/thermalforge", "version": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "--version" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "0.1.0" } } ] }, "ok": true, "status": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "status" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 7234,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 7245\n },\n {\n \"actual_rpm\" : 7823,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 7824\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.4,\n \"TB0T\" : 33,\n \"TCDX\" : 67.7,\n \"TCHP\" : 60.9,\n \"TCMb\" : 75.5,\n \"TG0B\" : 33,\n \"TG0H\" : 33,\n \"TG0V\" : 33,\n \"TH0x\" : 40.4,\n \"TMVR\" : 61.7,\n \"TPDX\" : 67.1,\n \"TRDX\" : 68.2,\n \"TS0P\" : 69.2,\n \"Tg0j\" : 65.9,\n \"Tm08\" : 65,\n \"Tp04\" : 65.4,\n \"Tp08\" : 65.1,\n \"Tp0C\" : 65.1,\n \"Tp0G\" : 65.6,\n \"Tp0X\" : 65.6\n }\n}" } }, "target_max_rpm": 7824, "target_min_rpm": 7245 }, "message": "fan restore was attempted but not verified", "ok": false, "profile": "silent", "set_result": { "attempts": [ { "command": [ "sudo", "-n", "/Users/youssof/.mtplx/bin/thermalforge", "auto" ], "ok": true, "returncode": 0, "stderr": "No matching processes were found", "stdout": "Fans reset to Apple defaults" } ], "command": [ "sudo", "-n", "/Users/youssof/.mtplx/bin/thermalforge", "auto" ], "detection": { "available": true, "clock_anchor_enabled": false, "clock_anchor_policy": "explicit experimental only; never used for product claims", "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.", "selected": { "kind": "thermalforge", "path": "/Users/youssof/.mtplx/bin/thermalforge", "version": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "--version" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "0.1.0" } }, "tools": [ { "kind": "thermalforge", "path": "/Users/youssof/.mtplx/bin/thermalforge", "version": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "--version" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "0.1.0" } } ] }, "dry_run": false, "ok": true, "profile": "silent" } }, "fanmax": { "after": { "actual_max_rpm": 7424, "actual_min_rpm": 7357, "capacity_max_rpm": 7826, "capacity_min_rpm": 7826, "fans": [ { "actual_rpm": 7357, "max_capacity_rpm": 7826, "mode": "manual", "raw": { "actual_rpm": 7357, "index": 0, "max_rpm": 7826, "min_rpm": 2317, "mode": "manual", "target_rpm": 7826 }, "rpm": 7357, "target_rpm": 7826 }, { "actual_rpm": 7424, "max_capacity_rpm": 7826, "mode": "manual", "raw": { "actual_rpm": 7424, "index": 1, "max_rpm": 7826, "min_rpm": 2317, "mode": "manual", "target_rpm": 7826 }, "rpm": 7424, "target_rpm": 7826 } ], "max_rpm": 7424, "min_rpm": 7357, "ok": true, "raw": { "attempts": [ { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "status" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 7357,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"manual\",\n \"target_rpm\" : 7826\n },\n {\n \"actual_rpm\" : 7424,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"manual\",\n \"target_rpm\" : 7826\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.7,\n \"TB0T\" : 33,\n \"TCDX\" : 66.4,\n \"TCHP\" : 59.5,\n \"TCMb\" : 73.7,\n \"TG0B\" : 33,\n \"TG0H\" : 33,\n \"TG0V\" : 33,\n \"TH0x\" : 39.1,\n \"TMVR\" : 59,\n \"TPDX\" : 64,\n \"TRDX\" : 66.9,\n \"TS0P\" : 66,\n \"Tg0j\" : 64.4,\n \"Tm08\" : 63.9,\n \"Tp04\" : 64.4,\n \"Tp08\" : 64.1,\n \"Tp0C\" : 64.3,\n \"Tp0G\" : 64.8,\n \"Tp0X\" : 64.4\n }\n}" } ], "detection": { "available": true, "clock_anchor_enabled": false, "clock_anchor_policy": "explicit experimental only; never used for product claims", "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.", "selected": { "kind": "thermalforge", "path": "/Users/youssof/.mtplx/bin/thermalforge", "version": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "--version" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "0.1.0" } }, "tools": [ { "kind": "thermalforge", "path": "/Users/youssof/.mtplx/bin/thermalforge", "version": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "--version" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "0.1.0" } } ] }, "ok": true, "status": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "status" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 7357,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"manual\",\n \"target_rpm\" : 7826\n },\n {\n \"actual_rpm\" : 7424,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"manual\",\n \"target_rpm\" : 7826\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.7,\n \"TB0T\" : 33,\n \"TCDX\" : 66.4,\n \"TCHP\" : 59.5,\n \"TCMb\" : 73.7,\n \"TG0B\" : 33,\n \"TG0H\" : 33,\n \"TG0V\" : 33,\n \"TH0x\" : 39.1,\n \"TMVR\" : 59,\n \"TPDX\" : 64,\n \"TRDX\" : 66.9,\n \"TS0P\" : 66,\n \"Tg0j\" : 64.4,\n \"Tm08\" : 63.9,\n \"Tp04\" : 64.4,\n \"Tp08\" : 64.1,\n \"Tp0C\" : 64.3,\n \"Tp0G\" : 64.8,\n \"Tp0X\" : 64.4\n }\n}" } }, "target_max_rpm": 7826, "target_min_rpm": 7826 }, "baseline": { "actual_max_rpm": 6020, "actual_min_rpm": 5580, "capacity_max_rpm": 7826, "capacity_min_rpm": 7826, "fans": [ { "actual_rpm": 5580, "max_capacity_rpm": 7826, "mode": "auto", "raw": { "actual_rpm": 5580, "index": 0, "max_rpm": 7826, "min_rpm": 2317, "mode": "auto", "target_rpm": 5575 }, "rpm": 5580, "target_rpm": 5575 }, { "actual_rpm": 6020, "max_capacity_rpm": 7826, "mode": "auto", "raw": { "actual_rpm": 6020, "index": 1, "max_rpm": 7826, "min_rpm": 2317, "mode": "auto", "target_rpm": 6021 }, "rpm": 6020, "target_rpm": 6021 } ], "max_rpm": 6020, "min_rpm": 5580, "ok": true, "raw": { "attempts": [ { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "status" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 5580,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 5575\n },\n {\n \"actual_rpm\" : 6020,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 6021\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.6,\n \"TB0T\" : 33,\n \"TCDX\" : 66.6,\n \"TCHP\" : 59.7,\n \"TCMb\" : 74.3,\n \"TG0B\" : 33,\n \"TG0H\" : 33,\n \"TG0V\" : 33,\n \"TH0x\" : 39.1,\n \"TMVR\" : 59.5,\n \"TPDX\" : 64.9,\n \"TRDX\" : 67.6,\n \"TS0P\" : 67,\n \"Tg0j\" : 64.9,\n \"Tm08\" : 64.5,\n \"Tp04\" : 64.3,\n \"Tp08\" : 64,\n \"Tp0C\" : 63.9,\n \"Tp0G\" : 64.7,\n \"Tp0X\" : 64.5\n }\n}" } ], "detection": { "available": true, "clock_anchor_enabled": false, "clock_anchor_policy": "explicit experimental only; never used for product claims", "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.", "selected": { "kind": "thermalforge", "path": "/Users/youssof/.mtplx/bin/thermalforge", "version": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "--version" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "0.1.0" } }, "tools": [ { "kind": "thermalforge", "path": "/Users/youssof/.mtplx/bin/thermalforge", "version": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "--version" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "0.1.0" } } ] }, "ok": true, "status": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "status" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 5580,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 5575\n },\n {\n \"actual_rpm\" : 6020,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 6021\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.6,\n \"TB0T\" : 33,\n \"TCDX\" : 66.6,\n \"TCHP\" : 59.7,\n \"TCMb\" : 74.3,\n \"TG0B\" : 33,\n \"TG0H\" : 33,\n \"TG0V\" : 33,\n \"TH0x\" : 39.1,\n \"TMVR\" : 59.5,\n \"TPDX\" : 64.9,\n \"TRDX\" : 67.6,\n \"TS0P\" : 67,\n \"Tg0j\" : 64.9,\n \"Tm08\" : 64.5,\n \"Tp04\" : 64.3,\n \"Tp08\" : 64,\n \"Tp0C\" : 63.9,\n \"Tp0G\" : 64.7,\n \"Tp0X\" : 64.5\n }\n}" } }, "target_max_rpm": 6021, "target_min_rpm": 5575 }, "message": "fans ramped to max (actual 7357-7424 RPM; target 7826 RPM)", "ok": true, "profile": "max", "set_result": { "attempts": [ { "command": [ "sudo", "-n", "/Users/youssof/.mtplx/bin/thermalforge", "max" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "Fan 0: 5580 RPM \u2192 max (7826 RPM)\nFan 1: 6020 RPM \u2192 max (7826 RPM)" } ], "command": [ "sudo", "-n", "/Users/youssof/.mtplx/bin/thermalforge", "max" ], "detection": { "available": true, "clock_anchor_enabled": false, "clock_anchor_policy": "explicit experimental only; never used for product claims", "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.", "selected": { "kind": "thermalforge", "path": "/Users/youssof/.mtplx/bin/thermalforge", "version": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "--version" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "0.1.0" } }, "tools": [ { "kind": "thermalforge", "path": "/Users/youssof/.mtplx/bin/thermalforge", "version": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "--version" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "0.1.0" } } ] }, "dry_run": false, "ok": true, "profile": "max" } }, "max_tokens": 1000, "median_confirmation_speedup": 2.5439134852185834, "memory_gate": { "ar_peak_memory_gib": 31.80895305145532, "lte_ar_plus_6_gib": true, "lte_ar_times_1_18": true, "mtp_peak_memory_gib": 31.937621283344924 }, "mtp_by_block_size": { "3": { "acceptance": 0.9970059880239521, "accepted_drafts": 666, "active_memory_gib": 31.813567525707185, "block_size": 3, "cache_memory_gib": 82.18594096973538, "decode_s": 100.14749887499784, "draft_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "drafted_tokens": 668, "generated_tokens": 1000, "mode": "mtp", "peak_memory_gib": 31.90066777355969, "prefill_s": 0.4200237909972202, "row_distribution_evals": 0, "speedup_vs_ar": 0.7270302993175131, "target_distribution_modes": { "batched_logits": 334 }, "target_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "telemetry": { "ar_dense_fallback_calls": 0, "decode_dense_fallback_calls": 0, "dense_fallback_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "events": [], "paged_active_array_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "paged_attention_bailouts_by_phase_reason": { "ar_decode": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "decode_verify": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "postcommit": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "prefill": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "unknown": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 } }, "paged_attention_large_q_path": { "dense_forbidden": 0, "large_q_split_sdpa_fallback": 0, "partitioned_paged": 0, "tail_paged": 0, "unknown": 0 }, "postcommit_dense_fallback_calls": 0, "prefill_dense_fallback_calls": 0, "trace_events": false }, "timing_s": { "accept": 0.02364034196943976, "draft": 2.0620414960430935, "rollback": 0.00019853397679980844, "target_distribution": 97.66836021310883, "target_hidden": 0.0, "verify": 0.3714816799911205 }, "tok_s": 9.985271836375869, "token_preview": [ 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761 ], "verify_calls": 334 }, "4": { "acceptance": 1.0, "accepted_drafts": 750, "active_memory_gib": 31.815169698558748, "block_size": 4, "cache_memory_gib": 82.184341263026, "decode_s": 88.162400583009, "draft_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "drafted_tokens": 750, "generated_tokens": 1000, "mode": "mtp", "peak_memory_gib": 31.913927708752453, "prefill_s": 0.45891812500485685, "row_distribution_evals": 0, "speedup_vs_ar": 0.8258652849911423, "target_distribution_modes": { "batched_logits": 250 }, "target_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "telemetry": { "ar_dense_fallback_calls": 0, "decode_dense_fallback_calls": 0, "dense_fallback_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "events": [], "paged_active_array_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "paged_attention_bailouts_by_phase_reason": { "ar_decode": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "decode_verify": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "postcommit": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "prefill": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "unknown": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 } }, "paged_attention_large_q_path": { "dense_forbidden": 0, "large_q_split_sdpa_fallback": 0, "partitioned_paged": 0, "tail_paged": 0, "unknown": 0 }, "postcommit_dense_fallback_calls": 0, "prefill_dense_fallback_calls": 0, "trace_events": false }, "timing_s": { "accept": 0.012636876970645972, "draft": 2.101732614086359, "rollback": 0.00013187690638005733, "target_distribution": 85.75181395103573, "target_hidden": 0.0, "verify": 0.27962454402586445 }, "tok_s": 11.342703844122909, "token_preview": [ 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761 ], "verify_calls": 250 }, "5": { "acceptance": 0.99875, "accepted_drafts": 799, "active_memory_gib": 31.814849263988435, "block_size": 5, "cache_memory_gib": 79.50701175443828, "decode_s": 73.88629216598929, "draft_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "drafted_tokens": 800, "generated_tokens": 1000, "mode": "mtp", "peak_memory_gib": 31.925461772829294, "prefill_s": 0.5138492080004653, "row_distribution_evals": 0, "speedup_vs_ar": 0.9854367291759348, "target_distribution_modes": { "batched_logits": 200 }, "target_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "telemetry": { "ar_dense_fallback_calls": 0, "decode_dense_fallback_calls": 0, "dense_fallback_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "events": [], "paged_active_array_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "paged_attention_bailouts_by_phase_reason": { "ar_decode": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "decode_verify": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "postcommit": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "prefill": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "unknown": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 } }, "paged_attention_large_q_path": { "dense_forbidden": 0, "large_q_split_sdpa_fallback": 0, "partitioned_paged": 0, "tail_paged": 0, "unknown": 0 }, "postcommit_dense_fallback_calls": 0, "prefill_dense_fallback_calls": 0, "trace_events": false }, "timing_s": { "accept": 0.011033208094886504, "draft": 1.9875367559725419, "rollback": 0.0001222430873895064, "target_distribution": 71.65346937690629, "target_hidden": 0.0, "verify": 0.22101375696365722 }, "tok_s": 13.534310231097393, "token_preview": [ 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761 ], "verify_calls": 200 }, "6": { "acceptance": 0.9976047904191617, "accepted_drafts": 833, "active_memory_gib": 31.81573427375406, "block_size": 6, "cache_memory_gib": 66.49232691712677, "decode_s": 29.219117750006262, "draft_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "drafted_tokens": 835, "generated_tokens": 1000, "mode": "mtp", "peak_memory_gib": 31.937621283344924, "prefill_s": 0.584632291996968, "row_distribution_evals": 0, "speedup_vs_ar": 2.491870791785778, "target_distribution_modes": { "batched_logits": 167 }, "target_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "telemetry": { "ar_dense_fallback_calls": 0, "decode_dense_fallback_calls": 0, "dense_fallback_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "events": [], "paged_active_array_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "paged_attention_bailouts_by_phase_reason": { "ar_decode": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "decode_verify": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "postcommit": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "prefill": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "unknown": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 } }, "paged_attention_large_q_path": { "dense_forbidden": 0, "large_q_split_sdpa_fallback": 0, "partitioned_paged": 0, "tail_paged": 0, "unknown": 0 }, "postcommit_dense_fallback_calls": 0, "prefill_dense_fallback_calls": 0, "trace_events": false }, "timing_s": { "accept": 0.009512208969681524, "draft": 1.5696202969993465, "rollback": 9.41639591474086e-05, "target_distribution": 27.45037787995534, "target_hidden": 0.0, "verify": 0.17939917097100988 }, "tok_s": 34.22416818179891, "token_preview": [ 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761 ], "verify_calls": 167 } }, "passed": true, "prompt_id": "flappy_html5_canvas_game", "prompt_tokens": 119, "sampler": { "seed": 0, "temperature": 1.0, "top_k": 64, "top_p": 0.95 } }, "status": "passed" }