{ "arch_id": "gemma4-assistant-mtp", "artifacts": { "assistant_dtype": null, "assistant_format": "q6-g64-affine", "assistant_model": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q6-g64-mlx", "assistant_quantization": { "bits": 6, "group_size": 64, "mode": "affine" }, "disk_ok": true, "min_free_gib": 220.0, "observed_free_gib": 704.0457382202148, "target_model": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx", "target_quantization": { "bits": 4, "format": "mlx-flat4-g64", "group_size": 64, "mode": "affine" } }, "backend": "gemma4_assistant", "benchmark": { "draft_block_sizes": [ 3, 4, 5, 6 ], "draft_sampler": { "exactness_note": "Assistant q may differ from target p; MTPLX remains exact because acceptance uses p/q and rejection samples the residual distribution.", "inherits_target_sampler": true, "temperature": null, "top_k": null, "top_p": null }, "max_mode": true, "max_tokens": 1000, "profile": "sustained", "prompt_suite": "mtplx/benchmarks/prompts/flappy.jsonl", "reasoning": "off", "sampler_source": { "do_sample": true, "local_reference": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx/generation_config.json", "name": "official Gemma 4 generation_config.json", "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "seed": 0, "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "blockers": [ "best speedup 1.995x is below 2.000x", "median confirmation speedup 1.995x is below 2.000x" ], "can_run_now": true, "gates": { "generated_tokens": 1000, "longer_lengths_blocked_until_160_passes": true, "median_of_3_min_speedup_vs_ar": 2.0, "min_speedup_vs_ar": 2.0, "mtp_peak_memory_lte_ar_multiplier": 1.18, "mtp_peak_memory_lte_ar_plus_gib": 6 }, "official_sources": { "assistant": "google/gemma-4-31B-it-assistant", "assistant_revision": "cffbbd2cea41ea56a0fa5b0487e0d445121fd204", "target": "google/gemma-4-31B-it", "target_revision": "145dc2508c480a64b47242f160d286cff94a2343" }, "pair": { "assistant_exists": true, "assistant_inspection": { "architecture": "Gemma4AssistantForCausalLM", "architecture_recognized": true, "backbone_hidden_size": 5376, "compatibility": { "arch_id": "gemma4-assistant-mtp", "can_run": false, "exit_code": 3, "message": "Official-style Gemma 4 31B assistant artifact recognized. This is an assistant-backed MTP pair, not a standalone target; MTPLX scaffold is present but QA and the 160-token speed/memory gate are still pending.", "mtp_supported": "recognized", "recognized": true, "recommended_backend": "gemma4_assistant", "recommended_profile": "performance-cold", "runtime_compatibility": "assistant-pair-qa-pending", "runtime_contract": null, "runtime_contract_error": null, "runtime_contract_path": null, "support_level": "architecture-scaffolded-qa-pending", "support_notes": "Assistant-backed scaffold for the official dense Gemma 4 31B pair. It remains QA-pending and is not a public runnable backend until 160-token exactness, speed, and memory gates pass.", "supported": false, "tier": "architecture-compatible-but-unverified", "unsafe_force_required": false, "unverified_model": true }, "config_exists": true, "hidden_size": 1024, "layer_types": [ "sliding_attention", "sliding_attention", "sliding_attention", "full_attention" ], "model_dir": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q6-g64-mlx", "model_files": [ "model.safetensors" ], "model_type": "gemma4_assistant", "mtp": { "exists": false, "expected_tensor_count": 15, "extra_keys": [], "metadata_only": true, "missing_expected_keys": [], "mtp_file": "model.safetensors.index.json::embedded", "passes_tensor_gate": false, "sidecar_format": "bf16", "tensor_count": 0, "tensors": [] }, "mtp_arch": "gemma4-assistant-mtp", "mtp_num_hidden_layers": 0, "mtp_pattern": null, "mtp_supported": "recognized", "num_hidden_layers": 4, "num_kv_shared_layers": 4, "passes_primary_gate": false, "quantization": { "bits": 6, "group_size": 64, "mode": "affine" }, "recommended_backend": "gemma4_assistant", "recommended_profile": "performance-cold", "runtime_compatibility": "assistant-pair-qa-pending", "runtime_contract_path": null, "sidecars": { "preprocessor_config.json": false, "processor_config.json": false, "video_preprocessor_config.json": false }, "source": "local", "support_level": "architecture-scaffolded-qa-pending", "support_notes": "Assistant-backed scaffold for the official dense Gemma 4 31B pair. It remains QA-pending and is not a public runnable backend until 160-token exactness, speed, and memory gates pass.", "unverified_model": true, "use_ordered_embeddings": false, "vocab_size": 262144 }, "assistant_model": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q6-g64-mlx", "pair_error": null, "pair_valid": true, "target_exists": true, "target_inspection": { "architecture": "Gemma4ForConditionalGeneration", "architecture_recognized": false, "backbone_hidden_size": null, "compatibility": { "arch_id": null, "can_run": false, "exit_code": 2, "message": "Model has no MTP head. MTPLX requires an MTP-equipped model.", "mtp_supported": "no", "recognized": false, "recommended_backend": null, "recommended_profile": null, "runtime_compatibility": "unsupported", "runtime_contract": null, "runtime_contract_error": null, "runtime_contract_path": null, "support_level": "unsupported", "support_notes": null, "supported": false, "tier": "no-MTP", "unsafe_force_required": false, "unverified_model": false }, "config_exists": true, "hidden_size": 5376, "layer_types": [ "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "sliding_attention", "full_attention" ], "model_dir": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx", "model_files": [ "model-00001-of-00004.safetensors", "model-00002-of-00004.safetensors", "model-00003-of-00004.safetensors", "model-00004-of-00004.safetensors" ], "model_type": "gemma4_text", "mtp": { "exists": false, "expected_tensor_count": 15, "extra_keys": [], "metadata_only": true, "missing_expected_keys": [], "mtp_file": "model.safetensors.index.json::embedded", "passes_tensor_gate": false, "sidecar_format": "bf16", "tensor_count": 0, "tensors": [] }, "mtp_arch": null, "mtp_num_hidden_layers": 0, "mtp_pattern": null, "mtp_supported": "no", "num_hidden_layers": 60, "num_kv_shared_layers": 0, "passes_primary_gate": false, "quantization": { "bits": 4, "group_size": 64, "mode": "affine" }, "recommended_backend": null, "recommended_profile": null, "runtime_compatibility": "unsupported", "runtime_contract_path": null, "sidecars": { "preprocessor_config.json": false, "processor_config.json": false, "video_preprocessor_config.json": false }, "source": "local", "support_level": "unsupported", "support_notes": null, "unverified_model": false, "use_ordered_embeddings": null, "vocab_size": 262144 }, "target_model": "/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx" }, "planned_commands": { "assistant_bf16_snapshot": "uv run python -c \"from huggingface_hub import snapshot_download; snapshot_download(repo_id='google/gemma-4-31B-it-assistant', revision='cffbbd2cea41ea56a0fa5b0487e0d445121fd204', repo_type='model', local_dir='/Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q6-g64-mlx')\"", "gate": "mtplx bench gemma-mtp --target-model /Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx --assistant-model /Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-assistant-google-q6-g64-mlx --profile sustained --max --prompt-suite mtplx/benchmarks/prompts/flappy.jsonl --max-tokens 1000 --temperature 1.0 --top-p 0.95 --top-k 64 --seed 0 --reasoning off --draft-block-sizes 3,4,5,6 --json --output outputs/gemma4/flappy1000-q6assistant-pure-sweep.json", "target_flat4_g64": "uv run python -m mlx_lm.convert --hf-path /Users/youssof/Documents/MTPLX/models/.sources/gemma-4-31B-it-145dc2508c48 --mlx-path /Users/youssof/Documents/MTPLX/models/gemma-4-31B-it-google-flat4-g64-mlx --quantize --q-bits 4 --q-group-size 64 --q-mode affine", "target_revision_download": "uv run python -c \"from huggingface_hub import snapshot_download; snapshot_download(repo_id='google/gemma-4-31B-it', revision='145dc2508c480a64b47242f160d286cff94a2343', repo_type='model', local_dir='/Users/youssof/Documents/MTPLX/models/.sources/gemma-4-31B-it-145dc2508c48')\"" }, "qa_pending": true, "results": { "ar": { "active_memory_gib": 16.96541445143521, "cache_memory_gib": 1.9738727556541562, "decode_s": 45.80454516700411, "generated_tokens": 1000, "mode": "ar", "peak_memory_gib": 17.040886400267482, "prefill_s": 1.318873333002557, "tok_s": 21.831894550071045, "token_preview": [ 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761 ] }, "ar_confirmation": [ { "active_memory_gib": 16.96541445143521, "cache_memory_gib": 1.9738727556541562, "decode_s": 45.80454516700411, "generated_tokens": 1000, "mode": "ar", "peak_memory_gib": 17.040886400267482, "prefill_s": 1.318873333002557, "tok_s": 21.831894550071045, "token_preview": [ 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761, 9996, 625, 24731, 236761 ] } ], "best_block_confirmation": [ { "acceptance": 0.9810874704491725, "accepted_drafts": 830, "active_memory_gib": 17.41092054359615, "block_size": 6, "cache_memory_gib": 67.33365368191153, "decode_s": 22.95747512500384, "draft_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "drafted_tokens": 846, "generated_tokens": 1000, "mode": "mtp", "peak_memory_gib": 17.566345684230328, "prefill_s": 0.2074422090081498, "row_distribution_evals": 0, "speedup_vs_ar": 1.9951908873949589, "target_distribution_modes": { "batched_logits": 170 }, "target_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "telemetry": { "ar_dense_fallback_calls": 0, "decode_dense_fallback_calls": 0, "dense_fallback_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "events": [], "paged_active_array_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "paged_attention_bailouts_by_phase_reason": { "ar_decode": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "decode_verify": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "postcommit": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "prefill": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "unknown": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 } }, "paged_attention_large_q_path": { "dense_forbidden": 0, "large_q_split_sdpa_fallback": 0, "partitioned_paged": 0, "tail_paged": 0, "unknown": 0 }, "postcommit_dense_fallback_calls": 0, "prefill_dense_fallback_calls": 0, "trace_events": false }, "timing_s": { "accept": 0.011550506053026766, "draft": 1.5224984570086235, "rollback": 0.0001419630425516516, "target_distribution": 21.21943625298445, "target_hidden": 0.0, "verify": 0.19109521005884744 }, "tok_s": 43.558797060869416, "token_preview": [ 9996, 625, 759, 759, 759, 759, 759, 759, 759, 759, 759, 759, 759, 759, 759, 759 ], "verify_calls": 170 } ], "best_block_size": 6, "best_speedup": 1.9951908873949589, "blockers": [ "best speedup 1.995x is below 2.000x", "median confirmation speedup 1.995x is below 2.000x" ], "draft_sampler": { "exactness_note": "Assistant q may differ from target p; MTPLX remains exact because acceptance uses p/q and rejection samples the residual distribution.", "inherits_target_sampler": true, "temperature": null, "top_k": null, "top_p": null }, "fan_restore": { "after": { "actual_max_rpm": 6643, "actual_min_rpm": 6265, "capacity_max_rpm": 7826, "capacity_min_rpm": 7826, "fans": [ { "actual_rpm": 6265, "max_capacity_rpm": 7826, "mode": "auto", "raw": { "actual_rpm": 6265, "index": 0, "max_rpm": 7826, "min_rpm": 2317, "mode": "auto", "target_rpm": 5768 }, "rpm": 6265, "target_rpm": 5768 }, { "actual_rpm": 6643, "max_capacity_rpm": 7826, "mode": "auto", "raw": { "actual_rpm": 6643, "index": 1, "max_rpm": 7826, "min_rpm": 2317, "mode": "auto", "target_rpm": 6229 }, "rpm": 6643, "target_rpm": 6229 } ], "max_rpm": 6643, "min_rpm": 6265, "ok": true, "raw": { "attempts": [ { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "status" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 6265,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 5768\n },\n {\n \"actual_rpm\" : 6643,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 6229\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.7,\n \"TB0T\" : 32.3,\n \"TCDX\" : 73.6,\n \"TCHP\" : 61.1,\n \"TCMb\" : 88.7,\n \"TG0B\" : 32.4,\n \"TG0H\" : 32,\n \"TG0V\" : 32.4,\n \"TH0x\" : 40.9,\n \"TMVR\" : 66.4,\n \"TPDX\" : 68.6,\n \"TRDX\" : 81.9,\n \"TS0P\" : 72.8,\n \"Tg0j\" : 77,\n \"Tm08\" : 73.7,\n \"Tp04\" : 74.2,\n \"Tp08\" : 73.9,\n \"Tp0C\" : 74.8,\n \"Tp0G\" : 75.2,\n \"Tp0X\" : 75\n }\n}" } ], "detection": { "available": true, "clock_anchor_enabled": false, "clock_anchor_policy": "explicit experimental only; never used for product claims", "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.", "selected": { "kind": "thermalforge", "path": "/Users/youssof/.mtplx/bin/thermalforge", "version": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "--version" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "0.1.0" } }, "tools": [ { "kind": "thermalforge", "path": "/Users/youssof/.mtplx/bin/thermalforge", "version": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "--version" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "0.1.0" } } ] }, "ok": true, "status": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "status" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 6265,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 5768\n },\n {\n \"actual_rpm\" : 6643,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 6229\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.7,\n \"TB0T\" : 32.3,\n \"TCDX\" : 73.6,\n \"TCHP\" : 61.1,\n \"TCMb\" : 88.7,\n \"TG0B\" : 32.4,\n \"TG0H\" : 32,\n \"TG0V\" : 32.4,\n \"TH0x\" : 40.9,\n \"TMVR\" : 66.4,\n \"TPDX\" : 68.6,\n \"TRDX\" : 81.9,\n \"TS0P\" : 72.8,\n \"Tg0j\" : 77,\n \"Tm08\" : 73.7,\n \"Tp04\" : 74.2,\n \"Tp08\" : 73.9,\n \"Tp0C\" : 74.8,\n \"Tp0G\" : 75.2,\n \"Tp0X\" : 75\n }\n}" } }, "target_max_rpm": 6229, "target_min_rpm": 5768 }, "message": "fan profile restored", "ok": true, "profile": "silent", "set_result": { "attempts": [ { "command": [ "sudo", "-n", "/Users/youssof/.mtplx/bin/thermalforge", "auto" ], "ok": true, "returncode": 0, "stderr": "No matching processes were found", "stdout": "Fans reset to Apple defaults" } ], "command": [ "sudo", "-n", "/Users/youssof/.mtplx/bin/thermalforge", "auto" ], "detection": { "available": true, "clock_anchor_enabled": false, "clock_anchor_policy": "explicit experimental only; never used for product claims", "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.", "selected": { "kind": "thermalforge", "path": "/Users/youssof/.mtplx/bin/thermalforge", "version": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "--version" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "0.1.0" } }, "tools": [ { "kind": "thermalforge", "path": "/Users/youssof/.mtplx/bin/thermalforge", "version": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "--version" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "0.1.0" } } ] }, "dry_run": false, "ok": true, "profile": "silent" } }, "fanmax": { "after": { "actual_max_rpm": 7477, "actual_min_rpm": 7385, "capacity_max_rpm": 7826, "capacity_min_rpm": 7826, "fans": [ { "actual_rpm": 7385, "max_capacity_rpm": 7826, "mode": "manual", "raw": { "actual_rpm": 7385, "index": 0, "max_rpm": 7826, "min_rpm": 2317, "mode": "manual", "target_rpm": 7826 }, "rpm": 7385, "target_rpm": 7826 }, { "actual_rpm": 7477, "max_capacity_rpm": 7826, "mode": "manual", "raw": { "actual_rpm": 7477, "index": 1, "max_rpm": 7826, "min_rpm": 2317, "mode": "manual", "target_rpm": 7826 }, "rpm": 7477, "target_rpm": 7826 } ], "max_rpm": 7477, "min_rpm": 7385, "ok": true, "raw": { "attempts": [ { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "status" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 7385,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"manual\",\n \"target_rpm\" : 7826\n },\n {\n \"actual_rpm\" : 7477,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"manual\",\n \"target_rpm\" : 7826\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.7,\n \"TB0T\" : 32.8,\n \"TCDX\" : 64.5,\n \"TCHP\" : 56,\n \"TCMb\" : 71.5,\n \"TG0B\" : 32.8,\n \"TG0H\" : 32,\n \"TG0V\" : 32.8,\n \"TH0x\" : 38.8,\n \"TMVR\" : 55.8,\n \"TPDX\" : 60.8,\n \"TRDX\" : 60.9,\n \"TS0P\" : 61.7,\n \"Tg0j\" : 61,\n \"Tm08\" : 61.6,\n \"Tp04\" : 64.6,\n \"Tp08\" : 64,\n \"Tp0C\" : 65.5,\n \"Tp0G\" : 65.3,\n \"Tp0X\" : 64\n }\n}" } ], "detection": { "available": true, "clock_anchor_enabled": false, "clock_anchor_policy": "explicit experimental only; never used for product claims", "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.", "selected": { "kind": "thermalforge", "path": "/Users/youssof/.mtplx/bin/thermalforge", "version": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "--version" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "0.1.0" } }, "tools": [ { "kind": "thermalforge", "path": "/Users/youssof/.mtplx/bin/thermalforge", "version": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "--version" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "0.1.0" } } ] }, "ok": true, "status": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "status" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 7385,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"manual\",\n \"target_rpm\" : 7826\n },\n {\n \"actual_rpm\" : 7477,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"manual\",\n \"target_rpm\" : 7826\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.7,\n \"TB0T\" : 32.8,\n \"TCDX\" : 64.5,\n \"TCHP\" : 56,\n \"TCMb\" : 71.5,\n \"TG0B\" : 32.8,\n \"TG0H\" : 32,\n \"TG0V\" : 32.8,\n \"TH0x\" : 38.8,\n \"TMVR\" : 55.8,\n \"TPDX\" : 60.8,\n \"TRDX\" : 60.9,\n \"TS0P\" : 61.7,\n \"Tg0j\" : 61,\n \"Tm08\" : 61.6,\n \"Tp04\" : 64.6,\n \"Tp08\" : 64,\n \"Tp0C\" : 65.5,\n \"Tp0G\" : 65.3,\n \"Tp0X\" : 64\n }\n}" } }, "target_max_rpm": 7826, "target_min_rpm": 7826 }, "baseline": { "actual_max_rpm": 6286, "actual_min_rpm": 5825, "capacity_max_rpm": 7826, "capacity_min_rpm": 7826, "fans": [ { "actual_rpm": 5825, "max_capacity_rpm": 7826, "mode": "auto", "raw": { "actual_rpm": 5825, "index": 0, "max_rpm": 7826, "min_rpm": 2317, "mode": "auto", "target_rpm": 5822 }, "rpm": 5825, "target_rpm": 5822 }, { "actual_rpm": 6286, "max_capacity_rpm": 7826, "mode": "auto", "raw": { "actual_rpm": 6286, "index": 1, "max_rpm": 7826, "min_rpm": 2317, "mode": "auto", "target_rpm": 6287 }, "rpm": 6286, "target_rpm": 6287 } ], "max_rpm": 6286, "min_rpm": 5825, "ok": true, "raw": { "attempts": [ { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "status" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 5825,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 5822\n },\n {\n \"actual_rpm\" : 6286,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 6287\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.7,\n \"TB0T\" : 32.8,\n \"TCDX\" : 63.9,\n \"TCHP\" : 56.4,\n \"TCMb\" : 73.3,\n \"TG0B\" : 32.8,\n \"TG0H\" : 32,\n \"TG0V\" : 32.8,\n \"TH0x\" : 38.8,\n \"TMVR\" : 56.4,\n \"TPDX\" : 61.2,\n \"TRDX\" : 61.4,\n \"TS0P\" : 62.7,\n \"Tg0j\" : 61.3,\n \"Tm08\" : 61.7,\n \"Tp04\" : 63.4,\n \"Tp08\" : 62.9,\n \"Tp0C\" : 63.8,\n \"Tp0G\" : 64,\n \"Tp0X\" : 63.1\n }\n}" } ], "detection": { "available": true, "clock_anchor_enabled": false, "clock_anchor_policy": "explicit experimental only; never used for product claims", "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.", "selected": { "kind": "thermalforge", "path": "/Users/youssof/.mtplx/bin/thermalforge", "version": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "--version" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "0.1.0" } }, "tools": [ { "kind": "thermalforge", "path": "/Users/youssof/.mtplx/bin/thermalforge", "version": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "--version" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "0.1.0" } } ] }, "ok": true, "status": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "status" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "{\n \"fans\" : [\n {\n \"actual_rpm\" : 5825,\n \"index\" : 0,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 5822\n },\n {\n \"actual_rpm\" : 6286,\n \"index\" : 1,\n \"max_rpm\" : 7826,\n \"min_rpm\" : 2317,\n \"mode\" : \"auto\",\n \"target_rpm\" : 6287\n }\n ],\n \"temperatures\" : {\n \"TAOL\" : 34.7,\n \"TB0T\" : 32.8,\n \"TCDX\" : 63.9,\n \"TCHP\" : 56.4,\n \"TCMb\" : 73.3,\n \"TG0B\" : 32.8,\n \"TG0H\" : 32,\n \"TG0V\" : 32.8,\n \"TH0x\" : 38.8,\n \"TMVR\" : 56.4,\n \"TPDX\" : 61.2,\n \"TRDX\" : 61.4,\n \"TS0P\" : 62.7,\n \"Tg0j\" : 61.3,\n \"Tm08\" : 61.7,\n \"Tp04\" : 63.4,\n \"Tp08\" : 62.9,\n \"Tp0C\" : 63.8,\n \"Tp0G\" : 64,\n \"Tp0X\" : 63.1\n }\n}" } }, "target_max_rpm": 6287, "target_min_rpm": 5822 }, "message": "fans ramped to max (actual 7385-7477 RPM; target 7826 RPM)", "ok": true, "profile": "max", "set_result": { "attempts": [ { "command": [ "sudo", "-n", "/Users/youssof/.mtplx/bin/thermalforge", "max" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "Fan 0: 5825 RPM \u2192 max (7826 RPM)\nFan 1: 6286 RPM \u2192 max (7826 RPM)" } ], "command": [ "sudo", "-n", "/Users/youssof/.mtplx/bin/thermalforge", "max" ], "detection": { "available": true, "clock_anchor_enabled": false, "clock_anchor_policy": "explicit experimental only; never used for product claims", "instructions": "Install ThermalForge and ensure the thermalforge CLI is on PATH.", "selected": { "kind": "thermalforge", "path": "/Users/youssof/.mtplx/bin/thermalforge", "version": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "--version" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "0.1.0" } }, "tools": [ { "kind": "thermalforge", "path": "/Users/youssof/.mtplx/bin/thermalforge", "version": { "command": [ "/Users/youssof/.mtplx/bin/thermalforge", "--version" ], "ok": true, "returncode": 0, "stderr": "", "stdout": "0.1.0" } } ] }, "dry_run": false, "ok": true, "profile": "max" } }, "max_tokens": 1000, "median_confirmation_speedup": 1.9951908873949589, "memory_gate": { "ar_peak_memory_gib": 17.040886400267482, "lte_ar_plus_6_gib": true, "lte_ar_times_1_18": true, "mtp_peak_memory_gib": 17.566345684230328 }, "mtp_by_block_size": { "3": { "acceptance": 0.991044776119403, "accepted_drafts": 664, "active_memory_gib": 17.409974498674273, "block_size": 3, "cache_memory_gib": 96.58854798320681, "decode_s": 37.873455042004935, "draft_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "drafted_tokens": 670, "generated_tokens": 1000, "mode": "mtp", "peak_memory_gib": 17.5211139684543, "prefill_s": 0.3529969999945024, "row_distribution_evals": 0, "speedup_vs_ar": 1.2094102615196556, "target_distribution_modes": { "batched_logits": 335 }, "target_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "telemetry": { "ar_dense_fallback_calls": 0, "decode_dense_fallback_calls": 0, "dense_fallback_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "events": [], "paged_active_array_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "paged_attention_bailouts_by_phase_reason": { "ar_decode": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "decode_verify": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "postcommit": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "prefill": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "unknown": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 } }, "paged_attention_large_q_path": { "dense_forbidden": 0, "large_q_split_sdpa_fallback": 0, "partitioned_paged": 0, "tail_paged": 0, "unknown": 0 }, "postcommit_dense_fallback_calls": 0, "prefill_dense_fallback_calls": 0, "trace_events": false }, "timing_s": { "accept": 0.0206884491344681, "draft": 1.4088243619044079, "rollback": 0.0002345078537473455, "target_distribution": 36.03245366179908, "target_hidden": 0.0, "verify": 0.3872644162038341 }, "tok_s": 26.403717297270966, "token_preview": [ 9996, 625, 759, 759, 759, 759, 759, 759, 759, 759, 759, 759, 759, 759, 759, 759 ], "verify_calls": 335 }, "4": { "acceptance": 0.9548969072164949, "accepted_drafts": 741, "active_memory_gib": 17.411866588518023, "block_size": 4, "cache_memory_gib": 96.58724461961538, "decode_s": 36.01139437498932, "draft_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "drafted_tokens": 776, "generated_tokens": 1000, "mode": "mtp", "peak_memory_gib": 17.544619735330343, "prefill_s": 0.2180854579928564, "row_distribution_evals": 0, "speedup_vs_ar": 1.2719458927371148, "target_distribution_modes": { "batched_logits": 259 }, "target_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "telemetry": { "ar_dense_fallback_calls": 0, "decode_dense_fallback_calls": 0, "dense_fallback_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "events": [], "paged_active_array_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "paged_attention_bailouts_by_phase_reason": { "ar_decode": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "decode_verify": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "postcommit": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "prefill": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "unknown": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 } }, "paged_attention_large_q_path": { "dense_forbidden": 0, "large_q_split_sdpa_fallback": 0, "partitioned_paged": 0, "tail_paged": 0, "unknown": 0 }, "postcommit_dense_fallback_calls": 0, "prefill_dense_fallback_calls": 0, "trace_events": false }, "timing_s": { "accept": 0.01497975607344415, "draft": 1.4912097119231476, "rollback": 0.0002762930525932461, "target_distribution": 34.18285454783472, "target_hidden": 0.0, "verify": 0.3010120859835297 }, "tok_s": 27.768988603632668, "token_preview": [ 9996, 625, 759, 759, 759, 759, 759, 759, 759, 236761, 5715, 236789, 236751, 506, 3772, 236787 ], "verify_calls": 259 }, "5": { "acceptance": 0.8822197055492639, "accepted_drafts": 779, "active_memory_gib": 17.411210460588336, "block_size": 5, "cache_memory_gib": 82.11562378518283, "decode_s": 24.697525832991232, "draft_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "drafted_tokens": 883, "generated_tokens": 1000, "mode": "mtp", "peak_memory_gib": 17.551317367702723, "prefill_s": 0.21940916699531954, "row_distribution_evals": 0, "speedup_vs_ar": 1.8546208019678587, "target_distribution_modes": { "batched_logits": 221 }, "target_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "telemetry": { "ar_dense_fallback_calls": 0, "decode_dense_fallback_calls": 0, "dense_fallback_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "events": [], "paged_active_array_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "paged_attention_bailouts_by_phase_reason": { "ar_decode": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "decode_verify": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "postcommit": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "prefill": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "unknown": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 } }, "paged_attention_large_q_path": { "dense_forbidden": 0, "large_q_split_sdpa_fallback": 0, "partitioned_paged": 0, "tail_paged": 0, "unknown": 0 }, "postcommit_dense_fallback_calls": 0, "prefill_dense_fallback_calls": 0, "trace_events": false }, "timing_s": { "accept": 0.013920457829954103, "draft": 1.5722365041146986, "rollback": 0.00047238002298399806, "target_distribution": 22.840641582908574, "target_hidden": 0.0, "verify": 0.2542634050187189 }, "tok_s": 40.489885778930486, "token_preview": [ 9996, 625, 759, 759, 759, 759, 759, 759, 759, 236761, 759, 759, 759, 759, 759, 759 ], "verify_calls": 221 }, "6": { "acceptance": 0.9810874704491725, "accepted_drafts": 830, "active_memory_gib": 17.41092054359615, "block_size": 6, "cache_memory_gib": 67.33365368191153, "decode_s": 22.95747512500384, "draft_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "drafted_tokens": 846, "generated_tokens": 1000, "mode": "mtp", "peak_memory_gib": 17.566345684230328, "prefill_s": 0.2074422090081498, "row_distribution_evals": 0, "speedup_vs_ar": 1.9951908873949589, "target_distribution_modes": { "batched_logits": 170 }, "target_sampler": { "temperature": 1.0, "top_k": 64, "top_p": 0.95 }, "telemetry": { "ar_dense_fallback_calls": 0, "decode_dense_fallback_calls": 0, "dense_fallback_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "events": [], "paged_active_array_calls_by_phase": { "ar_decode": 0, "decode_verify": 0, "postcommit": 0, "prefill": 0, "unknown": 0 }, "paged_attention_bailouts_by_phase_reason": { "ar_decode": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "decode_verify": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "postcommit": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "prefill": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 }, "unknown": { "batch_not_1": 0, "block_size_mismatch": 0, "blocks_invalid": 0, "dtype_unsupported": 0, "empty_cache": 0, "head_dim_unsupported": 0, "kernel_unavailable": 0, "offset_invalid": 0, "partitioned_invalid_output": 0, "partitioned_unavailable": 0, "q_len_gt_max": 0, "q_len_invalid": 0, "turboquant_unsupported": 0, "unknown": 0, "unsupported_mask": 0 } }, "paged_attention_large_q_path": { "dense_forbidden": 0, "large_q_split_sdpa_fallback": 0, "partitioned_paged": 0, "tail_paged": 0, "unknown": 0 }, "postcommit_dense_fallback_calls": 0, "prefill_dense_fallback_calls": 0, "trace_events": false }, "timing_s": { "accept": 0.011550506053026766, "draft": 1.5224984570086235, "rollback": 0.0001419630425516516, "target_distribution": 21.21943625298445, "target_hidden": 0.0, "verify": 0.19109521005884744 }, "tok_s": 43.558797060869416, "token_preview": [ 9996, 625, 759, 759, 759, 759, 759, 759, 759, 759, 759, 759, 759, 759, 759, 759 ], "verify_calls": 170 } }, "passed": false, "prompt_id": "flappy_html5_canvas_game", "prompt_tokens": 119, "sampler": { "seed": 0, "temperature": 1.0, "top_k": 64, "top_p": 0.95 } }, "status": "failed" }