// Runtime smoke test for the TAF Agent logic modules. // Exercises the public functions with representative + edge-case inputs so that // a future change which breaks a return shape or throws at runtime is caught // immediately (syntax checks alone do not catch these). Pure-logic modules only // — no DOM, no network. Run: node tests/smoke.mjs import assert from "node:assert"; import { gammaCheckAll, classifyRegime } from "../js/gamma_check.js"; import { predictNIAHReasoning, sweepContextLengths } from "../js/niah_reasoning.js"; import { computeArenaCI, parseVotesCSV, SAMPLE_VOTES_CSV } from "../js/arena_ci.js"; import { computeContaminationPrior } from "../js/contamination_prior.js"; import { predictQuantShift } from "../js/quant_regime.js"; import { computeDriftBound } from "../js/cross_drift.js"; import { planExtension } from "../js/yarn_planner.js"; import { planLaunch } from "../js/launch_flags.js"; import { analyzeGguf } from "../js/gguf_bridge.js"; import { unmaskConfig } from "../js/swa_unmasker.js"; let pass = 0, fail = 0; function check(name, fn) { try { fn(); console.log(" PASS", name); pass++; } catch (e) { console.log(" FAIL", name, "—", e.message); fail++; } } console.log("\n[gamma_check] regime classification"); check("γ=1.02 → phase_b (not fraud)", () => assert.equal(gammaCheckAll({ theta: 10000, T: 2048, gObs: 1.02, isRandom: false }).regime, "phase_b")); check("γ=1.0 → phase_b", () => assert.equal(classifyRegime(NaN, 1.0, false), "phase_b")); check("γ=0.75 → not fraud", () => assert.notEqual(gammaCheckAll({ theta: 10000, T: 2048, gObs: 0.75 }).regime, "fraud")); check("γ>1 random → swa", () => assert.equal(classifyRegime(NaN, 1.1, true), "swa")); check("NaN γ → unknown", () => assert.equal(classifyRegime(1, NaN, false), "unknown")); console.log("\n[niah_reasoning] extrapolation axis (no d_horizon tautology)"); check("no d_horizon, has extrapolation_ratio, niah∈[0,1]", () => { const r = predictNIAHReasoning({ rope_theta: 10000, max_position_embeddings: 8192, num_attention_heads: 32, hidden_size: 4096, num_key_value_heads: 8 }, 32768); assert.ok(!("d_horizon" in r), "d_horizon should be gone"); assert.ok("extrapolation_ratio" in r); assert.ok(Number.isFinite(r.niah_rate) && r.niah_rate >= 0 && r.niah_rate <= 1); assert.ok(Number.isFinite(r.reasoning_rate)); }); check("config WITHOUT rope_scaling does not throw", () => { const r = predictNIAHReasoning({ rope_theta: 10000, max_position_embeddings: 8192 }, 4096); assert.ok(Number.isFinite(r.niah_rate)); }); check("within context → high niah", () => assert.ok(predictNIAHReasoning({ rope_theta: 10000, max_position_embeddings: 32768 }, 4096).niah_rate > 0.8)); check("YaRN boosts θ (40000)", () => assert.equal(predictNIAHReasoning({ rope_theta: 10000, max_position_embeddings: 8192, rope_scaling: { rope_type: "yarn", factor: 4 } }, 32768).theta, 40000)); check("linear extends T_train, keeps θ", () => { const r = predictNIAHReasoning({ rope_theta: 10000, max_position_embeddings: 8192, rope_scaling: { rope_type: "linear", factor: 4 } }, 32768); assert.equal(r.theta, 10000); assert.equal(r.T_train, 32768); }); check("sweep returns rows", () => assert.ok(sweepContextLengths({ rope_theta: 10000, max_position_embeddings: 8192 }).length > 0)); console.log("\n[arena_ci] bradley-terry + two-sided ties"); check("sample CSV → ratings + ties arrays", () => { const r = computeArenaCI(parseVotesCSV(SAMPLE_VOTES_CSV)); assert.ok(Array.isArray(r.ratings) && Array.isArray(r.ties)); for (const t of r.ties) assert.ok(t.model_a && t.model_b); }); check("empty votes → safe empty", () => assert.equal(computeArenaCI([]).ratings.length, 0)); console.log("\n[contamination_prior] uncalibrated risk score honesty"); check("calibrated:false + clamped boolean + risk≤0.97", () => { const r = computeContaminationPrior("2024-12", "mmlu"); assert.equal(r.calibrated, false); assert.equal(typeof r.clamped, "boolean"); assert.ok(r.prior <= 0.97); }); check("high case clamps at 0.97", () => { const r = computeContaminationPrior("2024-12", "squad"); assert.equal(r.clamped, true); assert.equal(r.prior, 0.97); }); check("released after cutoff → low", () => assert.ok(computeContaminationPrior("2020-01", "aime24").prior < 0.3)); console.log("\n[quant_regime] ΔPPL never negative"); check("tiny model ΔPPL ≥ 0 (sizeBoost clamp)", () => { const r = predictQuantShift({ hidden_size: 128, num_hidden_layers: 2, vocab_size: 1000, num_attention_heads: 4 }, "gguf_q2_k"); assert.ok(r.delta_ppl.mid >= 0 && r.delta_ppl.low >= 0 && r.delta_ppl.high >= 0); }); check("normal model valid regime + ΔPPL≥0", () => { const r = predictQuantShift({ hidden_size: 4096, num_hidden_layers: 32, vocab_size: 128000, num_attention_heads: 32, num_key_value_heads: 8 }, "awq"); assert.ok(["safe", "mild", "significant", "cliff"].includes(r.regime)); assert.ok(r.delta_ppl.mid >= 0); }); console.log("\n[cross_drift] template flag is boolean (regression guard)"); const drift = (a, b) => computeDriftBound({ score: a, dtype: "fp16", framework: "hf", batch: 1, chat_template: "llama3" }, { score: b, dtype: "fp16", framework: "hf", batch: 1, chat_template: "chatml" }); check("diff templates same score → noise + boolean true", () => { const r = drift(80, 80); assert.equal(typeof r.breakdown.template_mismatch, "boolean"); assert.equal(r.breakdown.template_mismatch, true); assert.equal(r.verdict, "noise"); assert.equal(r.dominant_cause, "template_differs_no_effect"); }); check("render expr (main.js) no throw on boolean", () => { const r = drift(80, 80); assert.equal(r.breakdown.template_mismatch ? "show" : "", "show"); }); check("big gap diff templates → bug_template", () => assert.equal(drift(80, 30).verdict, "bug_template")); check("same config → noise + false", () => { const r = computeDriftBound({ score: 80, dtype: "fp16", framework: "hf", batch: 1, chat_template: "llama3" }, { score: 80, dtype: "fp16", framework: "hf", batch: 1, chat_template: "llama3" }); assert.equal(r.verdict, "noise"); assert.equal(r.breakdown.template_mismatch, false); }); console.log("\n[yarn_planner] d_horizon removed from verdict"); check("no dHorizon fields, gammaEff finite, valid verdict", () => { const r = planExtension({ originalCtx: 8192, theta: 10000, targetCtx: 32768, ropeType: "yarn" }); assert.ok(!("dHorizonEff" in r) && !("dHorizonNaive" in r)); assert.ok(Number.isFinite(r.gammaEff)); assert.ok(["healthy", "usable_with_care", "needs_finetune", "degrades"].includes(r.verdict)); }); check("within context → no_extension_needed", () => assert.equal(planExtension({ originalCtx: 32768, theta: 10000, targetCtx: 8192 }).verdict, "no_extension_needed")); console.log("\n[launch_flags] kv_wasted relabel"); check("target ≫ trained → kv_wasted (not horizon_wasted)", () => { const r = planLaunch({ nLayers: 32, nKvHeads: 8, headDim: 128, hidden: 4096, ropeTheta: 10000, ctxTrain: 8192, vramGB: 24, targetCtx: 32768, quant: "Q4_K_M" }); assert.ok(r.warnings.some(w => w.code === "kv_wasted")); assert.ok(!r.warnings.some(w => w.code === "horizon_wasted")); }); check("within trained → no kv_wasted", () => { const r = planLaunch({ nLayers: 32, nKvHeads: 8, headDim: 128, hidden: 4096, ropeTheta: 10000, ctxTrain: 8192, vramGB: 24, targetCtx: 8000, quant: "Q4_K_M" }); assert.ok(!r.warnings.some(w => w.code === "kv_wasted")); }); console.log("\n[gguf_bridge] θ nullable → incomplete"); check("normal → gammaTrain finite, no dHoriz/reaches", () => { const r = analyzeGguf({ rope_theta: 10000, context_length: 8192, architecture: "llama", num_attention_heads: 32, num_key_value_heads: 8 }, 8192); assert.ok(["healthy", "usable_with_care", "degrades"].includes(r.verdict)); assert.ok(Number.isFinite(r.gammaTrain)); assert.ok(!("dHoriz" in r) && !("reaches" in r)); }); check("missing rope_theta → incomplete + theta null", () => { const r = analyzeGguf({ context_length: 8192, architecture: "llama" }, 8192); assert.equal(r.verdict, "incomplete"); assert.equal(r.theta, null); }); console.log("\n[swa_unmasker] n_attn=0 guard"); check("n_attn=0 → no throw", () => assert.ok(unmaskConfig({ hidden_size: 4096, num_attention_heads: 0, num_hidden_layers: 32 }))); check("normal SWA config → no throw", () => assert.ok(unmaskConfig({ hidden_size: 4096, num_attention_heads: 32, num_key_value_heads: 8, num_hidden_layers: 32, sliding_window: 4096 }))); console.log(`\n==== smoke: ${pass} passed, ${fail} failed ====`); process.exit(fail ? 1 : 0);