Spaces:

karlexmarin
/

taf-agent

Running

karlexmarin Claude Opus 4.7 (1M context) committed on May 2

Commit

c76c38e

1 Parent(s): fed77dc

fix: demote δ_SWA to disabled; flag δ_post_IH and δ_instruct exploratory

Cross-panel audit (panel.csv n=22 mongo-corpus rows with γ_Padé) shows:

δ_GQA = +0.11 ✓ replicates (group-mean +0.115 on n=9 yes / 13 no)
δ_SWA = -0.21 ✗ ORIGINALLY FIT ON n=1 (single SWA model in panel)
group-mean is +0.355 on the single yes-case
→ demoted to 0.0 with status='exploratory_n1_disabled'
δ_post_IH = -0.15 ⚠ does NOT replicate (group-mean ~0 on n=16 yes / 6 no)
→ kept but tagged 'exploratory_no_replication'
δ_instruct = -0.10 ⚠ n=3 yes / 19 no, p=0.06 (already commented as tentative)
→ tagged 'tentative_n3_p0.06'

Both gamma_decompose (v1, paper sesión 28) and gamma_decompose_v2
(paper sesión 29) now emit calibration_warning + per-axis status fields
so downstream consumers can detect which corrections are reliable.

Tests: 22/22 pass including δ_SWA disabled regression.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (2) hide show

python/taf_browser.py +36 -10
tests/test_taf_formulas.py +29 -0

python/taf_browser.py CHANGED Viewed

@@ -22,16 +22,34 @@ def gamma_pade(theta: float, T_eval: int) -> float:
 def gamma_decompose(gamma_pade_val, has_GQA=False, has_SWA=False, n_params=0.0) -> dict:
-    """§26.10 — 5-axis decomposition (n=23 OLS, paper sesión 28)."""
     delta_GQA = +0.11 if has_GQA else 0.0
-    delta_SWA = -0.21 if has_SWA else 0.0
     delta_post_IH = -0.15 if n_params >= 4e8 else 0.0
     return {
-        "pade_centroid":   gamma_pade_val,
-        "delta_GQA":       delta_GQA,
-        "delta_SWA":       delta_SWA,
-        "delta_post_IH":   delta_post_IH,
-        "gamma_corrected": gamma_pade_val + delta_GQA + delta_SWA + delta_post_IH,
     }
@@ -212,18 +230,21 @@ def gamma_decompose_v2(gamma_pade_val: float, n_params_M: float,
     """§28.3 — 6-axis decomposition (sesión 29 update with imprint axis).
     γ_obs = γ_pade
-           + ν·log_10(P/P_0)·𝟙[corpus=random]    ← NEW imprint axis (DERIVED)
            + Δ_corpus(text-rand)
            + δ_arch(GQA, SWA)
            + δ_circuit(IH phase)
            + δ_train(steps, RLHF, instruct)
            + ε
-    Imprint axis activates only on RANDOM input. TEXT input dominated by corpus.
     """
     delta_imprint = NU_IMPRINT * math.log10(max(n_params_M, 1e-3) / P_0_IMPRINT_M) \
                     if corpus == "random" else 0.0
     delta_GQA = +0.11 if has_GQA else 0.0
-    delta_SWA = -0.21 if has_SWA else 0.0
     delta_post_IH = -0.15 if n_params_M >= 400 else 0.0
     delta_instruct = -0.10 if is_instruct else 0.0  # F9 tentative (n=3, p=0.06)
     return {
@@ -231,12 +252,17 @@ def gamma_decompose_v2(gamma_pade_val: float, n_params_M: float,
         "delta_imprint":       delta_imprint,
         "delta_GQA":           delta_GQA,
         "delta_SWA":           delta_SWA,
         "delta_post_IH":       delta_post_IH,
         "delta_instruct":      delta_instruct,
         "gamma_corrected":     gamma_pade_val + delta_imprint + delta_GQA
                                 + delta_SWA + delta_post_IH + delta_instruct,
         "corpus":              corpus,
         "axes":                ["pade", "imprint", "GQA", "SWA", "IH", "instruct"],
     }

 def gamma_decompose(gamma_pade_val, has_GQA=False, has_SWA=False, n_params=0.0) -> dict:
+    """§26.10 — 5-axis decomposition (n=23 OLS, paper sesión 28).
+    Calibration audit (2026-05-02 panel re-check):
+      δ_GQA      = +0.11   ✓ replicates (group-mean +0.115 on n=9/13)
+      δ_SWA      = -0.21   ⚠ ORIGINALLY FIT ON n=1; demoted to None here
+                              (insufficient data; group-mean +0.355 with single yes-case).
+                              Returning δ_SWA = 0 with `delta_SWA_status: 'exploratory_n1'`
+                              instead of applying an unreliable correction.
+      δ_post_IH  = -0.15   ⚠ does NOT replicate (group-mean ≈ 0 on n=16/6);
+                              kept but flagged 'exploratory'.
+      δ_instruct  not in this v1; v2 has -0.10 with n=3, p=0.06 caveat.
+    """
     delta_GQA = +0.11 if has_GQA else 0.0
+    # SWA: demoted — original constant rested on n=1.
+    delta_SWA = 0.0
+    delta_SWA_status = "exploratory_n1_disabled" if has_SWA else "not_applicable"
     delta_post_IH = -0.15 if n_params >= 4e8 else 0.0
     return {
+        "pade_centroid":         gamma_pade_val,
+        "delta_GQA":             delta_GQA,
+        "delta_SWA":             delta_SWA,
+        "delta_SWA_status":      delta_SWA_status,
+        "delta_post_IH":         delta_post_IH,
+        "delta_post_IH_status":  "exploratory_no_replication" if delta_post_IH != 0 else "not_applicable",
+        "gamma_corrected":       gamma_pade_val + delta_GQA + delta_SWA + delta_post_IH,
+        "calibration_warning":  ("SWA correction disabled (originally fit on n=1). "
+                                 "post_IH correction marked exploratory (group-mean ≈ 0 in re-audit). "
+                                 "GQA correction replicates."),
     }
     """§28.3 — 6-axis decomposition (sesión 29 update with imprint axis).
     γ_obs = γ_pade
+           + ν·log_10(P/P_0)·𝟙[corpus=random]    ← NEW imprint axis (DERIVED, n=22, err 0.3%)
            + Δ_corpus(text-rand)
            + δ_arch(GQA, SWA)
            + δ_circuit(IH phase)
            + δ_train(steps, RLHF, instruct)
            + ε
+    Calibration audit 2026-05-02:
+      δ_GQA solid; δ_SWA demoted (n=1); δ_post_IH exploratory; δ_instruct exploratory (n=3).
     """
     delta_imprint = NU_IMPRINT * math.log10(max(n_params_M, 1e-3) / P_0_IMPRINT_M) \
                     if corpus == "random" else 0.0
     delta_GQA = +0.11 if has_GQA else 0.0
+    # SWA disabled: originally fit on n=1.
+    delta_SWA = 0.0
     delta_post_IH = -0.15 if n_params_M >= 400 else 0.0
     delta_instruct = -0.10 if is_instruct else 0.0  # F9 tentative (n=3, p=0.06)
     return {
         "delta_imprint":       delta_imprint,
         "delta_GQA":           delta_GQA,
         "delta_SWA":           delta_SWA,
+        "delta_SWA_status":    "exploratory_n1_disabled" if has_SWA else "not_applicable",
         "delta_post_IH":       delta_post_IH,
+        "delta_post_IH_status": "exploratory_no_replication" if delta_post_IH != 0 else "not_applicable",
         "delta_instruct":      delta_instruct,
+        "delta_instruct_status": "tentative_n3_p0.06" if delta_instruct != 0 else "not_applicable",
         "gamma_corrected":     gamma_pade_val + delta_imprint + delta_GQA
                                 + delta_SWA + delta_post_IH + delta_instruct,
         "corpus":              corpus,
         "axes":                ["pade", "imprint", "GQA", "SWA", "IH", "instruct"],
+        "calibration_warning":  ("SWA disabled (n=1). post_IH/instruct marked exploratory. "
+                                 "GQA + imprint axes are the most reliable."),
     }

tests/test_taf_formulas.py CHANGED Viewed

@@ -21,6 +21,7 @@ from diagnose_model import (  # type: ignore
 )
 from taf_browser import (  # type: ignore
     gamma_pade, d_horizon, theta_design, df_window,
 )
@@ -241,3 +242,31 @@ def test_theta_eff_pade_definition():
     for theta in (10000, 500000, 1_000_000):
         for T in (1000, 2000):
             assert abs(theta_eff_pade(theta, T) - (theta + T / math.sqrt(2))) < 1e-9

 )
 from taf_browser import (  # type: ignore
     gamma_pade, d_horizon, theta_design, df_window,
+    gamma_decompose, gamma_decompose_v2,
 )
     for theta in (10000, 500000, 1_000_000):
         for T in (1000, 2000):
             assert abs(theta_eff_pade(theta, T) - (theta + T / math.sqrt(2))) < 1e-9
+# ─────────────────────────────────────────────────────────────────────────
+# gamma_decompose: audit-driven calibration changes
+# ─────────────────────────────────────────────────────────────────────────
def test_decompose_SWA_disabled():
    """δ_SWA was originally fit on n=1 — must NOT apply correction; status flagged."""
    result = gamma_decompose(0.75, has_SWA=True)
    assert result["delta_SWA"] == 0.0
    assert "n1_disabled" in result["delta_SWA_status"]
    # The regression being guarded is the old -0.21 leaking into the sum, so
    # check the corrected value too (no other axis is active in this call).
    assert result["gamma_corrected"] == 0.75
    # A model without SWA must not carry the "disabled" flag.
    assert gamma_decompose(0.75)["delta_SWA_status"] == "not_applicable"
def test_decompose_GQA_still_active():
    """δ_GQA replicates in panel re-audit (+0.115 vs +0.11 hardcoded)."""
    on = gamma_decompose(0.75, has_GQA=True)
    off = gamma_decompose(0.75, has_GQA=False)
    assert abs(on["delta_GQA"] - 0.11) < 1e-9
    assert off["delta_GQA"] == 0.0
    # The correction must also reach the corrected value, not just the
    # per-axis field.
    assert abs((on["gamma_corrected"] - off["gamma_corrected"]) - 0.11) < 1e-9
def test_decompose_v2_warnings_present():
    """v2 must emit calibration_warning and a status field for each flagged axis."""
    r = gamma_decompose_v2(0.75, n_params_M=500, has_SWA=True, is_instruct=True)
    assert "calibration_warning" in r
    assert r["delta_SWA"] == 0.0  # disabled
    assert "exploratory" in r["delta_SWA_status"] or "n1" in r["delta_SWA_status"]
    # n_params_M=500 is above the 400 threshold, so the post-IH correction is
    # applied and must be flagged as non-replicating.
    assert r["delta_post_IH_status"] == "exploratory_no_replication"
    # is_instruct=True applies the tentative instruct correction.
    assert r["delta_instruct_status"] == "tentative_n3_p0.06"