Spaces:
Running
fix: demote δ_SWA to disabled; flag δ_post_IH and δ_instruct exploratory
Browse files
Cross-panel audit (panel.csv n=22 mongo-corpus rows with γ_Padé) shows:
δ_GQA = +0.11 ✓ replicates (group-mean +0.115 on n=9 yes / 13 no)
δ_SWA = -0.21 ✗ ORIGINALLY FIT ON n=1 (single SWA model in panel)
group-mean is +0.355 on the single yes-case
→ demoted to 0.0 with status='exploratory_n1_disabled'
δ_post_IH = -0.15 ⚠ does NOT replicate (group-mean ~0 on n=16 yes / 6 no)
→ kept but tagged 'exploratory_no_replication'
δ_instruct = -0.10 ⚠ n=3 yes / 19 no, p=0.06 (already commented as tentative)
→ tagged 'tentative_n3_p0.06'
Both gamma_decompose (v1, paper sesión 28) and gamma_decompose_v2
(paper sesión 29) now emit calibration_warning + per-axis status fields
so downstream consumers can detect which corrections are reliable.
Tests: 22/22 pass including δ_SWA disabled regression.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
- python/taf_browser.py +36 -10
- tests/test_taf_formulas.py +29 -0
|
@@ -22,16 +22,34 @@ def gamma_pade(theta: float, T_eval: int) -> float:
|
|
| 22 |
|
| 23 |
|
| 24 |
def gamma_decompose(gamma_pade_val, has_GQA=False, has_SWA=False, n_params=0.0) -> dict:
|
| 25 |
-
"""§26.10 — 5-axis decomposition (n=23 OLS, paper sesión 28).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
delta_GQA = +0.11 if has_GQA else 0.0
|
| 27 |
-
|
|
|
|
|
|
|
| 28 |
delta_post_IH = -0.15 if n_params >= 4e8 else 0.0
|
| 29 |
return {
|
| 30 |
-
"pade_centroid":
|
| 31 |
-
"delta_GQA":
|
| 32 |
-
"delta_SWA":
|
| 33 |
-
"
|
| 34 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
}
|
| 36 |
|
| 37 |
|
|
@@ -212,18 +230,21 @@ def gamma_decompose_v2(gamma_pade_val: float, n_params_M: float,
|
|
| 212 |
"""§28.3 — 6-axis decomposition (sesión 29 update with imprint axis).
|
| 213 |
|
| 214 |
γ_obs = γ_pade
|
| 215 |
-
+ ν·log_10(P/P_0)·𝟙[corpus=random] ← NEW imprint axis (DERIVED)
|
| 216 |
+ Δ_corpus(text-rand)
|
| 217 |
+ δ_arch(GQA, SWA)
|
| 218 |
+ δ_circuit(IH phase)
|
| 219 |
+ δ_train(steps, RLHF, instruct)
|
| 220 |
+ ε
|
| 221 |
-
|
|
|
|
|
|
|
| 222 |
"""
|
| 223 |
delta_imprint = NU_IMPRINT * math.log10(max(n_params_M, 1e-3) / P_0_IMPRINT_M) \
|
| 224 |
if corpus == "random" else 0.0
|
| 225 |
delta_GQA = +0.11 if has_GQA else 0.0
|
| 226 |
-
|
|
|
|
| 227 |
delta_post_IH = -0.15 if n_params_M >= 400 else 0.0
|
| 228 |
delta_instruct = -0.10 if is_instruct else 0.0 # F9 tentative (n=3, p=0.06)
|
| 229 |
return {
|
|
@@ -231,12 +252,17 @@ def gamma_decompose_v2(gamma_pade_val: float, n_params_M: float,
|
|
| 231 |
"delta_imprint": delta_imprint,
|
| 232 |
"delta_GQA": delta_GQA,
|
| 233 |
"delta_SWA": delta_SWA,
|
|
|
|
| 234 |
"delta_post_IH": delta_post_IH,
|
|
|
|
| 235 |
"delta_instruct": delta_instruct,
|
|
|
|
| 236 |
"gamma_corrected": gamma_pade_val + delta_imprint + delta_GQA
|
| 237 |
+ delta_SWA + delta_post_IH + delta_instruct,
|
| 238 |
"corpus": corpus,
|
| 239 |
"axes": ["pade", "imprint", "GQA", "SWA", "IH", "instruct"],
|
|
|
|
|
|
|
| 240 |
}
|
| 241 |
|
| 242 |
|
|
|
|
| 22 |
|
| 23 |
|
| 24 |
def gamma_decompose(gamma_pade_val, has_GQA=False, has_SWA=False, n_params=0.0) -> dict:
|
| 25 |
+
"""§26.10 — 5-axis decomposition (n=23 OLS, paper sesión 28).
|
| 26 |
+
|
| 27 |
+
Calibration audit (2026-05-02 panel re-check):
|
| 28 |
+
δ_GQA = +0.11 ✓ replicates (group-mean +0.115 on n=9/13)
|
| 29 |
+
δ_SWA = -0.21 ⚠ ORIGINALLY FIT ON n=1; demoted to 0.0 here
|
| 30 |
+
(insufficient data; group-mean +0.355 with single yes-case).
|
| 31 |
+
Returning δ_SWA = 0 with `delta_SWA_status: 'exploratory_n1_disabled'`
|
| 32 |
+
instead of applying an unreliable correction.
|
| 33 |
+
δ_post_IH = -0.15 ⚠ does NOT replicate (group-mean ≈ 0 on n=16/6);
|
| 34 |
+
kept but flagged 'exploratory'.
|
| 35 |
+
δ_instruct not in this v1; v2 has -0.10 with n=3, p=0.06 caveat.
|
| 36 |
+
"""
|
| 37 |
delta_GQA = +0.11 if has_GQA else 0.0
|
| 38 |
+
# SWA: demoted — original constant rested on n=1.
|
| 39 |
+
delta_SWA = 0.0
|
| 40 |
+
delta_SWA_status = "exploratory_n1_disabled" if has_SWA else "not_applicable"
|
| 41 |
delta_post_IH = -0.15 if n_params >= 4e8 else 0.0
|
| 42 |
return {
|
| 43 |
+
"pade_centroid": gamma_pade_val,
|
| 44 |
+
"delta_GQA": delta_GQA,
|
| 45 |
+
"delta_SWA": delta_SWA,
|
| 46 |
+
"delta_SWA_status": delta_SWA_status,
|
| 47 |
+
"delta_post_IH": delta_post_IH,
|
| 48 |
+
"delta_post_IH_status": "exploratory_no_replication" if delta_post_IH != 0 else "not_applicable",
|
| 49 |
+
"gamma_corrected": gamma_pade_val + delta_GQA + delta_SWA + delta_post_IH,
|
| 50 |
+
"calibration_warning": ("SWA correction disabled (originally fit on n=1). "
|
| 51 |
+
"post_IH correction marked exploratory (group-mean ≈ 0 in re-audit). "
|
| 52 |
+
"GQA correction replicates."),
|
| 53 |
}
|
| 54 |
|
| 55 |
|
|
|
|
| 230 |
"""§28.3 — 6-axis decomposition (sesión 29 update with imprint axis).
|
| 231 |
|
| 232 |
γ_obs = γ_pade
|
| 233 |
+
+ ν·log_10(P/P_0)·𝟙[corpus=random] ← NEW imprint axis (DERIVED, n=22, err 0.3%)
|
| 234 |
+ Δ_corpus(text-rand)
|
| 235 |
+ δ_arch(GQA, SWA)
|
| 236 |
+ δ_circuit(IH phase)
|
| 237 |
+ δ_train(steps, RLHF, instruct)
|
| 238 |
+ ε
|
| 239 |
+
|
| 240 |
+
Calibration audit 2026-05-02:
|
| 241 |
+
δ_GQA solid; δ_SWA demoted (n=1); δ_post_IH exploratory; δ_instruct exploratory (n=3).
|
| 242 |
"""
|
| 243 |
delta_imprint = NU_IMPRINT * math.log10(max(n_params_M, 1e-3) / P_0_IMPRINT_M) \
|
| 244 |
if corpus == "random" else 0.0
|
| 245 |
delta_GQA = +0.11 if has_GQA else 0.0
|
| 246 |
+
# SWA disabled: originally fit on n=1.
|
| 247 |
+
delta_SWA = 0.0
|
| 248 |
delta_post_IH = -0.15 if n_params_M >= 400 else 0.0
|
| 249 |
delta_instruct = -0.10 if is_instruct else 0.0 # F9 tentative (n=3, p=0.06)
|
| 250 |
return {
|
|
|
|
| 252 |
"delta_imprint": delta_imprint,
|
| 253 |
"delta_GQA": delta_GQA,
|
| 254 |
"delta_SWA": delta_SWA,
|
| 255 |
+
"delta_SWA_status": "exploratory_n1_disabled" if has_SWA else "not_applicable",
|
| 256 |
"delta_post_IH": delta_post_IH,
|
| 257 |
+
"delta_post_IH_status": "exploratory_no_replication" if delta_post_IH != 0 else "not_applicable",
|
| 258 |
"delta_instruct": delta_instruct,
|
| 259 |
+
"delta_instruct_status": "tentative_n3_p0.06" if delta_instruct != 0 else "not_applicable",
|
| 260 |
"gamma_corrected": gamma_pade_val + delta_imprint + delta_GQA
|
| 261 |
+ delta_SWA + delta_post_IH + delta_instruct,
|
| 262 |
"corpus": corpus,
|
| 263 |
"axes": ["pade", "imprint", "GQA", "SWA", "IH", "instruct"],
|
| 264 |
+
"calibration_warning": ("SWA disabled (n=1). post_IH/instruct marked exploratory. "
|
| 265 |
+
"GQA + imprint axes are the most reliable."),
|
| 266 |
}
|
| 267 |
|
| 268 |
|
|
@@ -21,6 +21,7 @@ from diagnose_model import ( # type: ignore
|
|
| 21 |
)
|
| 22 |
from taf_browser import ( # type: ignore
|
| 23 |
gamma_pade, d_horizon, theta_design, df_window,
|
|
|
|
| 24 |
)
|
| 25 |
|
| 26 |
|
|
@@ -241,3 +242,31 @@ def test_theta_eff_pade_definition():
|
|
| 241 |
for theta in (10000, 500000, 1_000_000):
|
| 242 |
for T in (1000, 2000):
|
| 243 |
assert abs(theta_eff_pade(theta, T) - (theta + T / math.sqrt(2))) < 1e-9
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
)
|
| 22 |
from taf_browser import ( # type: ignore
|
| 23 |
gamma_pade, d_horizon, theta_design, df_window,
|
| 24 |
+
gamma_decompose, gamma_decompose_v2,
|
| 25 |
)
|
| 26 |
|
| 27 |
|
|
|
|
| 242 |
for theta in (10000, 500000, 1_000_000):
|
| 243 |
for T in (1000, 2000):
|
| 244 |
assert abs(theta_eff_pade(theta, T) - (theta + T / math.sqrt(2))) < 1e-9
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
# ─────────────────────────────────────────────────────────────────────────
|
| 248 |
+
# gamma_decompose: audit-driven calibration changes
|
| 249 |
+
# ─────────────────────────────────────────────────────────────────────────
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def test_decompose_SWA_disabled():
|
| 253 |
+
"""δ_SWA was originally fit on n=1 — must NOT apply correction; status flagged."""
|
| 254 |
+
result = gamma_decompose(0.75, has_SWA=True)
|
| 255 |
+
assert result["delta_SWA"] == 0.0
|
| 256 |
+
assert "n1_disabled" in result["delta_SWA_status"]
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
def test_decompose_GQA_still_active():
|
| 260 |
+
"""δ_GQA replicates in panel re-audit (+0.115 vs +0.11 hardcoded)."""
|
| 261 |
+
on = gamma_decompose(0.75, has_GQA=True)
|
| 262 |
+
off = gamma_decompose(0.75, has_GQA=False)
|
| 263 |
+
assert abs(on["delta_GQA"] - 0.11) < 1e-9
|
| 264 |
+
assert off["delta_GQA"] == 0.0
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
def test_decompose_v2_warnings_present():
|
| 268 |
+
"""v2 must emit calibration_warning."""
|
| 269 |
+
r = gamma_decompose_v2(0.75, n_params_M=500, has_SWA=True, is_instruct=True)
|
| 270 |
+
assert "calibration_warning" in r
|
| 271 |
+
assert r["delta_SWA"] == 0.0 # disabled
|
| 272 |
+
assert "exploratory" in r["delta_SWA_status"] or "n1" in r["delta_SWA_status"]
|