karlexmarin Claude Opus 4.7 (1M context) committed on
Commit
e9f9ac5
·
1 Parent(s): 5c94f4b

v0.8.6+ niah: config.json fetch gets mirror fallback (gated → unsloth)

Browse files

Live HF Space probe of v0.8.6 hit the same wall the spec-decode
mode hit before: pasting meta-llama/Llama-3.1-70B-Instruct into
the 🔍 NIAH→Reason mode returned a "🔒 gated" status because
fetchHfConfig used /raw/main/ and had no fallback. RULER
calibration is most useful for top-tier gated models, so this
defeated the v0.8.6 ship for the canonical demo case.

Reused the unsloth mirror chain from spec-decode, applied to
config.json:
1. Try `/resolve/main/config.json` on user's id (also fixes any
future LFS-tracked configs and is consistent with spec-decode).
2. On 401/403: try the four unsloth patterns — `unsloth/{name}`,
`unsloth/Meta-{name}`, `unsloth/{name}-bnb-4bit`,
`unsloth/Meta-{name}-bnb-4bit`. First success wins.
3. On other errors: surface as before (404/network/parse_failed).

The mirror id is stamped on the returned config as `__via_mirror`
so callers can render a "fetched via mirror" hint without breaking
backwards compatibility (existing modes that ignore the field
behave identically).

niahFetchConfig surfaces the mirror name in the status line. Free
upgrade for Profile / Compare / Unmask / Quant — they all use
fetchHfConfig and now succeed on Llama / Mistral / Gemma without
HF auth.

Verified locally: meta-llama/Llama-3.1-70B-Instruct → fetched via
mirror unsloth/Meta-Llama-3.1-70B-Instruct → RULER calibration
fires → heuristic 94% reasoning vs RULER-calibrated 74% (-20 pp).
The calibration caught the heuristic's optimism — exactly the
finding the v0.8.6 ship was meant to surface.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (1) hide show
  1. js/main.js +63 -12
js/main.js CHANGED
@@ -479,20 +479,63 @@ function fillRecipeForm(p) {
479
  // ════════════════════════════════════════════════════════════════════
480
  // HF Hub fetch (any model)
481
  // ════════════════════════════════════════════════════════════════════
482
- async function fetchHfConfig(modelId) {
483
- const url = `https://huggingface.co/${modelId}/raw/main/config.json`;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
484
  const resp = await fetch(url);
485
- if (!resp.ok) {
486
- if (resp.status === 401 || resp.status === 403) {
487
- // Mark this so callers can render a clickable accept-license link.
488
- const err = new Error(`🔒 ${modelId} is gated accept license at https://huggingface.co/${modelId}`);
489
- err.code = "gated";
490
- err.modelId = modelId;
491
- throw err;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
492
  }
493
- throw new Error(`HTTP ${resp.status} config.json not found at ${url}`);
 
 
 
494
  }
495
- return await resp.json();
 
496
  }
497
 
498
  $("hf-fetch-btn").addEventListener("click", async () => {
@@ -1404,8 +1447,16 @@ async function niahFetchConfig() {
1404
  try {
1405
  const cfg = await fetchHfConfig(modelId);
1406
  __niahLastConfig = cfg;
 
 
 
 
1407
  __niahLastModelId = modelId;
1408
- $("niah-status").textContent = tFmt("niah.status.fetched", { modelId });
 
 
 
 
1409
  return cfg;
1410
  } catch (err) {
1411
  if (err.code === "gated") {
 
479
  // ════════════════════════════════════════════════════════════════════
480
  // HF Hub fetch (any model)
481
  // ════════════════════════════════════════════════════════════════════
482
// Build the unsloth mirror ids to try when `modelId` turns out to be gated.
// Lets us fetch config.json for gated families (Llama / Mistral / Gemma)
// without requiring HF auth, on the premise (stated by the original author)
// that the unsloth redistributions are public and ship config.json unchanged.
//
// @param {string} modelId  Hub id as pasted by the user, e.g. "org/name".
// @returns {string[]}      Candidate ids, most-preferred first:
//                          unsloth/{name}, unsloth/Meta-{name},
//                          unsloth/{name}-bnb-4bit, unsloth/Meta-{name}-bnb-4bit.
//                          Empty when no repo name can be extracted.
function _hfMirrorCandidates(modelId) {
  // A non-string id cannot be split; there is simply nothing to mirror.
  if (typeof modelId !== "string") return [];

  const id = modelId.trim();
  const segments = id.split("/");
  const last = segments[segments.length - 1];
  if (!last) return [];

  const quantSuffix = "-bnb-4bit";
  // Never stack a second "Meta-" prefix or a second quantization suffix onto a
  // name that already carries one: such repo ids would only waste a request.
  const prefixes = last.startsWith("Meta-") ? [""] : ["", "Meta-"];
  const suffixes = last.endsWith(quantSuffix) ? [""] : ["", quantSuffix];

  const candidates = [];
  // Suffix is the outer loop so unquantized mirrors are tried first.
  for (const suffix of suffixes) {
    for (const prefix of prefixes) {
      const candidate = `unsloth/${prefix}${last}${suffix}`;
      // Skip the id we were given: it is the one that was just rejected.
      if (candidate !== id) candidates.push(candidate);
    }
  }
  return candidates;
}
498
+
499
// Fetch and parse config.json for one Hub repo id, reporting the outcome as a
// plain result object instead of throwing on HTTP or parse failures.
//
// Uses /resolve/main/ rather than /raw/main/ for consistency with the
// spec-decode mode (per the original author's note, /resolve also follows LFS
// pointers, which should not matter for a small config.json).
//
// @param {string} modelId  Hub repo id, e.g. "org/name".
// @returns {Promise<{ok: true, data: object}
//                 | {ok: false, status: number, error?: "parse_failed"}>}
//   `status` is always the HTTP status of the response, so callers can format
//   it safely; `error` is set only when the body was not a usable JSON object.
// Rejects only if `fetch` itself rejects (that rejection is not caught here).
async function _tryConfigUrl(modelId) {
  const url = `https://huggingface.co/${modelId}/resolve/main/config.json`;
  const resp = await fetch(url);
  if (!resp.ok) return { ok: false, status: resp.status };

  const parseFailed = { ok: false, status: resp.status, error: "parse_failed" };
  let parsed;
  try {
    parsed = await resp.json();
  } catch {
    // Malformed body: reported through the result object, not by throwing.
    return parseFailed;
  }
  // A config must be a JSON object. `null`, arrays and scalars are valid JSON
  // but unusable, and callers attach properties to the returned value.
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    return parseFailed;
  }
  return { ok: true, data: parsed };
}
513
+
514
// Fetch a model's config.json from the HF Hub, falling back to public unsloth
// mirrors when the requested repo is gated.
//
// 1. Try the user-pasted id directly.
// 2. On 401/403, try each id from _hfMirrorCandidates in order; the first
//    usable config wins and is stamped with `__via_mirror` (the mirror id)
//    and `__mirror_of` (the requested id) so callers can show a hint. Code
//    that ignores those fields is unaffected.
// 3. Otherwise surface the failure; a mirror would not help.
//
// @param {string} modelId  Hub repo id as pasted by the user.
// @returns {Promise<object>} The parsed config.json.
// @throws {Error} code "gated" (plus `modelId`) when the repo is gated and no
//   mirror worked; code "parse_failed" when the primary body is not usable
//   JSON; a plain Error carrying the HTTP status for any other failure. A
//   rejection from the primary fetch itself propagates unchanged.
async function fetchHfConfig(modelId) {
  const primaryUrl = `https://huggingface.co/${modelId}/resolve/main/config.json`;

  const primary = await _tryConfigUrl(modelId);
  if (primary.ok) return primary.data;

  if (primary.status === 401 || primary.status === 403) {
    for (const cand of _hfMirrorCandidates(modelId)) {
      let attempt;
      try {
        attempt = await _tryConfigUrl(cand);
      } catch (mirrorErr) {
        // Mirrors are best-effort: a transport failure on one candidate must
        // not hide the remaining candidates or the "gated" diagnosis below.
        console.debug(`mirror ${cand} unreachable`, mirrorErr);
        continue;
      }
      // Guard the stamp: only an object can carry the provenance fields.
      if (attempt.ok && attempt.data !== null && typeof attempt.data === "object") {
        attempt.data.__via_mirror = cand;
        attempt.data.__mirror_of = modelId;
        return attempt.data;
      }
    }
    // Callers key off `code`/`modelId` to render an accept-license link.
    const err = new Error(`🔒 ${modelId} is gated accept license at https://huggingface.co/${modelId}`);
    err.code = "gated";
    err.modelId = modelId;
    throw err;
  }

  if (primary.error === "parse_failed") {
    // The request succeeded but the body was unusable; reporting this as an
    // HTTP error would print a meaningless status ("HTTP undefined").
    const err = new Error(`config.json at ${primaryUrl} is not valid JSON`);
    err.code = "parse_failed";
    throw err;
  }

  throw new Error(`HTTP ${primary.status} — config.json not found at ${primaryUrl}`);
}
540
 
541
  $("hf-fetch-btn").addEventListener("click", async () => {
 
1447
  try {
1448
  const cfg = await fetchHfConfig(modelId);
1449
  __niahLastConfig = cfg;
1450
+ // Keep the user-pasted id for RULER lookup (it has the canonical
1451
+ // alias mapping). The mirror id is recorded in cfg.__via_mirror
1452
+ // for any UI that wants to surface "fetched via mirror" — niah
1453
+ // status string already shows it below.
1454
  __niahLastModelId = modelId;
1455
+ if (cfg.__via_mirror) {
1456
+ $("niah-status").innerHTML = `${tFmt("niah.status.fetched", { modelId })} <span class="subtle" style="color:#d29922;">(via mirror <code>${cfg.__via_mirror}</code>)</span>`;
1457
+ } else {
1458
+ $("niah-status").textContent = tFmt("niah.status.fetched", { modelId });
1459
+ }
1460
  return cfg;
1461
  } catch (err) {
1462
  if (err.code === "gated") {