File size: 37,784 Bytes
6ab0441
 
 
 
 
7bbf792
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ab0441
 
 
 
 
ed3d534
137cb0a
ed3d534
 
 
 
137cb0a
 
 
 
0386760
6ab0441
0386760
 
 
 
 
 
 
 
 
137cb0a
0386760
6ab0441
137cb0a
 
 
0386760
137cb0a
d0a945b
6ab0441
 
d0a945b
 
 
 
31b1415
d0a945b
31b1415
 
 
 
d0a945b
c969a03
31b1415
 
c969a03
31b1415
 
c969a03
 
d0a945b
abea671
d0a945b
31b1415
 
d0a945b
 
 
 
31b1415
 
d0a945b
 
 
 
 
31b1415
 
d0a945b
 
 
 
31b1415
 
d0a945b
 
 
 
31b1415
 
d0a945b
 
 
 
abea671
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c969a03
 
 
 
 
 
 
 
 
 
 
31b1415
d0a945b
31b1415
 
 
 
d0a945b
 
31b1415
 
d0a945b
 
 
31b1415
 
d0a945b
 
 
31b1415
d0a945b
31b1415
 
 
 
 
 
d0a945b
 
31b1415
d0a945b
31b1415
 
 
d0a945b
 
31b1415
 
d0a945b
 
31b1415
 
959e23c
 
d0a945b
 
 
6ab0441
ed3d534
 
 
 
 
 
 
6ab0441
 
 
2dbc41f
 
0386760
 
 
 
2dbc41f
 
6ab0441
2dbc41f
 
c11b76c
2dbc41f
 
b4f7029
 
6ab0441
2dbc41f
0386760
 
 
 
 
 
 
137cb0a
 
 
 
2dbc41f
0386760
 
 
 
 
 
 
 
ed3d534
0386760
 
6ab0441
0386760
 
ed3d534
0386760
ed3d534
0386760
 
 
 
ed3d534
0386760
ed3d534
0386760
ed3d534
0386760
 
 
 
 
2dbc41f
0386760
 
 
2dbc41f
0386760
 
 
2dbc41f
0386760
 
 
2dbc41f
0386760
 
 
2dbc41f
0386760
 
 
2dbc41f
0386760
 
 
2dbc41f
0386760
 
 
2dbc41f
0386760
 
 
2dbc41f
0386760
2dbc41f
 
0386760
 
 
 
ed3d534
6ab0441
 
c11b76c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0386760
 
137cb0a
 
 
 
2dbc41f
0386760
 
 
 
 
 
ed3d534
0386760
 
 
 
 
ed3d534
0386760
ed3d534
0386760
 
 
 
ed3d534
0386760
 
 
 
 
 
 
 
ed3d534
0386760
0637db4
 
 
0386760
0637db4
0386760
 
 
0637db4
 
 
0386760
0637db4
0386760
 
 
0637db4
 
 
0386760
0637db4
0386760
 
 
 
ed3d534
0386760
 
ed3d534
0386760
ed3d534
 
 
 
6ab0441
ed3d534
 
6ab0441
 
 
b4f7029
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ab0441
 
ed3d534
6ab0441
ed3d534
6ab0441
 
 
 
 
 
ed3d534
6ab0441
 
ed3d534
6ab0441
ed3d534
6ab0441
 
 
 
ed3d534
 
 
 
 
6ab0441
 
 
 
 
ed3d534
6ab0441
 
0386760
6ab0441
ed3d534
6ab0441
 
449213a
ed3d534
449213a
 
 
0386760
 
ed3d534
 
6ab0441
 
ed3d534
6ab0441
 
0386760
 
 
ed3d534
0386760
 
 
 
 
ed3d534
0386760
 
449213a
 
 
 
 
 
 
 
 
 
 
 
 
 
c11b76c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6ab0441
 
 
137cb0a
 
 
6ab0441
 
137cb0a
 
6ab0441
 
137cb0a
6ab0441
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0" />
  <title>TAF Agent — Test ANY Transformer LLM in Your Browser</title>
  <meta name="description" content="Free, auditable diagnostic for transformer LLMs. Predict viability (long-context, KV compression, training budget, hardware) from config alone. Runs entirely in your browser. No server, no auth, no cost." />
  <meta name="keywords" content="transformer, LLM, diagnostic, RoPE, NIAH, KV cache, viability, free, browser, GPU, NeurIPS, TAF" />
  <meta name="author" content="Carles Marin" />

  <!-- OpenGraph for social sharing (Twitter, LinkedIn, WhatsApp, Discord, etc.) -->
  <meta property="og:type" content="website" />
  <meta property="og:url" content="https://karlesmarin.github.io/tafagent/" />
  <meta property="og:title" content="TAF Agent — Test ANY Transformer LLM in Your Browser" />
  <meta property="og:description" content="Free, auditable transformer LLM diagnostic. 8 recipes, 7 modes, 4 languages. Runs in your browser. No server, no auth, $0/month forever." />
  <meta property="og:site_name" content="TAF Agent" />

  <!-- Twitter Card -->
  <meta name="twitter:card" content="summary_large_image" />
  <meta name="twitter:title" content="TAF Agent — Test ANY Transformer LLM in Your Browser" />
  <meta name="twitter:description" content="Free, auditable transformer LLM diagnostic. 8 recipes, 7 modes, 4 languages. Runs in your browser. $0 forever." />

  <!-- Theme color for browser UI -->
  <meta name="theme-color" content="#0a0e14" />

  <link rel="stylesheet" href="style.css" />
  <script src="https://cdn.jsdelivr.net/pyodide/v0.26.4/full/pyodide.js"></script>
</head>
<body>
  <header>
    <!-- Language switcher (top-right, round flags) -->
    <div class="lang-switcher">
      <button class="lang-btn" data-lang="en" data-label="English" title="English">🇬🇧</button>
      <button class="lang-btn" data-lang="es" data-label="Español" title="Español">🇪🇸</button>
      <button class="lang-btn" data-lang="fr" data-label="Français" title="Français">🇫🇷</button>
      <button class="lang-btn" data-lang="zh" data-label="中文" title="中文">🇨🇳</button>
    </div>

    <h1 data-i18n="hero.title">🔬 TAF Agent</h1>
    <p class="tagline" data-i18n="hero.tagline">
      Test <strong>ANY</strong> transformer LLM before you spend GPU/$.
    </p>
    <div class="arch-badges">
      <span class="badge">✓ RoPE-MHA</span>
      <span class="badge">✓ RoPE-GQA</span>
      <span class="badge">✓ ALiBi</span>
      <span class="badge">✓ AbsPE</span>
      <span class="badge">✓ SWA</span>
      <span class="badge">✓ SSM (Mamba)</span>
      <span class="badge">✓ Any HuggingFace public model</span>
    </div>
    <p class="subtle" style="margin-top:0.75rem;" data-i18n="hero.subtitle">
      All computation runs locally in your browser. Free. Unlimited. Auditable.
    </p>
    <p class="subtle" style="margin-top:0.25rem; font-size:0.85rem;" data-i18n="hero.about">
      Built by an independent researcher. Open source. Not affiliated with any model vendor.
    </p>
    <p style="margin-top:0.75rem;">
      <button id="help-btn" type="button" data-i18n="hero.help">📘 Help &amp; examples</button>
    </p>
  </header>

  <!-- Help modal -->
  <div id="help-modal">
    <div class="help-content">
      <button class="help-close" id="help-close" type="button" aria-label="Close help">×</button>
      <h2 data-i18n="help.title">📘 TAF Agent — User Manual</h2>

      <h3 data-i18n="help.what.title">What does it do?</h3>
      <p data-i18n="help.what.body">Predicts <strong>practical viability</strong> of any transformer LLM
      <em>before you spend GPU/$</em>. Answers questions like "will this model work at L=32K?" or
      "should I train custom or use API?" using deterministic Python formulas (TAF — Thermodynamic Attention Framework).</p>

      <h3 data-i18n="help.modes.title">How to use — 7 modes</h3>
      <p data-i18n="help.modes.profile"><strong>📇 Profile</strong>: paste model id → all recipes at once = TAF Card. <strong>Best starting point</strong>.</p>
      <p data-i18n="help.modes.compare"><strong>🆚 Compare</strong>: 2-3 models side-by-side on same recipe. Best when choosing between candidates.</p>
      <p data-i18n="help.modes.inspector"><strong>🔍 Inspect config</strong>: paste raw <code>config.json</code> → tool parses + runs full Profile. For private models, in-development configs, or models not yet on HF Hub.</p>
      <p data-i18n="help.modes.ask"><strong>💬 Ask plain English</strong>: free-form question, in-browser LLM picks the recipe. Best for casual exploration.</p>
      <p data-i18n="help.modes.recipe"><strong>📋 Recipe + form</strong>: manual selection, full parameter control. Best when you want exact control.</p>
      <p data-i18n="help.modes.diagnose"><strong>🩺 Diagnose CLI</strong>: generate Python command to measure γ on your local machine (transformers + numpy). Fast ≈5 min CPU; full ≈20–60 min GPU. Output JSON re-uploadable via Inspect.</p>
      <p data-i18n="help.modes.phase"><strong>📊 Phase diagram</strong>: scatter plot of 23 panel models on (log θ, γ) plane. Hagedorn line γ=1 separates Phase A from Phase B. Click a dot to load that model into Recipe form.</p>

      <h3 data-i18n="help.recipes.title">The 8 recipes available</h3>

      <p data-i18n="help.recipe.x1.title"><strong>X-1 Custom training vs API</strong> — compares cost of training your own model vs paying for API access.</p>
      <div class="help-example" data-i18n="help.recipe.x1.example">
        Try: <em>"Should I train an 8B custom model or use GPT-4o for 50M tokens/month?"</em><br>
        Answer types: YES (custom) / NO (API) with break-even months.
      </div>

      <p data-i18n="help.recipe.x2.title"><strong>X-2 Long Context Viability</strong> — predicts if a model serves a target context length reliably.</p>
      <div class="help-example" data-i18n="help.recipe.x2.example">
        Try: <em>"Will Meta-Llama-3-8B handle 32000 tokens for retrieval?"</em><br>
        Chains: γ_Padé → decomposition → d_horizon → NIAH ceiling → hallucination → KV memory.<br>
        Verdict: YES / DEGRADED / NO with mitigation if needed.
      </div>

      <p data-i18n="help.recipe.x3.title"><strong>X-3 Budget pre-flight</strong> — given $ budget, what model is feasible to train?</p>
      <div class="help-example" data-i18n="help.recipe.x3.example">
        Try: <em>"I have $5000, what model can I train?"</em><br>
        Answer: GO / TINY-MODEL / MEMORY-LIMITED with concrete N (params) and D (tokens).
      </div>

      <p data-i18n="help.recipe.x5.title"><strong>X-5 Hardware selection</strong> — which GPU should I use to serve at target throughput?</p>
      <div class="help-example" data-i18n="help.recipe.x5.example">
        Try: <em>"Cheapest hardware to serve Llama-3-8B at 10M tokens/day"</em><br>
        Answer: best GPU + $/Mtok + capacity vs target.
      </div>

      <p data-i18n="help.recipe.x19.title"><strong>X-19 KV Compression decision</strong> — should I use soft decay, hard cutoff, or literature methods?</p>
      <div class="help-example" data-i18n="help.recipe.x19.example">
        Try: <em>"How to compress KV cache for Qwen2.5-7B at 32K?"</em><br>
        Answer: USE SOFT DECAY / USE D_f CUTOFF / USE LITERATURE METHODS / USE HARD T_train.
      </div>

      <h3 style="margin-top: 1.5em;">— v0.4 (session 29 findings) —</h3>

      <p data-i18n="help.section.v04"><strong>What's new in v0.4</strong> (session 29 findings, 2026-04-28): three diagnostic recipes derived from cross-model panel analysis (n=22 LLMs).</p>

      <p data-i18n="help.recipe.x21.title"><strong>X-21 Imprint Purity Diagnostic</strong> — predicts γ on RANDOM tokens via ν=−1/(2π); how clean is the model's RoPE prediction?</p>
      <div class="help-example" data-i18n="help.recipe.x21.example">
        Try: <em>"How clean is the RoPE prediction on Llama-3-8B?"</em><br>
        Answer: predicted γ_random + purity diagnostic (CLEAN / OVER-IMPRINTED / UNDER-IMPRINTED).
      </div>
      <p data-i18n="help.v04.imprint" style="font-size: 0.9em; opacity: 0.85;"><strong>Learned-imprint slope ν = −1/(2π)</strong>: RoPE rotation period 2π drives a positional bias on weights, proportional to log(N_params). Even random tokens show this scaling. ν is DERIVED — not fitted (empirical err 0.3%).</p>

      <p data-i18n="help.recipe.x22.title"><strong>X-22 Compute-Context Invariant</strong> — does γ × log(N²·D) lie in panel band 51.2 ± 16.8? Detects scaling/training anomalies.</p>
      <div class="help-example" data-i18n="help.recipe.x22.example">
        Try: <em>"Does Mistral-7B fit the compute-context invariant?"</em><br>
        Answer: K = γ·log(N²·D), z-score, IN-BAND or OUTLIER.
      </div>
      <p data-i18n="help.v04.invariant" style="font-size: 0.9em; opacity: 0.85;"><strong>Chinchilla-attention invariant K</strong>: γ × log(N²·D) ≈ 51.2 ± 16.8 (CV=0.329). Connects compute scaling and attention exponent into a single dimensionless number.</p>

      <p data-i18n="help.recipe.x23.title"><strong>X-23 IH-Phase Detector</strong> — pre- or post-induction-head? Cheap probe via sign(γ_text − γ_random).</p>
      <div class="help-example" data-i18n="help.recipe.x23.example">
        Try: <em>"Is Qwen2.5-7B post-induction-head?"</em><br>
        Answer: CONFIRMED PRE-IH / CONFIRMED POST-IH / ANOMALY (with size-vs-Δγ consistency check).
      </div>
      <p data-i18n="help.v04.ih_probe" style="font-size: 0.9em; opacity: 0.85;"><strong>Δγ as IH probe</strong>: sign(γ_text − γ_random) > 0 ⟺ post-induction-head. Cheaper than running an in-context-learning benchmark.</p>

      <p data-i18n="help.v04.constants" style="font-size: 0.9em; opacity: 0.85;"><strong>γ-cluster on famous constants</strong> (intriguing, n=4): CodeLlama-13b γ=0.382 ≈ 1−1/φ (golden conjugate, err 0.0003); pythia-1.4b γ=0.705 ≈ 1/√2; Llama-2-7b γ=0.287 ≈ 1−1/√2; Mistral-Nemo γ=0.428 ≈ log_10(e). Caveat: could be coincidence.</p>

      <h3 style="margin-top: 1.5em;" data-i18n="v04.title">🆕 v0.4 — New diagnostics (session 31)</h3>
      <p style="opacity: 0.85;"><em data-i18n="v04.section.intro">Four new diagnostic functions derived in session 31 (2026-04-30) from cross-of-crosses formula games + Socratic interrogation. Available in <code>taf_browser.py</code> §33.</em></p>

      <p><strong data-i18n="v04.arch.label">Architectural Concentration</strong> — <span data-i18n="v04.arch.desc">γ_text ≈ γ_Padé − 0.012·n_kv. Cross-panel correlational law (R²=0.30). Caveat: not per-model predictor.</span></p>

      <p><strong data-i18n="v04.pdi.label">PDI — Padé Deviation Index</strong> — <span data-i18n="v04.pdi.desc">PDI = d_horizon_obs/T_eval. Traffic light: green (≈1), orange (&gt;&gt;1), yellow (&lt;&lt;1), red (Phase B negative).</span></p>

      <p><strong data-i18n="v04.4bit.label">4-bit Shift Predictor</strong> — <span data-i18n="v04.4bit.desc">MHA: R²(bf16)&lt;0.9 → γ rises; R²&gt;0.99 → γ drops. GQA: precision-robust regardless.</span></p>

      <p><strong data-i18n="v04.crit.label">Critical Exponents Bundle</strong> — <span data-i18n="v04.crit.desc">ν_c, β_c, η_c (=γ−1, CORRECTED), α_C, γ_susc with AM-GM minimum at γ=1−1/√2≈0.293.</span></p>

      <h3 data-i18n="help.add_models.title">Adding new models (3 ways)</h3>
      <ul>
        <li data-i18n="help.add_models.preset"><strong>Preset list</strong>: 11 popular models curated. Just select from dropdown.</li>
        <li data-i18n="help.add_models.hf"><strong>HF Hub fetch</strong>: paste any model id (e.g. <code>Qwen/Qwen2.5-32B-Instruct</code>),
          click 📥 Fetch. Browser downloads <code>config.json</code> directly from HuggingFace, fills the form. Works for any public model.</li>
        <li data-i18n="help.add_models.manual"><strong>Manual</strong>: fill the form fields directly with values from the model card.</li>
      </ul>

      <h3 data-i18n="help.audit.title">The audit chain</h3>
      <p data-i18n="help.audit.body">Every result shows the full <strong>Computation Chain</strong> — each formula step with its inputs,
      output, and interpretation. Click any step to expand. Cited section numbers (§26.1, §19.1, etc.) refer
      to the underlying paper for the derivations.</p>

      <h3 data-i18n="help.synthesis.title">The plain-English answer</h3>
      <p data-i18n="help.synthesis.body">After the deterministic chain runs, an in-browser LLM (Qwen2.5-0.5B, ~350MB cached after first load)
      synthesizes a plain-English summary. The numbers above are <em>always correct</em> (deterministic Python);
      the synthesis is LLM-generated — verify against the chain if in doubt.</p>

      <h3 data-i18n="help.params.title">Common parameters explained</h3>
      <ul>
        <li data-i18n="help.param.theta"><strong>θ (rope_theta)</strong>: RoPE base frequency. Higher = more long-range capacity. Typical: 10000 (early), 500000 (Llama-3), 1000000 (Qwen2.5).</li>
        <li data-i18n="help.param.T_train"><strong>T_train</strong>: max context the model was trained on. From <code>max_position_embeddings</code>.</li>
        <li data-i18n="help.param.T_eval"><strong>T_eval</strong>: <em>your target</em> inference context length. The key knob.</li>
        <li data-i18n="help.param.gqa"><strong>n_kv_heads &lt; n_attention_heads</strong>: model uses GQA (Grouped Query Attention). Reduces KV memory but pushes γ toward Hagedorn.</li>
        <li data-i18n="help.param.swa"><strong>has_SWA</strong>: model uses Sliding Window Attention (Mistral, gemma-2).</li>
        <li data-i18n="help.param.nparams"><strong>n_params</strong>: total parameter count. Threshold ~400M for induction-head emergence.</li>
      </ul>

      <h3 data-i18n="help.verdicts.title">What to look for in verdicts</h3>
      <ul>
        <li data-i18n="help.verdict.yes"><strong style="color:#3fb950;">YES / GO</strong> — proceed with confidence; numbers support the choice.</li>
        <li data-i18n="help.verdict.deg"><strong style="color:#d29922;">DEGRADED / TINY-MODEL</strong> — works but with caveats; read the action.</li>
        <li data-i18n="help.verdict.no"><strong style="color:#f85149;">NO / MEMORY-LIMITED</strong> — don't proceed as-is; mitigation provided.</li>
      </ul>

      <h3 data-i18n="help.privacy.title">Privacy</h3>
      <p data-i18n="help.privacy.body">Everything runs in your browser. No telemetry, no analytics, no data sent anywhere. Even the LLM model
      runs locally via WebGPU/WebAssembly. Your model_ids and questions never leave this page.</p>

      <h3 data-i18n="help.source.title">Source &amp; paper</h3>
      <p data-i18n="help.source.body">Source code: <a href="https://github.com/karlesmarin/tafagent" target="_blank" rel="noopener">github.com/karlesmarin/tafagent</a><br>
      Paper: <em>Marin 2026 — Predicting How Transformers Attend</em> (<a href="https://zenodo.org/records/19826343" target="_blank" rel="noopener">Zenodo</a>; arXiv forthcoming)<br>
      Dataset: <a href="https://huggingface.co/datasets/karlexmarin/taf-attention-decay" target="_blank" rel="noopener">taf-attention-decay</a> — 58 γ-measurements across 32 models (CC-BY-4.0)</p>
    </div>
  </div>

  <main>
    <!-- Status with loading bar -->
    <section id="status-bar">
      <div id="status" data-i18n="status.loading_pyodide">⏳ Loading Python runtime...</div>
      <div id="loading-bar-wrap" style="display:none;">
        <div id="loading-bar"></div>
      </div>
    </section>

    <!-- Mode toggle -->
    <section id="mode-section">
      <h2><span data-i18n="modes.title">🎯 Mode</span>
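        <span class="info"><span class="tooltip" data-i18n="modes.tip"><strong>Seven ways to use the tool</strong>.<br>
        <strong>📇 Profile</strong>: paste a model id → all 5 recipes at once = TAF Card.<br>
        <strong>🆚 Compare</strong>: 2-3 models side-by-side on one recipe.<br>
        <strong>🔍 Inspect config</strong>: paste a raw <code>config.json</code> → full Profile.<br>
        <strong>💬 Ask</strong>: free-form question, browser LLM picks the recipe.<br>
        <strong>📋 Recipe</strong>: manual selection with full form control.<br>
        <strong>🩺 Diagnose CLI</strong>: generate a Python command to measure γ on your local machine.<br>
        <strong>📊 Phase diagram</strong>: scatter plot of panel models on the (log θ, γ) plane.
        </span></span>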
      </h2>
      <div class="mode-tabs">
        <button class="mode-btn active" data-mode="profile" data-i18n="modes.profile">📇 Profile a model</button>
        <button class="mode-btn" data-mode="compare" data-i18n="modes.compare">🆚 Compare models</button>
        <button class="mode-btn" data-mode="inspector" data-i18n="modes.inspector">🔍 Inspect config</button>
        <button class="mode-btn" data-mode="ask" data-i18n="modes.ask">💬 Ask plain English</button>
        <button class="mode-btn" data-mode="recipe" data-i18n="modes.recipe">📋 Pick recipe</button>
        <button class="mode-btn" data-mode="diagnose" data-i18n="modes.diagnose">🩺 Diagnose CLI</button>
        <button class="mode-btn" data-mode="phase" data-i18n="modes.phase">📊 Phase diagram</button>
      </div>
      <p id="mode-desc" class="recipe-desc" data-i18n="modes.desc">
        <strong>Quickest start</strong>: paste any HuggingFace model id (e.g. <code>meta-llama/Meta-Llama-3-8B</code>),
        click Profile. See all 5 recipes scored in seconds.
      </p>
    </section>

    <!-- PROFILE mode -->
    <section id="profile-section">
      <div class="quickstart-banner" data-i18n="profile.quickstart">
        💡 Quick start: pick any preset → click Generate. Or paste a model id from <a href='https://huggingface.co/models?library=transformers&amp;sort=trending' target='_blank' rel='noopener'>HF Hub trending</a> → 📥 Fetch → Generate.
      </div>
      <h2><span data-i18n="profile.title">📇 Profile a model</span>
        <span class="info"><span class="tooltip" data-i18n="profile.tip">
          <strong>One-click full diagnosis</strong>. Paste any HF model id (or pick preset).
          Tool runs all 5 recipes (long-context, KV-compression, custom-vs-API, budget,
          hardware) and produces a single <strong>TAF Card</strong> showing verdict per
          dimension + key numbers + architecture classification.<br><br>
          <strong>Use case</strong>: "I'm evaluating Qwen2.5-32B for production —
          what's its full viability profile?" → paste id → Profile → done.
        </span></span>
      </h2>
      <p class="recipe-desc" data-i18n="profile.desc">
        <strong>For technicians</strong>: when you need a complete viability snapshot
        of a candidate model. Outputs match the format of the paper's γ-decomposition section.
      </p>

      <div class="form-row">
        <label for="profile-preset" data-i18n="profile.preset_label">Preset:</label>
        <select id="profile-preset" disabled>
          <option value="" data-i18n="profile.preset_default">— or pick from list —</option>
        </select>
      </div>

      <div class="form-row">
        <label for="profile-hf-id" data-i18n="profile.hf_label">HF model id:</label>
        <input type="text" id="profile-hf-id"
          data-i18n-placeholder="profile.hf_placeholder"
          placeholder="e.g. meta-llama/Meta-Llama-3-8B or Qwen/Qwen2.5-7B" style="flex:1;" />
        <button id="profile-fetch-btn" type="button" class="secondary" data-i18n="profile.fetch_btn">📥 Fetch</button>
      </div>
      <div id="profile-hf-status" class="subtle" style="margin: -0.5rem 0 1rem; min-height:1.2em;"></div>

      <div class="form-grid" id="profile-form">
        <div class="form-field">
          <label for="profile-theta"><span data-i18n="param.theta">θ (rope_theta)</span> <span class="info"><span class="tooltip" data-i18n="param.theta.tip">RoPE base frequency from <code>config.rope_theta</code>.</span></span></label>
          <input type="number" id="profile-theta" value="500000" />
        </div>
        <div class="form-field">
          <label for="profile-T_train"><span data-i18n="param.T_train">T_train</span> <span class="info"><span class="tooltip" data-i18n="param.T_train.tip">Max training context. From <code>max_position_embeddings</code>.</span></span></label>
          <input type="number" id="profile-T_train" value="8192" />
        </div>
        <div class="form-field">
          <label for="profile-T_eval"><span data-i18n="param.T_eval">T_eval (your target)</span> <span class="info"><span class="tooltip" data-i18n="param.T_eval.tip">Inference context length you'll actually serve. The key knob.</span></span></label>
          <input type="number" id="profile-T_eval" value="32000" />
        </div>
        <div class="form-field">
          <label for="profile-n_attn" data-i18n="param.n_attn">n_attention_heads</label>
          <input type="number" id="profile-n_attn" value="32" />
        </div>
        <div class="form-field">
          <label for="profile-n_kv" data-i18n="param.n_kv">n_kv_heads</label>
          <input type="number" id="profile-n_kv" value="8" />
        </div>
        <div class="form-field">
          <label for="profile-d_head" data-i18n="param.d_head">head_dim</label>
          <input type="number" id="profile-d_head" value="128" />
        </div>
        <div class="form-field">
          <label for="profile-n_layers" data-i18n="param.n_layers">n_layers</label>
          <input type="number" id="profile-n_layers" value="32" />
        </div>
        <div class="form-field">
          <label for="profile-n_params" data-i18n="param.n_params">n_params (e.g. 8e9)</label>
          <input type="text" id="profile-n_params" value="8e9" />
        </div>
        <div class="form-field">
          <label for="profile-has_swa" data-i18n="param.has_swa">Has SWA?</label>
          <select id="profile-has_swa">
            <option value="false" selected data-i18n="common.no">No</option>
            <option value="true" data-i18n="common.yes">Yes</option>
          </select>
        </div>
      </div>

      <button id="profile-btn" disabled data-i18n="profile.btn">🚀 Generate full profile</button>
    </section>

    <!-- INSPECTOR mode (paste config.json directly) -->
    <section id="inspector-section" style="display:none;">
      <div class="quickstart-banner" data-i18n="inspector.quickstart">
        💡 Use case: you have a private model not on HF Hub, or a config you're designing. Paste the raw JSON below and get a full TAF profile.
      </div>
      <h2><span data-i18n="inspector.title">🔍 Architecture Inspector</span>
        <span class="info"><span class="tooltip" data-i18n="inspector.tip">
          <strong>Paste any config.json directly</strong>. Tool parses it and runs the full Profile.
          Useful for: private models, in-development configs, models not yet on HuggingFace,
          or comparing what your custom architecture would do.
        </span></span>
      </h2>
      <p class="recipe-desc" data-i18n="inspector.desc">
        Paste the raw <code>config.json</code> contents. The tool extracts the architectural
        parameters and runs the full 5-recipe Profile.
      </p>
      <textarea id="inspector-json" rows="12"
        data-i18n-placeholder="inspector.placeholder"
        placeholder='{
  "model_type": "llama",
  "rope_theta": 500000,
  "max_position_embeddings": 8192,
  "num_attention_heads": 32,
  "num_key_value_heads": 8,
  "hidden_size": 4096,
  "num_hidden_layers": 32,
  "vocab_size": 128256
}'></textarea>
      <div class="form-row" style="margin-top:0.5rem;">
        <label for="inspector-T_eval" data-i18n="inspector.T_eval">T_eval (your target context):</label>
        <input type="number" id="inspector-T_eval" value="32000" />
      </div>
      <button id="inspector-btn" disabled data-i18n="inspector.btn">🚀 Inspect &amp; profile</button>
      <span id="inspector-status" class="subtle" style="margin-left:0.75rem;"></span>
    </section>

    <!-- COMPARE mode -->
    <section id="compare-section" style="display:none;">
      <div class="quickstart-banner" data-i18n="compare.example">
        💡 Try: paste 3 popular 7-8B models (Meta-Llama-3-8B, Mistral-7B-v0.1, Qwen/Qwen2.5-7B), pick recipe X-2, T_eval=16000. See which best handles long context.
      </div>
      <h2><span data-i18n="compare.title">🆚 Compare models side-by-side</span>
        <span class="info"><span class="tooltip" data-i18n="compare.tip">
          <strong>Same recipe, multiple models</strong>. Pick 2-3 candidate models and
          one recipe. See verdicts in a single comparison table.<br><br>
          <strong>Use case</strong>: "I need long-context retrieval at 16K — which is
          best: Llama-3-8B, Mistral-7B, or Qwen-7B?" → pick 3 + X-2 + 16K → see winner.
        </span></span>
      </h2>
      <p class="recipe-desc" data-i18n="compare.desc">
        <strong>For technicians</strong>: when choosing between 2-3 candidate models for
        a specific deployment scenario. Compare their verdicts on the same recipe.
      </p>

      <div class="form-row">
        <label for="compare-recipe" data-i18n="compare.recipe_label">Recipe:</label>
        <select id="compare-recipe" disabled>
          <option value="" data-i18n="recipe.default">— pick a recipe —</option>
        </select>
      </div>

      <div class="form-row">
        <label for="compare-T_eval" data-i18n="compare.T_eval_label">T_eval (target context):</label>
        <input type="number" id="compare-T_eval" value="16000" style="flex:1;" />
        <span class="info" style="margin-top:0.5rem;"><span class="tooltip">
          For X-2 / X-19 only. The context length all compared models will be
          evaluated at. Other recipes use their own params.
        </span></span>
      </div>

      <div id="compare-models">
        <h3 style="margin-top:1rem;" data-i18n="compare.models_title">Models to compare (add up to 3)</h3>
        <div class="compare-slot" data-slot="1">
          <input type="text" class="compare-hf-id"
            data-i18n-placeholder="compare.slot1_placeholder"
            placeholder="HF model id (e.g. meta-llama/Meta-Llama-3-8B)" />
          <select class="compare-preset">
            <option value="" data-i18n="compare.preset_default">— or preset —</option>
          </select>
        </div>
        <div class="compare-slot" data-slot="2">
          <input type="text" class="compare-hf-id"
            data-i18n-placeholder="compare.slot2_placeholder"
            placeholder="HF model id #2" />
          <select class="compare-preset">
            <option value="" data-i18n="compare.preset_default">— or preset —</option>
          </select>
        </div>
        <div class="compare-slot" data-slot="3">
          <input type="text" class="compare-hf-id"
            data-i18n-placeholder="compare.slot3_placeholder"
            placeholder="HF model id #3 (optional)" />
          <select class="compare-preset">
            <option value="" data-i18n="compare.preset_default">— or preset —</option>
          </select>
        </div>
      </div>

      <button id="compare-btn" disabled style="margin-top:1rem;" data-i18n="compare.btn">🚀 Compare</button>
    </section>

    <!-- ASK mode (free-form question).
         The section is hidden by its inline display:none; whatever reveals it
         (presumably the mode switcher) is not part of this markup. -->
    <section id="ask-section" style="display:none;">
      <h2 data-i18n="ask.title">❓ Your question</h2>
      <!-- The placeholder doubles as a list of sample questions; its translated
           form is looked up through data-i18n-placeholder. -->
      <textarea id="question" rows="3"
        data-i18n-placeholder="ask.placeholder"
        placeholder="e.g. Will Mistral-7B handle 16K NIAH retrieval? Or: I have $5,000, what model can I train? Or: Cheapest GPU to serve Llama-70B at 100M tokens/day?"></textarea>
      <!-- Action row: #ask-btn is declared disabled here, #example-btn is not. -->
      <div style="display:flex; gap:0.5rem; margin-top:0.5rem; flex-wrap:wrap;">
        <button id="ask-btn" disabled data-i18n="ask.btn">🚀 Analyze</button>
        <button id="example-btn" type="button" class="secondary" data-i18n="ask.example_btn">💡 Try an example</button>
      </div>
    </section>

    <!-- Diagnose mode: build the CLI command for diagnose_model.py.
         This markup only collects the options and provides the output slots
         (#diag-output / #diag-cmd); the code that assembles the command string
         is not in this file section. The section starts hidden (display:none). -->
    <section id="diagnose-section" style="display:none;">
      <h2><span data-i18n="diagnose.title">🩺 Diagnose CLI Command Builder</span>
        <span class="info"><span class="tooltip" data-i18n="diagnose.tip">
          <strong>Measure γ_obs (not predict)</strong>. The browser tool predicts γ from
          config alone (Padé). To <em>measure</em> the actual decay on a real model
          you need GPU + Python. This builder produces the exact CLI command you
          run locally; the script is shipped in this repository at
          <code>cli/diagnose_model.py</code>.<br><br>
          <strong>Output</strong>: γ_obs, R², phase, KV cache budget D_90, KL anomaly,
          full thermodynamic profile (Z, U, S, F, C_V, χ). Saved as JSON.
        </span></span>
      </h2>
      <p class="recipe-desc" data-i18n="diagnose.desc">
        Pick options below and copy-paste the generated command on your local
        machine (Python + transformers + numpy). Total wall time ≈ 5 min in
        <code>--fast</code> mode on CPU; full mode 20–60 min on GPU.
      </p>

      <!-- Model id: pre-filled with EleutherAI/pythia-70m. This label and input
           carry no i18n placeholder key, unlike the other HF id inputs. -->
      <div class="form-row">
        <label for="diag-model" data-i18n="diagnose.model_label">HF model id:</label>
        <input type="text" id="diag-model" placeholder="EleutherAI/pythia-70m" value="EleutherAI/pythia-70m">
      </div>

      <!-- θ: empty by default; per the label, a blank value means auto-detect. -->
      <div class="form-row">
        <label for="diag-theta" data-i18n="diagnose.theta_label">θ (auto if blank):</label>
        <input type="number" id="diag-theta" placeholder="auto-detect">
      </div>

      <!-- Context length: defaults to 2000; the browser constrains it to 100..32000. -->
      <div class="form-row">
        <label for="diag-N" data-i18n="diagnose.n_label">Context N:</label>
        <input type="number" id="diag-N" value="2000" min="100" max="32000">
      </div>

      <!-- Boolean CLI flags. Only the fast flag is pre-checked. The outer label
           has no "for" target because it captions the whole checkbox group. -->
      <div class="form-row">
        <label data-i18n="diagnose.options_label">Options:</label>
        <span>
          <label><input type="checkbox" id="diag-fast" checked>
            <span data-i18n="diagnose.opt_fast">--fast (CPU, ~5 min)</span></label><br>
          <label><input type="checkbox" id="diag-cpu">
            <span data-i18n="diagnose.opt_cpu">--cpu (force CPU)</span></label><br>
          <label><input type="checkbox" id="diag-4bit">
            <span data-i18n="diagnose.opt_4bit">--load_in_4bit (≥7B models)</span></label>
        </span>
      </div>

      <!-- Optional local weights path; empty by default. -->
      <div class="form-row">
        <label for="diag-local" data-i18n="diagnose.local_label">--local path (optional):</label>
        <input type="text" id="diag-local" placeholder="/path/to/local/weights">
      </div>

      <button id="diag-build-btn" data-i18n="diagnose.build_btn">📋 Build command</button>

      <!-- Result panel: hidden in the markup and #diag-cmd is empty, so it is
           presumably revealed and filled once a command has been built
           (TODO confirm against the script). -->
      <div id="diag-output" style="display:none; margin-top:1em;">
        <h3 data-i18n="diagnose.cmd_title">Generated command:</h3>
        <pre id="diag-cmd" class="diag-cmd-box"></pre>
        <button id="diag-copy-btn" data-i18n="diagnose.copy_btn">📋 Copy to clipboard</button>
        <p class="recipe-desc" data-i18n="diagnose.next_steps">
          <strong>Next steps</strong>:
          (1) <code>git clone https://github.com/karlesmarin/tafagent</code>
          (2) <code>cd tafagent &amp;&amp; pip install torch transformers numpy</code>
          (3) Run the command above.
          (4) Result JSON lands in <code>./diagnose_results/</code> — upload it
          to the <strong>📋 Pick recipe</strong> mode (or paste in <strong>🔍 Inspect config</strong>) for full TAF analysis.
        </p>
      </div>
    </section>

    <!-- Phase diagram mode: live scatter of measured γ vs θ.
         The chart is drawn on #phase-canvas (900x500 intrinsic size, scaled down
         by max-width:100%); #phase-info is an empty slot below it, presumably
         used for the hover/click details mentioned in the tooltip.
         The section starts hidden (display:none). -->
    <section id="phase-section" style="display:none;">
      <h2><span data-i18n="phase.title">📊 Phase diagram (γ × θ)</span>
        <span class="info"><span class="tooltip" data-i18n="phase.tip">
          Each dot is one model from the paper's empirical panel
          (data/master_gamma_results.json). The x-axis is RoPE base θ
          on log scale; y-axis is measured γ.
          The Hagedorn line γ=1 separates Phase A (γ&lt;1, global) from
          Phase B (γ&gt;1, local-collapsed).
          Hover dots for details; click to populate the recipe form.
        </span></span>
      </h2>
      <p class="recipe-desc" data-i18n="phase.desc">
        23 models in the panel; the Padé curve (line) is
        γ_pred(θ) = (2θ−T√2)/(2θ+T√2) at T=2000.
      </p>
      <!-- The text inside <canvas> is fallback content: it is never painted when
           canvas is supported, but it is what assistive technology and
           canvas-less browsers get instead of an unlabeled bitmap. -->
      <canvas id="phase-canvas" width="900" height="500" style="max-width:100%; background: var(--card-bg); border-radius: 6px;">
        Scatter plot of measured γ versus RoPE base θ (log scale) for the models in the empirical panel, with the Padé prediction curve and the Hagedorn line γ=1.
      </canvas>
      <div id="phase-info" class="recipe-desc" style="margin-top:0.6em;"></div>
    </section>

    <!-- Recipe selector (mode=recipe).
         #recipe-select is declared disabled and holds only the empty-value
         default option; real recipe options and the text of
         #recipe-desc-display are not in the markup (presumably filled by script).
         The section starts hidden (display:none). -->
    <section id="recipe-section" style="display:none;">
      <h2 data-i18n="recipe.title">📋 Recipe</h2>
      <select id="recipe-select" disabled>
        <option value="" data-i18n="recipe.default">— select a recipe —</option>
      </select>
      <p id="recipe-desc-display" class="recipe-desc"></p>
    </section>

    <!-- Form (mode=recipe): choose a model (preset or arbitrary HF id), then
         fill the recipe-specific inputs rendered into #dynamic-form.
         The section starts hidden (display:none). -->
    <section id="form-section" style="display:none;">
      <h2 data-i18n="recipe.input_title">🎯 Inputs</h2>

      <!-- Preset dropdown: disabled and holding only the default option in the
           markup; it reuses the profile.* translation keys. -->
      <div class="form-row">
        <label for="preset" data-i18n="profile.preset_label">Preset model:</label>
        <select id="preset" disabled>
          <option value="" data-i18n="profile.preset_default">— select to autofill —</option>
        </select>
      </div>

      <div class="form-row">
        <label for="hf-id" data-i18n="profile.hf_label">Or any HF model:</label>
        <input type="text" id="hf-id"
          data-i18n-placeholder="profile.hf_placeholder"
          placeholder="e.g. Qwen/Qwen2.5-32B-Instruct" style="flex:1;" />
        <button id="hf-fetch-btn" type="button" class="secondary" data-i18n="profile.fetch_btn">📥 Fetch</button>
      </div>
      <!-- Fetch feedback line. min-height reserves the row so the layout does not
           jump when a message appears; role="status" makes it a polite live
           region so messages written here are announced by screen readers. -->
      <div id="hf-status" class="subtle" role="status" style="margin: -0.5rem 0 1rem; min-height:1.2em;"></div>

      <!-- Empty container for the recipe-specific fields. -->
      <div id="dynamic-form" class="form-grid"></div>

      <!-- Reuses the ask.btn translation key, so it shares the Ask-mode label. -->
      <button id="run-btn" disabled data-i18n="ask.btn">🚀 Analyze</button>
    </section>

    <!-- Output (single-recipe verdict + chain).
         #verdict-box, #chain-box and #answer-box are empty slots in the markup.
         The answer header and box carry their own display:none, independent of
         the section's, so they can stay hidden while the rest is shown.
         The section starts hidden (display:none). -->
    <section id="output-section" style="display:none;">
      <h2 data-i18n="verdict.title">📊 Verdict</h2>
      <div id="verdict-box"></div>

      <!-- Share / export actions. #share-status is the feedback slot next to
           them; role="status" makes it a polite live region so confirmations
           written into it are announced by screen readers. -->
      <div class="share-bar">
        <button id="share-btn" class="secondary" type="button" data-i18n="share.btn">🔗 Copy share link</button>
        <button id="recipe-download-btn" class="secondary" type="button" data-i18n="share.download">💾 Download JSON</button>
        <button id="recipe-submit-btn" class="secondary" type="button" data-i18n="share.submit">📤 Submit to registry</button>
        <span id="share-status" class="subtle" role="status"></span>
      </div>

      <h2 data-i18n="chain.title">🔍 Computation Chain</h2>
      <p class="subtle" data-i18n="chain.desc">Every number below is deterministic Python. Click a step to expand.</p>
      <div id="chain-box"></div>

      <h2 id="answer-header" style="display:none;" data-i18n="answer.title">💬 Plain-English Answer</h2>
      <div id="answer-box" style="display:none;"></div>
    </section>

    <!-- Profile output: heading plus the empty #profile-box slot that receives
         the TAF card. Starts hidden (display:none). -->
    <section id="profile-output" style="display:none;">
      <h2 data-i18n="tafcard.title">📇 TAF Card — full model profile</h2>
      <div id="profile-box"></div>
    </section>

    <!-- Compare output: heading plus the empty #compare-box slot that receives
         the comparison table. Starts hidden (display:none). -->
    <section id="compare-output" style="display:none;">
      <h2 data-i18n="compare.title_out">🆚 Comparison Table</h2>
      <div id="compare-box"></div>
    </section>

    <!-- Hidden file input for JSON upload (shared by all import buttons).
         It is never shown itself (display:none) and accepts .json files only. -->
    <input type="file" id="import-file" accept=".json,application/json" style="display:none;" />

    <!-- Floating import bar (always visible): unlike the mode sections above,
         this section has no inline display:none. -->
    <section id="import-section">
      <h2 data-i18n="share.import_title">📂 Import a shared TAF result</h2>
      <p class="recipe-desc" data-i18n="share.import_desc">
        Got a JSON file from someone else's TAF analysis? Load it here to see the verdict + chain locally.
        Same view as if you'd run it yourself.
      </p>
      <button id="import-btn" class="secondary" type="button" data-i18n="share.import_btn">📂 Load shared JSON</button>
      <!-- Import feedback slot; role="status" makes it a polite live region so
           success/error text written into it is announced by screen readers. -->
      <span id="import-status" class="subtle" role="status" style="margin-left:0.75rem;"></span>
    </section>

    <!-- Browse community submissions (live from GitHub Issues).
         #community-feed ships with a "Loading..." placeholder; the real feed
         content is not in the markup. The external link opens in a new tab and
         carries rel="noopener noreferrer" so the opened page gets no
         window.opener handle back to this app and no Referer header. -->
    <section id="community-section">
      <h2 data-i18n="community.title">🌐 Recent community submissions</h2>
      <p class="recipe-desc" data-i18n="community.desc">
        Live feed from the public registry. Click any submission to view full analysis.
        <a href="https://github.com/karlesmarin/tafagent-registry/issues" target="_blank" rel="noopener noreferrer" data-i18n="community.browse_all">Browse all →</a>
      </p>
      <div id="community-feed" class="subtle"><span data-i18n="community.loading">Loading...</span></div>
    </section>

    <!-- FALSIFICATION dashboard (paper predictions status).
         Always-visible section (no inline display:none). #falsification-table
         is an empty slot in the markup; the status rows are presumably rendered
         into it by script (TODO confirm). -->
    <section id="falsification-section">
      <h2 data-i18n="falsification.title">🔬 Paper predictions — falsification status</h2>
      <p class="recipe-desc" data-i18n="falsification.desc">
        The TAF framework rests on falsifiable predictions (F1-F23). Each is empirically tested.
        Here's the live status of every prediction in the paper.
      </p>
      <div id="falsification-table"></div>
    </section>
  </main>

  <!-- Page footer: copyright/licence line (translated via footer.text), links to
       the source and paper repositories, and a static stack summary.
       Both links open in a new tab and carry rel="noopener noreferrer" so the
       opened page gets no window.opener handle and no Referer header. -->
  <footer>
    <p data-i18n="footer.text">
      © 2026 Carles Marin · Apache-2.0 · independent research · the tool that closes the loop of the paper.
    </p>
    <p>
      <a href="https://github.com/karlesmarin/tafagent" target="_blank" rel="noopener noreferrer">Source on GitHub</a>
      ·
      <a href="https://github.com/karlesmarin/NeurIPS" target="_blank" rel="noopener noreferrer">Paper repo</a>
    </p>
    <p class="subtle">
      Computation: Pyodide · Synthesis: WebLLM (Qwen2.5-0.5B local) · Hosting: GitHub Pages · Cost: $0
    </p>
  </footer>

  <script type="module" src="js/main.js"></script>
</body>
</html>