"""Content health HTML dashboard (v1.86). Renders the same data that ``GET /v1/admin/content/health`` returns as JSON, but as a self-contained HTML page. No JS framework, no external CDN — just inline CSS. Designed to be bookmarked and opened in a tab. Layout: * Banner: overall score with a coloured bar * Corpus stats: indexed / touched / dead / isolated * Components breakdown: coverage / cite_rate / connectivity * Top-cited workhorses * Retrieval false-positives (top_noisy) * Retriever ranking * Co-citation top pairs * Dead docs list * Isolated docs list Style mirrors ``admin_ui.py`` (v1.44) and ``chunks_ui`` (v1.68) — neutral card layout, monospace for ids and numbers. """ from __future__ import annotations import html as _h from typing import Any, Dict, List def _esc(s: object) -> str: return _h.escape(str(s), quote=True) def _score_color(score: float) -> str: """Red-yellow-green gradient stops. Matches common ops dashboards.""" if score >= 0.75: return "#059669" # emerald-600 — healthy if score >= 0.50: return "#d97706" # amber-600 — watch return "#dc2626" # red-600 — degraded def _bar(value: float, color: str) -> str: pct = max(0.0, min(1.0, float(value))) * 100 return ( f'

' f'

' ) def _table(headers: List[str], rows: List[List[str]], empty_msg: str = "— no data yet —") -> str: if not rows: return (f'

{_esc(empty_msg)}

') th = "".join(f'{_esc(h)}' for h in headers) body = "" for row in rows: tds = "".join( f'{c}' for c in row ) body += f'{tds}' return (f'{th}' f'{body}

') def _card(title: str, body_html: str, hint: str = "") -> str: hint_html = (f'

' f'{_esc(hint)}

') if hint else "" return ( f'

' f'

{_esc(title)}

' f'{hint_html}' f'{body_html}

' ) def _id_list(ids: List[str]) -> str: if not ids: return ('' '— none —') chips = "".join( f'{_esc(i)}' for i in ids ) return chips def render_content_health_ui(health: Dict[str, Any], refresh_sec: int = 0) -> str: """Render the full dashboard from the dict produced by ``admin_content_health`` (v1.85).""" score = float(health.get("score", 0.0)) coverage = float(health.get("coverage", 0.0)) cite_rate = float(health.get("cite_rate", 0.0)) connectivity = float(health.get("connectivity", 0.0)) corpus = health.get("corpus", {}) or {} color = _score_color(score) # ---- Banner banner = ( f'

' f'

Corpus Health Score

' f'

{score:.2f}

' f'

{_bar(score, color)}

' f'

' ) # ---- Components breakdown components = _card( "Components (geometric mean of 3 signals)", f"""

coverage

              {coverage:.3f}
            

{_bar(coverage, _score_color(coverage))}

touched / indexed

cite_rate

              {cite_rate:.3f}
            

{_bar(cite_rate, _score_color(cite_rate))}

cited / retrieved

connectivity

              {connectivity:.3f}
            

{_bar(connectivity, _score_color(connectivity))}

partnered / touched

""".strip(), ) # ---- Corpus stats stats_rows = [ ["indexed", str(corpus.get("n_indexed", 0))], ["touched", str(corpus.get("n_touched", 0))], ["dead", str(corpus.get("n_dead", 0))], ["isolated", str(corpus.get("n_isolated", 0))], ] corpus_block = _card( "Corpus counts", _table(["metric", "value"], stats_rows), ) # ---- Top cited top_cited_rows = [ [ _esc(r.get("doc_id", "")), str(r.get("n_cited", 0)), str(r.get("n_retrieved", 0)), f"{r.get('cite_rate', 0.0):.3f}", ] for r in (health.get("top_cited") or []) ] top_cited_block = _card( "Top-cited docs (workhorses)", _table(["doc_id", "n_cited", "n_retrieved", "cite_rate"], top_cited_rows, empty_msg="no cites recorded yet"), hint="docs doing the heavy lifting for user answers", ) # ---- Top noisy top_noisy_rows = [ [ _esc(r.get("doc_id", "")), str(r.get("n_retrieved", 0)), str(r.get("n_cited", 0)), f"{r.get('cite_rate', 0.0):.3f}", ] for r in (health.get("top_noisy") or []) ] top_noisy_block = _card( "Retrieval false-positives (noisy docs)", _table(["doc_id", "n_retrieved", "n_cited", "cite_rate"], top_noisy_rows, empty_msg="no noisy docs — retrieval is precise"), hint="retrieved often but never cited — tune retriever or drop doc", ) # ---- Retriever ranking retriever_ranking = (health.get("retrievers", {}) or {}).get("ranking") or [] retr_rows = [ [ _esc(r.get("name", "")), f"{r.get('ranking_score', 0.0):.3f}", f"{r.get('cite_rate', 0.0):.3f}", str(r.get("n_doc_contributions", 0)), str(r.get("n_cited_contributions", 0)), ] for r in retriever_ranking ] retr_block = _card( "Retriever ranking (cite_rate × log(1+n))", _table(["retriever", "score", "cite_rate", "n_docs", "n_cited"], retr_rows, empty_msg="no retriever activity yet"), hint="low cite_rate = noisy proposals; low n = insufficient sample", ) # ---- Co-citation pairs pairs = (health.get("cocitation", {}) or {}).get("top_pairs") or [] pair_rows = [ [ _esc(p.get("a", "")), _esc(p.get("b", "")), str(p.get("count", 0)), ] for p in pairs ] coc_block = _card( "Top co-citation pairs (empirical affinity)", _table(["doc a", "doc b", "count"], pair_rows, empty_msg="no multi-source responses yet"), hint="pairs cited together — candidates for clusters / chunk merges", ) # ---- Dead + isolated lists dead_block = _card( f'Dead docs ({len(health.get("dead_docs") or [])})', _id_list(health.get("dead_docs") or []), hint="indexed but never retrieved — dead corpus content", ) iso_block = _card( f'Isolated docs ({len(health.get("isolated_docs") or [])})', _id_list(health.get("isolated_docs") or []), hint="touched but never co-cited — always-alone docs", ) meta_html = "" if refresh_sec > 0: meta_html = (f'') return f""" tau-rag · content health {meta_html}

📊 tau-rag · content health

Consolidated view of doc (v1.82) + retriever (v1.83) + co-citation (v1.84) analytics. Read-only; no writes from this page. {'Auto-refresh every ' + str(int(refresh_sec)) + 's.' if refresh_sec > 0 else ''}

{banner} {components} {corpus_block} {top_cited_block} {top_noisy_block} {retr_block} {coc_block} {dead_block} {iso_block}

""" __all__ = ["render_content_health_ui"]