"""Content health HTML dashboard (v1.86).
Renders the same data that ``GET /v1/admin/content/health`` returns as
JSON, but as a self-contained HTML page. No JS framework, no external
CDN — just inline CSS. Designed to be bookmarked and opened in a tab.
Layout:
* Banner: overall score with a coloured bar
* Components breakdown: coverage / cite_rate / connectivity
* Corpus stats: indexed / touched / dead / isolated
* Top-cited workhorses
* Retrieval false-positives (top_noisy)
* Retriever ranking
* Co-citation top pairs
* Dead docs list
* Isolated docs list
Style mirrors ``admin_ui.py`` (v1.44) and ``chunks_ui`` (v1.68) —
neutral card layout, monospace for ids and numbers.
"""
from __future__ import annotations
import html as _h
from typing import Any, Dict, List
def _esc(s: object) -> str:
return _h.escape(str(s), quote=True)
def _score_color(score: float) -> str:
"""Red-yellow-green gradient stops. Matches common ops dashboards."""
if score >= 0.75:
return "#059669" # emerald-600 — healthy
if score >= 0.50:
return "#d97706" # amber-600 — watch
return "#dc2626" # red-600 — degraded
def _bar(value: float, color: str) -> str:
# Build the HTML fragment for one horizontal score bar.
# value: score expected in 0..1; anything outside that range is clamped.
# color: CSS colour string for the bar (callers pass _score_color(...)).
# pct is the clamped value scaled to a 0-100 percentage.
pct = max(0.0, min(1.0, float(value))) * 100
# NOTE(review): the f-string below shows no markup in this view, so `pct`
# and `color` look unused — presumably the inline-styled tags were lost;
# confirm against the original file before editing.
return (
f'
'
)
def _table(headers: List[str], rows: List[List[str]],
empty_msg: str = "— no data yet —") -> str:
# Render a table fragment from header labels and pre-built rows.
# headers: column labels; each one is HTML-escaped here.
# rows: one list of cell strings per row. Cells are interpolated as-is
#       (not escaped), so callers must escape any untrusted text first.
# empty_msg: text returned (escaped) in place of the table when rows is empty.
if not rows:
return (f'{_esc(empty_msg)}
')
th = "".join(f'{_esc(h)} | '
for h in headers)
body = ""
for row in rows:
tds = "".join(
f'{c} | '
for c in row
)
body += f'{tds}
'
# NOTE(review): the returned f-string is empty in this view, so `th` and
# `body` look unused — presumably the table markup that wrapped them was
# lost; confirm against the original file.
return (f'')
def _card(title: str, body_html: str, hint: str = "") -> str:
# Wrap a pre-rendered HTML body in a titled card.
# title: heading text; escaped here.
# body_html: markup inserted verbatim — it must already be safe HTML.
# hint: optional explanatory text; escaped, and omitted entirely when empty.
# NOTE(review): the surrounding tags are not visible in this view; confirm
# the exact markup against the original file.
hint_html = (f''
f'{_esc(hint)}
') if hint else ""
return (
f''
f'
{_esc(title)}
'
f'{hint_html}'
f'{body_html}'
)
def _id_list(ids: List[str]) -> str:
# Render a list of ids as one concatenated string of escaped "chips".
# ids: identifiers to show; each one is passed through _esc.
# Returns the "— none —" placeholder fragment when ids is empty.
# NOTE(review): the chip markup around each id is not visible in this
# view; confirm against the original file.
if not ids:
return (''
'— none —')
chips = "".join(
f'{_esc(i)}'
for i in ids
)
return chips
def render_content_health_ui(health: Dict[str, Any],
refresh_sec: int = 0) -> str:
"""Render the full dashboard from the dict produced by
``admin_content_health`` (v1.85)."""
# health: analytics dict; every key is read with .get(), so missing
#         sections fall back to zeros / empty lists instead of raising.
# refresh_sec: when > 0, meta_html and the "Auto-refresh" sentence are emitted.
# Returns the whole page as a single string.
# NOTE(review): most f-strings in this function show no HTML tags in this
# view — presumably the markup was lost; confirm against the original file
# before changing any string.
# Top-level signals are coerced with float(); a missing key counts as 0.0.
score = float(health.get("score", 0.0))
coverage = float(health.get("coverage", 0.0))
cite_rate = float(health.get("cite_rate", 0.0))
connectivity = float(health.get("connectivity", 0.0))
# "or {}" also covers a "corpus" key that is present but None.
corpus = health.get("corpus", {}) or {}
color = _score_color(score)
# ---- Banner
banner = (
f''
f'
'
f'
'
f'
Corpus Health Score
'
f'
{score:.2f}
'
f'
'
f'
{_bar(score, color)}
'
f'
'
)
# ---- Components breakdown
# Each signal gets its own bar, coloured by its own value rather than by
# the overall score.
components = _card(
"Components (geometric mean of 3 signals)",
f"""
coverage
{coverage:.3f}
{_bar(coverage, _score_color(coverage))}
touched / indexed
cite_rate
{cite_rate:.3f}
{_bar(cite_rate, _score_color(cite_rate))}
cited / retrieved
connectivity
{connectivity:.3f}
{_bar(connectivity, _score_color(connectivity))}
partnered / touched
""".strip(),
)
# ---- Corpus stats
# Counts are stringified but not passed through _esc, and _table does not
# escape cells — assumes these values are numeric; TODO confirm.
stats_rows = [
["indexed", str(corpus.get("n_indexed", 0))],
["touched", str(corpus.get("n_touched", 0))],
["dead", str(corpus.get("n_dead", 0))],
["isolated", str(corpus.get("n_isolated", 0))],
]
corpus_block = _card(
"Corpus counts",
_table(["metric", "value"], stats_rows),
)
# ---- Top cited
# _table does not escape cells, so doc_id is escaped here. cite_rate is
# formatted with :.3f and must be numeric: the 0.0 default only applies
# when the key is absent, not when it is present with a None value.
top_cited_rows = [
[
_esc(r.get("doc_id", "")),
str(r.get("n_cited", 0)),
str(r.get("n_retrieved", 0)),
f"{r.get('cite_rate', 0.0):.3f}",
]
for r in (health.get("top_cited") or [])
]
top_cited_block = _card(
"Top-cited docs (workhorses)",
_table(["doc_id", "n_cited", "n_retrieved", "cite_rate"],
top_cited_rows,
empty_msg="no cites recorded yet"),
hint="docs doing the heavy lifting for user answers",
)
# ---- Top noisy
# Same row shape as the top-cited table, with the retrieved / cited
# columns swapped.
top_noisy_rows = [
[
_esc(r.get("doc_id", "")),
str(r.get("n_retrieved", 0)),
str(r.get("n_cited", 0)),
f"{r.get('cite_rate', 0.0):.3f}",
]
for r in (health.get("top_noisy") or [])
]
top_noisy_block = _card(
"Retrieval false-positives (noisy docs)",
_table(["doc_id", "n_retrieved", "n_cited", "cite_rate"],
top_noisy_rows,
empty_msg="no noisy docs — retrieval is precise"),
hint="retrieved often but never cited — tune retriever or drop doc",
)
# ---- Retriever ranking
# "retrievers" may be missing or None, and so may "ranking"; both cases
# collapse to an empty list.
retriever_ranking = (health.get("retrievers", {}) or {}).get("ranking") or []
retr_rows = [
[
_esc(r.get("name", "")),
f"{r.get('ranking_score', 0.0):.3f}",
f"{r.get('cite_rate', 0.0):.3f}",
str(r.get("n_doc_contributions", 0)),
str(r.get("n_cited_contributions", 0)),
]
for r in retriever_ranking
]
retr_block = _card(
"Retriever ranking (cite_rate × log(1+n))",
_table(["retriever", "score", "cite_rate", "n_docs", "n_cited"],
retr_rows,
empty_msg="no retriever activity yet"),
hint="low cite_rate = noisy proposals; low n = insufficient sample",
)
# ---- Co-citation pairs
# Same missing-or-None handling as the retriever section.
pairs = (health.get("cocitation", {}) or {}).get("top_pairs") or []
pair_rows = [
[
_esc(p.get("a", "")),
_esc(p.get("b", "")),
str(p.get("count", 0)),
]
for p in pairs
]
coc_block = _card(
"Top co-citation pairs (empirical affinity)",
_table(["doc a", "doc b", "count"], pair_rows,
empty_msg="no multi-source responses yet"),
hint="pairs cited together — candidates for clusters / chunk merges",
)
# ---- Dead + isolated lists
# The count in each title comes from the same list that is rendered
# below it; _card escapes the title and _id_list escapes each id.
dead_block = _card(
f'Dead docs ({len(health.get("dead_docs") or [])})',
_id_list(health.get("dead_docs") or []),
hint="indexed but never retrieved — dead corpus content",
)
iso_block = _card(
f'Isolated docs ({len(health.get("isolated_docs") or [])})',
_id_list(health.get("isolated_docs") or []),
hint="touched but never co-cited — always-alone docs",
)
# meta_html stays empty unless auto-refresh is requested.
# NOTE(review): the f-string assigned below is empty in this view —
# presumably a refresh tag using refresh_sec was lost; confirm.
meta_html = ""
if refresh_sec > 0:
meta_html = (f'')
return f"""
tau-rag · content health
{meta_html}
📊 tau-rag · content health
Consolidated view of doc (v1.82) + retriever (v1.83) +
co-citation (v1.84) analytics. Read-only; no writes from this page.
{'Auto-refresh every ' + str(int(refresh_sec)) + 's.'
if refresh_sec > 0 else ''}
{banner}
{components}
{corpus_block}
{top_cited_block}
{top_noisy_block}
{retr_block}
{coc_block}
{dead_block}
{iso_block}
"""
# Public API: only the page renderer is exported; the underscore-prefixed
# helpers above are left out.
__all__ = ["render_content_health_ui"]