| """Content health HTML dashboard (v1.86). |
| |
| Renders the same data that ``GET /v1/admin/content/health`` returns as |
| JSON, but as a self-contained HTML page. No JS framework, no external |
| CDN — just inline CSS. Designed to be bookmarked and opened in a tab. |
| |
| Layout: |
| * Banner: overall score with a coloured bar |
| * Corpus stats: indexed / touched / dead / isolated |
| * Components breakdown: coverage / cite_rate / connectivity |
| * Top-cited workhorses |
| * Retrieval false-positives (top_noisy) |
| * Retriever ranking |
| * Co-citation top pairs |
| * Dead docs list |
| * Isolated docs list |
| |
| Style mirrors ``admin_ui.py`` (v1.44) and ``chunks_ui`` (v1.68) — |
| neutral card layout, monospace for ids and numbers. |
| """ |
| from __future__ import annotations |
|
|
| import html as _h |
| from typing import Any, Dict, List |
|
|
|
|
| def _esc(s: object) -> str: |
| return _h.escape(str(s), quote=True) |
|
|
|
|
| def _score_color(score: float) -> str: |
| """Red-yellow-green gradient stops. Matches common ops dashboards.""" |
| if score >= 0.75: |
| return "#059669" |
| if score >= 0.50: |
| return "#d97706" |
| return "#dc2626" |
|
|
|
|
| def _bar(value: float, color: str) -> str: |
| pct = max(0.0, min(1.0, float(value))) * 100 |
| return ( |
| f'<div style="background:#f3f4f6;border-radius:6px;height:14px;' |
| f'overflow:hidden;border:1px solid #e5e7eb">' |
| f'<div style="background:{color};height:100%;width:{pct:.1f}%"></div>' |
| f'</div>' |
| ) |
|
|
|
|
| def _table(headers: List[str], rows: List[List[str]], |
| empty_msg: str = "— no data yet —") -> str: |
| if not rows: |
| return (f'<div style="padding:10px 14px;color:#6b7280;' |
| f'font-style:italic">{_esc(empty_msg)}</div>') |
| th = "".join(f'<th style="text-align:left;padding:6px 10px;' |
| f'background:#f9fafb;border-bottom:1px solid #e5e7eb;' |
| f'font-weight:600;font-size:13px">{_esc(h)}</th>' |
| for h in headers) |
| body = "" |
| for row in rows: |
| tds = "".join( |
| f'<td style="padding:6px 10px;border-bottom:1px solid #f3f4f6;' |
| f'font-family:monospace;font-size:13px">{c}</td>' |
| for c in row |
| ) |
| body += f'<tr>{tds}</tr>' |
| return (f'<table style="border-collapse:collapse;width:100%;' |
| f'border:1px solid #e5e7eb;border-radius:6px;' |
| f'overflow:hidden"><thead><tr>{th}</tr></thead>' |
| f'<tbody>{body}</tbody></table>') |
|
|
|
|
def _card(title: str, body_html: str, hint: str = "") -> str:
    """Wrap *body_html* in a bordered white card with a heading.

    *title* and *hint* are HTML-escaped; *body_html* is inserted as-is.
    The hint line is emitted between the heading and the body only when
    *hint* is non-empty.
    """
    if hint:
        hint_html = (
            '<div style="color:#6b7280;font-size:12px;margin-top:4px">'
            + _esc(hint)
            + '</div>'
        )
    else:
        hint_html = ""

    opening = (
        '<div style="background:#fff;border:1px solid #e5e7eb;'
        'border-radius:8px;padding:14px 18px;margin:12px 0">'
    )
    heading = (
        '<h3 style="margin:0 0 10px 0;font-size:15px">'
        + _esc(title)
        + '</h3>'
    )
    return f'{opening}{heading}{hint_html}{body_html}</div>'
|
|
|
|
| def _id_list(ids: List[str]) -> str: |
| if not ids: |
| return ('<span style="color:#6b7280;font-style:italic">' |
| '— none —</span>') |
| chips = "".join( |
| f'<span style="display:inline-block;background:#f3f4f6;' |
| f'border:1px solid #e5e7eb;border-radius:4px;padding:2px 8px;' |
| f'margin:2px;font-family:monospace;font-size:12px">{_esc(i)}</span>' |
| for i in ids |
| ) |
| return chips |
|
|
|
|
def _as_float(value: object, default: float = 0.0) -> float:
    """Coerce *value* to ``float``; return *default* if it is None or malformed."""
    try:
        return float(value)  # type: ignore[arg-type]
    except (TypeError, ValueError):
        return default


def _component_cell(label: str, value: float, caption: str) -> str:
    """Render one cell of the components grid: label, value, bar, caption."""
    return (
        f'<div>'
        f'<div style="color:#6b7280;font-size:12px">{_esc(label)}</div>'
        f'<div style="font-family:monospace;font-size:20px;font-weight:600">'
        f'{value:.3f}</div>'
        f'{_bar(value, _score_color(value))}'
        f'<div style="color:#9ca3af;font-size:11px;margin-top:4px">'
        f'{_esc(caption)}</div>'
        f'</div>'
    )


def _doc_stat_rows(records: List[Dict[str, Any]],
                   first_key: str, second_key: str) -> List[List[str]]:
    """Build escaped table rows: doc_id, two count columns, cite_rate."""
    return [
        [
            _esc(r.get("doc_id", "")),
            _esc(r.get(first_key, 0)),
            _esc(r.get(second_key, 0)),
            f"{_as_float(r.get('cite_rate')):.3f}",
        ]
        for r in records
    ]


def render_content_health_ui(health: Dict[str, Any],
                             refresh_sec: int = 0) -> str:
    """Render the full dashboard from the dict produced by
    ``admin_content_health`` (v1.85).

    Args:
        health: Health payload. Keys read here: ``score``, ``coverage``,
            ``cite_rate``, ``connectivity``, ``corpus``, ``top_cited``,
            ``top_noisy``, ``retrievers.ranking``, ``cocitation.top_pairs``,
            ``dead_docs`` and ``isolated_docs``. Missing or ``None`` entries
            are rendered as zeros / empty sections.
        refresh_sec: Auto-refresh interval in seconds, truncated to a whole
            number. Values that truncate to zero or less disable refresh.

    Returns:
        A complete, self-contained HTML document as a string.
    """
    # A key that is present but None must not crash the page, matching the
    # ``or {}`` / ``or []`` guards used for the container-valued keys below.
    score = _as_float(health.get("score"))
    coverage = _as_float(health.get("coverage"))
    cite_rate = _as_float(health.get("cite_rate"))
    connectivity = _as_float(health.get("connectivity"))
    corpus = health.get("corpus") or {}
    color = _score_color(score)

    # --- Banner: headline score plus a coloured bar -----------------------
    banner = (
        f'<div style="background:#fff;border:2px solid {color};'
        f'border-radius:12px;padding:18px 24px;margin-bottom:16px">'
        f'<div style="display:flex;justify-content:space-between;'
        f'align-items:center;gap:24px">'
        f'<div>'
        f'<div style="color:#6b7280;font-size:12px;text-transform:uppercase;'
        f'letter-spacing:0.08em">Corpus Health Score</div>'
        f'<div style="font-size:42px;font-weight:700;color:{color};'
        f'font-family:monospace">{score:.2f}</div>'
        f'</div>'
        f'<div style="flex:1;min-width:200px">{_bar(score, color)}</div>'
        f'</div></div>'
    )

    # --- Components: the three signals behind the score -------------------
    components = _card(
        "Components (geometric mean of 3 signals)",
        '<div style="display:grid;grid-template-columns:repeat(3,1fr);'
        'gap:12px">'
        + _component_cell("coverage", coverage, "touched / indexed")
        + _component_cell("cite_rate", cite_rate, "cited / retrieved")
        + _component_cell("connectivity", connectivity, "partnered / touched")
        + '</div>',
    )

    # --- Corpus counts ----------------------------------------------------
    # Values come from the payload, so they are escaped like every other
    # payload-derived cell (``_table`` inserts cells verbatim).
    stats_rows = [
        [label, _esc(corpus.get(key, 0))]
        for label, key in (
            ("indexed", "n_indexed"),
            ("touched", "n_touched"),
            ("dead", "n_dead"),
            ("isolated", "n_isolated"),
        )
    ]
    corpus_block = _card(
        "Corpus counts",
        _table(["metric", "value"], stats_rows),
    )

    # --- Top-cited docs ---------------------------------------------------
    top_cited_block = _card(
        "Top-cited docs (workhorses)",
        _table(["doc_id", "n_cited", "n_retrieved", "cite_rate"],
               _doc_stat_rows(health.get("top_cited") or [],
                              "n_cited", "n_retrieved"),
               empty_msg="no cites recorded yet"),
        hint="docs doing the heavy lifting for user answers",
    )

    # --- Noisy docs -------------------------------------------------------
    top_noisy_block = _card(
        "Retrieval false-positives (noisy docs)",
        _table(["doc_id", "n_retrieved", "n_cited", "cite_rate"],
               _doc_stat_rows(health.get("top_noisy") or [],
                              "n_retrieved", "n_cited"),
               empty_msg="no noisy docs — retrieval is precise"),
        hint="retrieved often but never cited — tune retriever or drop doc",
    )

    # --- Retriever ranking ------------------------------------------------
    retriever_ranking = (health.get("retrievers") or {}).get("ranking") or []
    retr_rows = [
        [
            _esc(r.get("name", "")),
            f"{_as_float(r.get('ranking_score')):.3f}",
            f"{_as_float(r.get('cite_rate')):.3f}",
            _esc(r.get("n_doc_contributions", 0)),
            _esc(r.get("n_cited_contributions", 0)),
        ]
        for r in retriever_ranking
    ]
    retr_block = _card(
        "Retriever ranking (cite_rate × log(1+n))",
        _table(["retriever", "score", "cite_rate", "n_docs", "n_cited"],
               retr_rows,
               empty_msg="no retriever activity yet"),
        hint="low cite_rate = noisy proposals; low n = insufficient sample",
    )

    # --- Co-citation pairs ------------------------------------------------
    pairs = (health.get("cocitation") or {}).get("top_pairs") or []
    pair_rows = [
        [
            _esc(p.get("a", "")),
            _esc(p.get("b", "")),
            _esc(p.get("count", 0)),
        ]
        for p in pairs
    ]
    coc_block = _card(
        "Top co-citation pairs (empirical affinity)",
        _table(["doc a", "doc b", "count"], pair_rows,
               empty_msg="no multi-source responses yet"),
        hint="pairs cited together — candidates for clusters / chunk merges",
    )

    # --- Dead / isolated doc id lists ------------------------------------
    dead_docs = health.get("dead_docs") or []
    isolated_docs = health.get("isolated_docs") or []
    dead_block = _card(
        f'Dead docs ({len(dead_docs)})',
        _id_list(dead_docs),
        hint="indexed but never retrieved — dead corpus content",
    )
    iso_block = _card(
        f'Isolated docs ({len(isolated_docs)})',
        _id_list(isolated_docs),
        hint="touched but never co-cited — always-alone docs",
    )

    # Truncate BEFORE testing for > 0: a fractional interval such as 0.5
    # would otherwise pass the check and emit content="0", which makes the
    # browser reload the page in a tight loop.
    refresh = int(refresh_sec)
    if refresh > 0:
        meta_html = f'<meta http-equiv="refresh" content="{refresh}">'
        refresh_note = f'Auto-refresh every {refresh}s.'
    else:
        meta_html = ""
        refresh_note = ""

    return f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>tau-rag · content health</title>
{meta_html}
<style>
body {{
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", system-ui,
sans-serif;
margin: 0; padding: 24px;
background: #f9fafb; color: #1f2937;
}}
.wrap {{ max-width: 1100px; margin: 0 auto; }}
h1 {{ margin: 0 0 4px 0; font-size: 22px; }}
.sub {{ color: #6b7280; font-size: 13px; margin: 0 0 20px 0; }}
a {{ color: #2563eb; }}
</style>
</head>
<body>
<div class="wrap">
<h1>📊 tau-rag · content health</h1>
<div class="sub">Consolidated view of doc (v1.82) + retriever (v1.83) +
co-citation (v1.84) analytics. Read-only; no writes from this page.
{refresh_note}
</div>
{banner}
{components}
{corpus_block}
{top_cited_block}
{top_noisy_block}
{retr_block}
{coc_block}
{dead_block}
{iso_block}
</div>
</body>
</html>"""
|
|
|
|
# Public surface of this module: only the page renderer is exported; the
# underscore-prefixed HTML helpers above are left out of ``import *``.
__all__ = ["render_content_health_ui"]
|
|