legal-eye / tau_rag /api /content_health_ui.py
Legal-i's picture
Initial deploy: legal-eye Hebrew legal RAG (17K corpus, verbatim-from-precedent)
3be54c6 verified
"""Content health HTML dashboard (v1.86).
Renders the same data that ``GET /v1/admin/content/health`` returns as
JSON, but as a self-contained HTML page. No JS framework, no external
CDN — just inline CSS. Designed to be bookmarked and opened in a tab.
Layout:
* Banner: overall score with a coloured bar
* Components breakdown: coverage / cite_rate / connectivity
* Corpus stats: indexed / touched / dead / isolated
* Top-cited workhorses
* Retrieval false-positives (top_noisy)
* Retriever ranking
* Co-citation top pairs
* Dead docs list
* Isolated docs list
Style mirrors ``admin_ui.py`` (v1.44) and ``chunks_ui`` (v1.68) —
neutral card layout, monospace for ids and numbers.
"""
from __future__ import annotations
import html as _h
from typing import Any, Dict, List
def _esc(s: object) -> str:
return _h.escape(str(s), quote=True)
def _score_color(score: float) -> str:
"""Red-yellow-green gradient stops. Matches common ops dashboards."""
if score >= 0.75:
return "#059669" # emerald-600 — healthy
if score >= 0.50:
return "#d97706" # amber-600 — watch
return "#dc2626" # red-600 — degraded
def _bar(value: float, color: str) -> str:
pct = max(0.0, min(1.0, float(value))) * 100
return (
f'<div style="background:#f3f4f6;border-radius:6px;height:14px;'
f'overflow:hidden;border:1px solid #e5e7eb">'
f'<div style="background:{color};height:100%;width:{pct:.1f}%"></div>'
f'</div>'
)
def _table(headers: List[str], rows: List[List[str]],
           empty_msg: str = "— no data yet —") -> str:
    """Render an inline-styled HTML table, or a placeholder when empty.

    Header labels and ``empty_msg`` are escaped here. Cell values are
    inserted verbatim, so every cell must already be HTML-safe when it is
    passed in.

    Args:
        headers: Column titles, one ``<th>`` each.
        rows: One list of pre-rendered cell strings per table row.
        empty_msg: Text shown in an italic ``<div>`` when ``rows`` is empty.

    Returns:
        The ``<table>`` markup, or the placeholder ``<div>``.
    """
    if not rows:
        placeholder = _esc(empty_msg)
        return (f'<div style="padding:10px 14px;color:#6b7280;'
                f'font-style:italic">{placeholder}</div>')

    th_style = ('text-align:left;padding:6px 10px;'
                'background:#f9fafb;border-bottom:1px solid #e5e7eb;'
                'font-weight:600;font-size:13px')
    td_style = ('padding:6px 10px;border-bottom:1px solid #f3f4f6;'
                'font-family:monospace;font-size:13px')

    head_html = "".join(
        f'<th style="{th_style}">{_esc(label)}</th>' for label in headers
    )

    rendered_rows = []
    for cells in rows:
        row_cells = "".join(
            f'<td style="{td_style}">{cell}</td>' for cell in cells
        )
        rendered_rows.append(f'<tr>{row_cells}</tr>')
    body_html = "".join(rendered_rows)

    return (f'<table style="border-collapse:collapse;width:100%;'
            f'border:1px solid #e5e7eb;border-radius:6px;'
            f'overflow:hidden"><thead><tr>{head_html}</tr></thead>'
            f'<tbody>{body_html}</tbody></table>')
def _card(title: str, body_html: str, hint: str = "") -> str:
    """Wrap pre-rendered HTML in a titled white card.

    ``title`` and ``hint`` are escaped; ``body_html`` is inserted as-is and
    must already be safe markup. The hint line is left out entirely when
    ``hint`` is empty.
    """
    if hint:
        hint_html = (
            f'<div style="color:#6b7280;font-size:12px;margin-top:4px">'
            f'{_esc(hint)}</div>'
        )
    else:
        hint_html = ""

    heading = (
        f'<h3 style="margin:0 0 10px 0;font-size:15px">{_esc(title)}</h3>'
    )
    opening = (
        '<div style="background:#fff;border:1px solid #e5e7eb;'
        'border-radius:8px;padding:14px 18px;margin:12px 0">'
    )
    return f'{opening}{heading}{hint_html}{body_html}</div>'
def _id_list(ids: List[str]) -> str:
    """Render ids as a run of monospace "chip" spans.

    Each id is escaped. An empty list yields an italic "— none —"
    placeholder span instead.
    """
    if not ids:
        return ('<span style="color:#6b7280;font-style:italic">'
                '— none —</span>')

    chip_style = ('display:inline-block;background:#f3f4f6;'
                  'border:1px solid #e5e7eb;border-radius:4px;padding:2px 8px;'
                  'margin:2px;font-family:monospace;font-size:12px')
    rendered = [
        f'<span style="{chip_style}">{_esc(doc_id)}</span>'
        for doc_id in ids
    ]
    return "".join(rendered)
def _as_float(value: Any, default: float = 0.0) -> float:
    """Coerce ``value`` to float, returning ``default`` if it cannot be."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def _metric_tile(label: str, value: float, caption: str) -> str:
    """Render one tile of the components grid: label, value, bar, caption."""
    return (
        '<div>'
        f'<div style="color:#6b7280;font-size:12px">{_esc(label)}</div>'
        '<div style="font-family:monospace;font-size:20px;font-weight:600">'
        f'{value:.3f}</div>'
        f'{_bar(value, _score_color(value))}'
        '<div style="color:#9ca3af;font-size:11px;margin-top:4px">'
        f'{_esc(caption)}</div>'
        '</div>'
    )


def render_content_health_ui(health: Dict[str, Any],
                             refresh_sec: int = 0) -> str:
    """Render the full dashboard from the dict produced by
    ``admin_content_health`` (v1.85).

    Keys read from ``health``: ``score``, ``coverage``, ``cite_rate``,
    ``connectivity``, ``corpus``, ``top_cited``, ``top_noisy``,
    ``retrievers.ranking``, ``cocitation.top_pairs``, ``dead_docs`` and
    ``isolated_docs``. Every key is optional. Missing, ``None`` or
    non-numeric metric values render as ``0`` instead of raising.

    Every value taken from ``health`` is HTML-escaped before it reaches the
    page, including count columns, because ``_table`` inserts cells verbatim.

    Args:
        health: Content-health payload (same data as the JSON endpoint).
        refresh_sec: Auto-refresh interval in whole seconds. Values that
            truncate to ``0`` or less disable auto-refresh.

    Returns:
        A complete, self-contained HTML document as a string.
    """
    # ``.get(key, default)`` only covers a *missing* key; an explicit
    # ``None`` would still reach float()/format() and raise, so coerce.
    score = _as_float(health.get("score"))
    coverage = _as_float(health.get("coverage"))
    cite_rate = _as_float(health.get("cite_rate"))
    connectivity = _as_float(health.get("connectivity"))
    corpus = health.get("corpus") or {}
    color = _score_color(score)

    def rate(record: Dict[str, Any], key: str) -> str:
        # Three-decimal rendering of a ratio/score field.
        return f"{_as_float(record.get(key)):.3f}"

    def count(record: Dict[str, Any], key: str) -> str:
        # Escaped on purpose: _table does not escape cell contents.
        return _esc(record.get(key, 0))

    # ---- Banner
    banner = (
        f'<div style="background:#fff;border:2px solid {color};'
        f'border-radius:12px;padding:18px 24px;margin-bottom:16px">'
        f'<div style="display:flex;justify-content:space-between;'
        f'align-items:center;gap:24px">'
        f'<div>'
        f'<div style="color:#6b7280;font-size:12px;text-transform:uppercase;'
        f'letter-spacing:0.08em">Corpus Health Score</div>'
        f'<div style="font-size:42px;font-weight:700;color:{color};'
        f'font-family:monospace">{score:.2f}</div>'
        f'</div>'
        f'<div style="flex:1;min-width:200px">{_bar(score, color)}</div>'
        f'</div></div>'
    )

    # ---- Components breakdown
    tiles = "".join((
        _metric_tile("coverage", coverage, "touched / indexed"),
        _metric_tile("cite_rate", cite_rate, "cited / retrieved"),
        _metric_tile("connectivity", connectivity, "partnered / touched"),
    ))
    components = _card(
        "Components (geometric mean of 3 signals)",
        '<div style="display:grid;grid-template-columns:repeat(3,1fr);'
        f'gap:12px">{tiles}</div>',
    )

    # ---- Corpus stats
    stats_rows = [
        [label, count(corpus, key)]
        for label, key in (
            ("indexed", "n_indexed"),
            ("touched", "n_touched"),
            ("dead", "n_dead"),
            ("isolated", "n_isolated"),
        )
    ]
    corpus_block = _card(
        "Corpus counts",
        _table(["metric", "value"], stats_rows),
    )

    # ---- Top cited
    top_cited_rows = [
        [
            _esc(r.get("doc_id", "")),
            count(r, "n_cited"),
            count(r, "n_retrieved"),
            rate(r, "cite_rate"),
        ]
        for r in (health.get("top_cited") or [])
    ]
    top_cited_block = _card(
        "Top-cited docs (workhorses)",
        _table(["doc_id", "n_cited", "n_retrieved", "cite_rate"],
               top_cited_rows,
               empty_msg="no cites recorded yet"),
        hint="docs doing the heavy lifting for user answers",
    )

    # ---- Top noisy
    top_noisy_rows = [
        [
            _esc(r.get("doc_id", "")),
            count(r, "n_retrieved"),
            count(r, "n_cited"),
            rate(r, "cite_rate"),
        ]
        for r in (health.get("top_noisy") or [])
    ]
    top_noisy_block = _card(
        "Retrieval false-positives (noisy docs)",
        _table(["doc_id", "n_retrieved", "n_cited", "cite_rate"],
               top_noisy_rows,
               empty_msg="no noisy docs — retrieval is precise"),
        hint="retrieved often but never cited — tune retriever or drop doc",
    )

    # ---- Retriever ranking
    retriever_ranking = (health.get("retrievers") or {}).get("ranking") or []
    retr_rows = [
        [
            _esc(r.get("name", "")),
            rate(r, "ranking_score"),
            rate(r, "cite_rate"),
            count(r, "n_doc_contributions"),
            count(r, "n_cited_contributions"),
        ]
        for r in retriever_ranking
    ]
    retr_block = _card(
        "Retriever ranking (cite_rate × log(1+n))",
        _table(["retriever", "score", "cite_rate", "n_docs", "n_cited"],
               retr_rows,
               empty_msg="no retriever activity yet"),
        hint="low cite_rate = noisy proposals; low n = insufficient sample",
    )

    # ---- Co-citation pairs
    pairs = (health.get("cocitation") or {}).get("top_pairs") or []
    pair_rows = [
        [
            _esc(p.get("a", "")),
            _esc(p.get("b", "")),
            count(p, "count"),
        ]
        for p in pairs
    ]
    coc_block = _card(
        "Top co-citation pairs (empirical affinity)",
        _table(["doc a", "doc b", "count"], pair_rows,
               empty_msg="no multi-source responses yet"),
        hint="pairs cited together — candidates for clusters / chunk merges",
    )

    # ---- Dead + isolated lists
    dead_docs = health.get("dead_docs") or []
    isolated_docs = health.get("isolated_docs") or []
    dead_block = _card(
        f'Dead docs ({len(dead_docs)})',
        _id_list(dead_docs),
        hint="indexed but never retrieved — dead corpus content",
    )
    iso_block = _card(
        f'Isolated docs ({len(isolated_docs)})',
        _id_list(isolated_docs),
        hint="touched but never co-cited — always-alone docs",
    )

    # ---- Auto-refresh
    # Truncate BEFORE testing: a fractional interval such as 0.5 is > 0 but
    # would be emitted as content="0", i.e. an immediate reload loop.
    refresh = int(refresh_sec)
    meta_html = ""
    refresh_note = ""
    if refresh > 0:
        meta_html = f'<meta http-equiv="refresh" content="{refresh}">'
        refresh_note = 'Auto-refresh every ' + str(refresh) + 's.'

    return f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>tau-rag · content health</title>
{meta_html}
<style>
body {{
  font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", system-ui,
  sans-serif;
  margin: 0; padding: 24px;
  background: #f9fafb; color: #1f2937;
}}
.wrap {{ max-width: 1100px; margin: 0 auto; }}
h1 {{ margin: 0 0 4px 0; font-size: 22px; }}
.sub {{ color: #6b7280; font-size: 13px; margin: 0 0 20px 0; }}
a {{ color: #2563eb; }}
</style>
</head>
<body>
<div class="wrap">
<h1>📊 tau-rag · content health</h1>
<div class="sub">Consolidated view of doc (v1.82) + retriever (v1.83) +
co-citation (v1.84) analytics. Read-only; no writes from this page.
{refresh_note}
</div>
{banner}
{components}
{corpus_block}
{top_cited_block}
{top_noisy_block}
{retr_block}
{coc_block}
{dead_block}
{iso_block}
</div>
</body>
</html>"""
# Public API: only the page renderer is exported; the underscore-prefixed
# helpers in this module are left out of ``from ... import *``.
__all__ = ["render_content_health_ui"]