# legal-eye / tau_rag/api/query_analytics_ui.py
# Legal-i's picture
# Initial deploy: legal-eye Hebrew legal RAG (17K corpus, verbatim-from-precedent)
# 3be54c6 verified
# raw
# history blame contribute delete
# 9.41 kB
"""Query analytics HTML dashboard (v1.98).
Visual complement to v1.89 (query_stats) + v1.90 (promote candidates)
+ v1.96 (query × doc affinity). Same design as v1.86 content health UI:
inline CSS, zero JS framework, zero CDN, ?refresh=N for wall screens.
Sections:
* Banner: n_unique_queries, n_events, avg_sources_per_query.
* Top queries (by count).
* Promote candidates (count ≥ threshold, not already a preset).
* Query × doc affinity — top N queries × top M docs as a heat-table.
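* Isolated queries — those with 0 cited docs (retrieval gap).
  NOTE: ``render_query_analytics_ui`` below does not emit this section;
  it renders only the banner, top queries, promote candidates and matrix.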
Style mirrors the v1.86 dashboard so ops have one visual language.
"""
from __future__ import annotations
import html as _h
from typing import Any, Dict, List, Optional
def _esc(s: object) -> str:
    """Stringify ``s`` and HTML-escape it, including both quote characters."""
    text = str(s)
    return _h.escape(text, quote=True)
def _table(headers: List[str], rows: List[List[str]],
           empty_msg: str = "— no data yet —") -> str:
    """Build an inline-styled HTML table.

    Header labels are escaped here. Cell values are interpolated verbatim,
    so callers must hand in text that is already HTML-safe. When ``rows``
    is empty an italic placeholder ``<div>`` carrying the escaped
    ``empty_msg`` is returned instead of a table.
    """
    if not rows:
        return ('<div style="padding:10px 14px;color:#6b7280;'
                f'font-style:italic">{_esc(empty_msg)}</div>')

    th_open = ('<th style="text-align:left;padding:6px 10px;'
               'background:#f9fafb;border-bottom:1px solid #e5e7eb;'
               'font-weight:600;font-size:13px">')
    td_open = ('<td style="padding:6px 10px;border-bottom:1px solid #f3f4f6;'
               'font-family:monospace;font-size:13px">')

    head_html = "".join(f'{th_open}{_esc(label)}</th>' for label in headers)

    row_chunks = []
    for cells in rows:
        cell_html = "".join(f'{td_open}{cell}</td>' for cell in cells)
        row_chunks.append(f'<tr>{cell_html}</tr>')
    body_html = "".join(row_chunks)

    return ('<table style="border-collapse:collapse;width:100%;'
            'border:1px solid #e5e7eb;border-radius:6px;'
            f'overflow:hidden"><thead><tr>{head_html}</tr></thead>'
            f'<tbody>{body_html}</tbody></table>')
def _card(title: str, body_html: str, hint: str = "") -> str:
    """Wrap ``body_html`` in a white, bordered card with an ``<h3>`` heading.

    ``title`` and ``hint`` are escaped; ``body_html`` is inserted verbatim.
    The grey hint line is emitted only when ``hint`` is truthy and sits
    between the heading and the body.
    """
    pieces = [
        '<div style="background:#fff;border:1px solid #e5e7eb;'
        'border-radius:8px;padding:14px 18px;margin:12px 0">',
        f'<h3 style="margin:0 0 10px 0;font-size:15px">{_esc(title)}</h3>',
    ]
    if hint:
        pieces.append(
            '<div style="color:#6b7280;font-size:12px;margin-top:4px">'
            f'{_esc(hint)}</div>'
        )
    pieces.append(f'{body_html}</div>')
    return "".join(pieces)
def _cell_color(count: int, max_count: int) -> str:
    """Return the heatmap background colour for a cell as ``#rrggbb``.

    A non-positive ``count`` or ``max_count`` yields the neutral ``#f9fafb``.
    Otherwise each RGB channel is interpolated linearly from ``#f9fafb``
    towards ``#1e40af`` by ``count / max_count`` (capped at 1.0) and
    truncated to an integer.
    """
    light = (0xf9, 0xfa, 0xfb)   # channels of #f9fafb
    deep = (0x1e, 0x40, 0xaf)    # channels of #1e40af

    if count <= 0 or max_count <= 0:
        return "#f9fafb"

    ratio = min(1.0, count / max_count)
    channels = [int(lo + (hi - lo) * ratio) for lo, hi in zip(light, deep)]
    return "#" + "".join(f"{ch:02x}" for ch in channels)
def _heatmap(queries: List[Dict], docs: List[str],
             pair_lookup: Dict, ) -> str:
    """Render a query × doc heat table.

    Args:
        queries: Row dicts. Each must carry ``"fingerprint"``; a truthy
            ``"sample"`` is preferred as the row label.
        docs: Column doc ids, in display order.
        pair_lookup: Maps ``(fingerprint, doc_id)`` → count. Missing pairs
            are treated as 0.

    Returns:
        An inline-styled ``<table>`` string, or an italic placeholder
        ``<div>`` when either axis is empty.

    Raises:
        KeyError: If a query dict has no ``"fingerprint"`` key.
    """
    if not queries or not docs:
        return ('<div style="padding:10px 14px;color:#6b7280;'
                'font-style:italic">— not enough data for matrix —</div>')

    # Resolve every cell once; the grid feeds both the colour scale and the
    # rendering pass, so pair_lookup is not probed twice per cell.
    grid = [
        [pair_lookup.get((q["fingerprint"], did), 0) for did in docs]
        for q in queries
    ]
    # Floor at 0 so an all-negative lookup still yields a neutral scale.
    max_count = max(0, max(c for row in grid for c in row))

    header_cells = "".join(
        f'<th style="padding:4px 8px;background:#f9fafb;'
        f'border-bottom:1px solid #e5e7eb;font-weight:600;font-size:12px;'
        f'writing-mode:horizontal-tb;max-width:120px;'
        f'word-break:break-all">{_esc(did)}</th>'
        for did in docs
    )

    row_chunks = []
    for q, counts in zip(queries, grid):
        # Truncate the raw label first, then escape. Slicing the escaped
        # text can cut an entity such as ``&amp;`` in half and emit broken
        # markup.
        raw_label = str(q.get("sample") or q["fingerprint"])
        row_label = _esc(raw_label[:40])

        cells = []
        for c in counts:
            bg = _cell_color(c, max_count)
            text = str(c) if c > 0 else ""
            # White text once the cell is dark enough to need the contrast.
            color = "#fff" if c > max_count * 0.6 else "#1f2937"
            cells.append(
                f'<td style="text-align:center;padding:6px 10px;'
                f'background:{bg};color:{color};'
                f'font-family:monospace;font-size:12px;'
                f'border-right:1px solid #e5e7eb">{text}</td>'
            )
        row_chunks.append(
            f'<tr><td style="text-align:right;padding:6px 10px;'
            f'font-family:monospace;font-size:12px;color:#374151;'
            f'border-right:1px solid #e5e7eb;max-width:200px;'
            f'white-space:nowrap;overflow:hidden;text-overflow:ellipsis">'
            f'{row_label}</td>{"".join(cells)}</tr>'
        )
    body_rows = "".join(row_chunks)

    return (
        f'<table style="border-collapse:collapse;width:100%;'
        f'border:1px solid #e5e7eb;border-radius:6px;overflow:hidden">'
        f'<thead><tr><th style="padding:4px 8px;background:#f9fafb;'
        f'border-bottom:1px solid #e5e7eb">query ↓ / doc →</th>'
        f'{header_cells}</tr></thead>'
        f'<tbody>{body_rows}</tbody></table>'
    )
def render_query_analytics_ui(
    summary: Dict[str, Any],
    top_queries: List[Dict[str, Any]],
    promote_candidates: List[Dict[str, Any]],
    matrix_queries: List[Dict[str, Any]],
    matrix_docs: List[str],
    matrix_pairs: Dict,
    refresh_sec: int = 0,
) -> str:
    """Render the query-analytics dashboard as one self-contained HTML page.

    All inputs are plain dicts/lists — this is a pure render with no I/O.

    Args:
        summary: Banner figures, read via ``n_unique_queries``, ``n_events``
            and ``avg_sources_per_query``; missing keys render as 0.
        top_queries: Rows for the "Top queries" table. Each needs ``count``
            and either a truthy ``sample`` or a ``fingerprint``;
            ``avg_sources`` and ``avg_latency_ms`` are optional.
        promote_candidates: Rows for the "Promote candidates" table. Same
            keys as ``top_queries`` plus ``suggested_preset_name``.
        matrix_queries: Row dicts for the affinity heat table.
        matrix_docs: Column doc ids for the affinity heat table.
        matrix_pairs: Dict keyed by ``(fingerprint, doc_id)`` → count. The
            caller pre-computes this so the renderer only does lookups.
        refresh_sec: When its integer value is positive, a
            ``<meta http-equiv="refresh">`` tag and a note are emitted.

    Returns:
        The complete HTML document as a string.

    Raises:
        KeyError: If a row lacks a required key (``count``,
            ``suggested_preset_name``, or ``fingerprint`` when ``sample``
            is absent or falsy).
    """

    def _clip(value: object, limit: int) -> str:
        # Truncate the raw text BEFORE escaping: slicing escaped output can
        # cut an entity such as ``&amp;`` in half and emit broken markup.
        return _esc(str(value)[:limit])

    def _num(value: object) -> float:
        # A None/absent metric renders as 0 instead of raising inside the
        # ``:.2f`` / ``:.0f`` format spec.
        return float(value or 0.0)

    # Banner. Counters go through _esc so a non-numeric value cannot inject
    # markup; for ints the output is unchanged.
    banner = (
        f'<div style="background:#fff;border:2px solid #2563eb;'
        f'border-radius:12px;padding:16px 22px;margin-bottom:16px">'
        f'<div style="color:#6b7280;font-size:12px;text-transform:uppercase;'
        f'letter-spacing:0.08em">Query analytics overview</div>'
        f'<div style="display:flex;gap:32px;margin-top:6px">'
        f'<div><div style="color:#6b7280;font-size:12px">unique queries'
        f'</div><div style="font-family:monospace;font-size:24px;'
        f'font-weight:600">{_esc(summary.get("n_unique_queries", 0))}'
        f'</div></div>'
        f'<div><div style="color:#6b7280;font-size:12px">total events'
        f'</div><div style="font-family:monospace;font-size:24px;'
        f'font-weight:600">{_esc(summary.get("n_events", 0))}</div></div>'
        f'<div><div style="color:#6b7280;font-size:12px">avg sources'
        f'</div><div style="font-family:monospace;font-size:24px;'
        f'font-weight:600">'
        f'{_num(summary.get("avg_sources_per_query")):.2f}</div></div>'
        f'</div></div>'
    )

    # Top queries table
    top_rows = [
        [
            _clip(row.get("sample") or row["fingerprint"], 60),
            _esc(row["count"]),
            f'{_num(row.get("avg_sources")):.2f}',
            f'{_num(row.get("avg_latency_ms")):.0f}',
        ]
        for row in top_queries
    ]
    top_block = _card(
        "Top queries (by count)",
        _table(["query", "count", "avg_sources", "avg_latency_ms"],
               top_rows, empty_msg="no queries recorded yet"),
        hint="user questions ordered by frequency — "
             "top-of-list queries deserve saved presets",
    )

    # Promote candidates
    promote_rows = [
        [
            _clip(row["suggested_preset_name"], 40),
            _clip(row.get("sample") or row["fingerprint"], 50),
            _esc(row["count"]),
            f'{_num(row.get("avg_sources")):.2f}',
            f'{_num(row.get("avg_latency_ms")):.0f}',
        ]
        for row in promote_candidates
    ]
    promote_block = _card(
        f'Promote candidates ({len(promote_candidates)})',
        _table(
            ["suggested_name", "sample", "count",
             "avg_sources", "avg_latency_ms"],
            promote_rows,
            empty_msg="no candidates — everyone's already a preset, "
                      "or nothing crosses the threshold",
        ),
        hint="queries asked ≥ 3× that aren't already saved as presets — "
             "click POST /v1/admin/queries/promote to ship them all",
    )

    # Heatmap
    heat_block = _card(
        f'Query × doc affinity matrix ({len(matrix_queries)} × '
        f'{len(matrix_docs)})',
        _heatmap(matrix_queries, matrix_docs, matrix_pairs),
        hint="darker = more often cited together. cells reveal "
             "which docs carry which questions",
    )

    # Normalise once so the meta tag and the note always agree, and a
    # fractional value below 1 cannot produce ``content="0"``.
    refresh = int(refresh_sec)
    meta_html = ""
    refresh_note = ""
    if refresh > 0:
        meta_html = f'<meta http-equiv="refresh" content="{refresh}">'
        refresh_note = f'Auto-refresh every {refresh}s.'

    return f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>tau-rag · query analytics</title>
{meta_html}
<style>
body {{
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", system-ui,
sans-serif;
margin: 0; padding: 24px;
background: #f9fafb; color: #1f2937;
}}
.wrap {{ max-width: 1200px; margin: 0 auto; }}
h1 {{ margin: 0 0 4px 0; font-size: 22px; }}
.sub {{ color: #6b7280; font-size: 13px; margin: 0 0 20px 0; }}
</style>
</head>
<body>
<div class="wrap">
<h1>🔎 tau-rag · query analytics</h1>
<div class="sub">v1.89 fingerprints + v1.90 promote candidates +
v1.96 query × doc affinity, combined into one view.
{refresh_note}
</div>
{banner}
{top_block}
{promote_block}
{heat_block}
</div>
</body>
</html>"""
# Public API: only the page renderer is exported; the underscore-prefixed
# helpers above are excluded from ``from ... import *``.
__all__ = ["render_query_analytics_ui"]