Spaces:

Legal-i
/

legal-eye

Running

App Files Files Community

legal-eye / tau_rag /api /query_analytics_ui.py

Legal-i

Initial deploy: legal-eye Hebrew legal RAG (17K corpus, verbatim-from-precedent)

3be54c6 verified about 1 month ago

raw

history blame contribute delete

9.41 kB

	"""Query analytics HTML dashboard (v1.98).

	Visual complement to v1.89 (query_stats) + v1.90 (promote candidates)
	+ v1.96 (query × doc affinity). Same design as v1.86 content health UI:
	inline CSS, zero JS framework, zero CDN, ?refresh=N for wall screens.

	Sections:
	* Banner: n_unique_queries, n_events, avg_sources_per_query.
	* Top queries (by count).
	* Promote candidates (count ≥ threshold, not already a preset).
	* Query × doc affinity — top N queries × top M docs as a heat-table.
	* Isolated queries — those with 0 cited docs (retrieval gap).
	  (Planned: ``render_query_analytics_ui`` does not emit this section yet.)

	Style mirrors the v1.86 dashboard so ops have one visual language.
	"""
	from __future__ import annotations

	import html as _h
	from typing import Any, Dict, List, Optional


	def _esc(s: object) -> str:
	return _h.escape(str(s), quote=True)


	def _table(headers: List[str], rows: List[List[str]],
	           empty_msg: str = "— no data yet —") -> str:
	    """Build a bordered HTML table, or an italic placeholder when empty.

	    Header labels and ``empty_msg`` are escaped here. Cell values are
	    inserted verbatim, so callers must pass already-escaped HTML.

	    Args:
	        headers: column titles, one ``<th>`` each.
	        rows: table body; each inner list is one ``<tr>`` of cell HTML.
	        empty_msg: text shown instead of a table when ``rows`` is empty.

	    Returns:
	        The HTML fragment as a string.
	    """
	    if not rows:
	        return (f'<div style="padding:10px 14px;color:#6b7280;'
	                f'font-style:italic">{_esc(empty_msg)}</div>')

	    head_cells = []
	    for label in headers:
	        head_cells.append(
	            f'<th style="text-align:left;padding:6px 10px;'
	            f'background:#f9fafb;border-bottom:1px solid #e5e7eb;'
	            f'font-weight:600;font-size:13px">{_esc(label)}</th>'
	        )
	    th = "".join(head_cells)

	    def _row_html(cells: List[str]) -> str:
	        # One <tr>; cells are trusted to be HTML-safe already.
	        tds = "".join(
	            f'<td style="padding:6px 10px;border-bottom:1px solid #f3f4f6;'
	            f'font-family:monospace;font-size:13px">{cell}</td>'
	            for cell in cells
	        )
	        return f'<tr>{tds}</tr>'

	    body = "".join(_row_html(cells) for cells in rows)
	    return (f'<table style="border-collapse:collapse;width:100%;'
	            f'border:1px solid #e5e7eb;border-radius:6px;'
	            f'overflow:hidden"><thead><tr>{th}</tr></thead>'
	            f'<tbody>{body}</tbody></table>')


	def _card(title: str, body_html: str, hint: str = "") -> str:
	    """Wrap ``body_html`` in a white, bordered card with a heading.

	    ``title`` and ``hint`` are escaped; ``body_html`` is inserted as-is and
	    must already be safe HTML. The hint line, when given, is rendered
	    between the heading and the body.
	    """
	    if hint:
	        hint_html = (
	            f'<div style="color:#6b7280;font-size:12px;margin-top:4px">'
	            f'{_esc(hint)}</div>'
	        )
	    else:
	        hint_html = ""

	    opening = ('<div style="background:#fff;border:1px solid #e5e7eb;'
	               'border-radius:8px;padding:14px 18px;margin:12px 0">')
	    heading = (f'<h3 style="margin:0 0 10px 0;font-size:15px">'
	               f'{_esc(title)}</h3>')
	    return f'{opening}{heading}{hint_html}{body_html}</div>'


	def _cell_color(count: int, max_count: int) -> str:
	"""Heatmap cell background: empty (0) → neutral, max → deep blue."""
	if count <= 0 or max_count <= 0:
	return "#f9fafb"
	ratio = min(1.0, count / max_count)
	# Blend from #f9fafb (light) to #1e40af (deep blue)
	# Simple mix: higher count = darker blue
	r = int(0xf9 + (0x1e - 0xf9) * ratio)
	g = int(0xfa + (0x40 - 0xfa) * ratio)
	b = int(0xfb + (0xaf - 0xfb) * ratio)
	return f"#{r:02x}{g:02x}{b:02x}"


	def _heatmap(queries: List[Dict], docs: List[str],
	             pair_lookup: Dict, ) -> str:
	    """Render a query × doc heat table.

	    Args:
	        queries: one entry per table row. Each dict must contain
	            ``"fingerprint"``; ``"sample"`` is used as the row label when
	            present and truthy, otherwise the fingerprint is shown.
	        docs: document ids, one table column each.
	        pair_lookup: maps ``(fingerprint, doc_id)`` → count. Missing pairs
	            count as 0.

	    Returns:
	        An HTML ``<table>`` string, or an italic placeholder ``<div>`` when
	        either ``queries`` or ``docs`` is empty.
	    """
	    if not queries or not docs:
	        return ('<div style="padding:10px 14px;color:#6b7280;'
	                'font-style:italic">— not enough data for matrix —</div>')

	    # Resolve every cell once; the grid feeds both the colour scale and
	    # the rendering pass.
	    counts = [
	        [pair_lookup.get((q["fingerprint"], did), 0) for did in docs]
	        for q in queries
	    ]
	    max_count = max(0, max(c for row in counts for c in row))

	    header_cells = "".join(
	        f'<th style="padding:4px 8px;background:#f9fafb;'
	        f'border-bottom:1px solid #e5e7eb;font-weight:600;font-size:12px;'
	        f'writing-mode:horizontal-tb;max-width:120px;'
	        f'word-break:break-all">{_esc(did)}</th>'
	        for did in docs
	    )

	    row_html = []
	    for q, row_counts in zip(queries, counts):
	        # Truncate the raw label first and escape afterwards: slicing the
	        # escaped text could cut an entity such as "&quot;" in half.
	        row_label = _esc(str(q.get("sample") or q["fingerprint"])[:40])
	        cells = []
	        for c in row_counts:
	            bg = _cell_color(c, max_count)
	            text = _esc(c) if c > 0 else ""
	            # White text once the cell is dark enough to need it.
	            color = "#fff" if c > max_count * 0.6 else "#1f2937"
	            cells.append(
	                f'<td style="text-align:center;padding:6px 10px;'
	                f'background:{bg};color:{color};'
	                f'font-family:monospace;font-size:12px;'
	                f'border-right:1px solid #e5e7eb">{text}</td>'
	            )
	        row_cells = "".join(cells)
	        row_html.append(
	            f'<tr><td style="text-align:right;padding:6px 10px;'
	            f'font-family:monospace;font-size:12px;color:#374151;'
	            f'border-right:1px solid #e5e7eb;max-width:200px;'
	            f'white-space:nowrap;overflow:hidden;text-overflow:ellipsis">'
	            f'{row_label}</td>{row_cells}</tr>'
	        )
	    body_rows = "".join(row_html)

	    return (
	        f'<table style="border-collapse:collapse;width:100%;'
	        f'border:1px solid #e5e7eb;border-radius:6px;overflow:hidden">'
	        f'<thead><tr><th style="padding:4px 8px;background:#f9fafb;'
	        f'border-bottom:1px solid #e5e7eb">query ↓ / doc →</th>'
	        f'{header_cells}</tr></thead>'
	        f'<tbody>{body_rows}</tbody></table>'
	    )


	def render_query_analytics_ui(
	    summary: Dict[str, Any],
	    top_queries: List[Dict[str, Any]],
	    promote_candidates: List[Dict[str, Any]],
	    matrix_queries: List[Dict[str, Any]],
	    matrix_docs: List[str],
	    matrix_pairs: Dict,
	    refresh_sec: int = 0,
	) -> str:
	    """Render the query analytics dashboard as one HTML document.

	    All inputs are plain dicts/lists — pure render, no I/O.

	    Args:
	        summary: banner figures, read via ``n_unique_queries``,
	            ``n_events`` and ``avg_sources_per_query`` (missing keys
	            render as 0).
	        top_queries: rows for the "Top queries" table. Each row needs
	            ``fingerprint`` and ``count``; ``sample``, ``avg_sources`` and
	            ``avg_latency_ms`` are optional.
	        promote_candidates: rows for the "Promote candidates" table. Same
	            keys as ``top_queries`` plus a required
	            ``suggested_preset_name``.
	        matrix_queries: heat-table rows, passed through to ``_heatmap``.
	        matrix_docs: heat-table columns, passed through to ``_heatmap``.
	        matrix_pairs: dict keyed by ``(fingerprint, doc_id)`` → count.
	            Caller pre-computes this so the renderer is O(N·M) lookup only.
	        refresh_sec: when > 0 (after conversion to int), a
	            ``<meta http-equiv="refresh">`` tag and a note are emitted.

	    Returns:
	        The complete HTML page as a string.
	    """

	    def label_html(row: Dict[str, Any], limit: int) -> str:
	        # Truncate the raw text first, then escape: slicing escaped text
	        # could cut an entity such as "&amp;" in half.
	        return _esc(str(row.get("sample") or row["fingerprint"])[:limit])

	    # Banner
	    banner = (
	        f'<div style="background:#fff;border:2px solid #2563eb;'
	        f'border-radius:12px;padding:16px 22px;margin-bottom:16px">'
	        f'<div style="color:#6b7280;font-size:12px;text-transform:uppercase;'
	        f'letter-spacing:0.08em">Query analytics overview</div>'
	        f'<div style="display:flex;gap:32px;margin-top:6px">'
	        f'<div><div style="color:#6b7280;font-size:12px">unique queries'
	        f'</div><div style="font-family:monospace;font-size:24px;'
	        f'font-weight:600">{_esc(summary.get("n_unique_queries", 0))}'
	        f'</div></div>'
	        f'<div><div style="color:#6b7280;font-size:12px">total events'
	        f'</div><div style="font-family:monospace;font-size:24px;'
	        f'font-weight:600">{_esc(summary.get("n_events", 0))}</div></div>'
	        f'<div><div style="color:#6b7280;font-size:12px">avg sources'
	        f'</div><div style="font-family:monospace;font-size:24px;'
	        f'font-weight:600">'
	        f'{summary.get("avg_sources_per_query", 0.0):.2f}</div></div>'
	        f'</div></div>'
	    )

	    # Top queries table. _table inserts cells verbatim, so every cell is
	    # either escaped here or produced by a numeric format spec.
	    top_rows = [
	        [
	            label_html(row, 60),
	            _esc(row["count"]),
	            f'{row.get("avg_sources", 0.0):.2f}',
	            f'{row.get("avg_latency_ms", 0.0):.0f}',
	        ]
	        for row in top_queries
	    ]
	    top_block = _card(
	        "Top queries (by count)",
	        _table(["query", "count", "avg_sources", "avg_latency_ms"],
	               top_rows, empty_msg="no queries recorded yet"),
	        hint="user questions ordered by frequency — "
	             "top-of-list queries deserve saved presets",
	    )

	    # Promote candidates
	    promote_rows = [
	        [
	            _esc(str(row["suggested_preset_name"])[:40]),
	            label_html(row, 50),
	            _esc(row["count"]),
	            f'{row.get("avg_sources", 0.0):.2f}',
	            f'{row.get("avg_latency_ms", 0.0):.0f}',
	        ]
	        for row in promote_candidates
	    ]
	    promote_block = _card(
	        f'Promote candidates ({len(promote_candidates)})',
	        _table(
	            ["suggested_name", "sample", "count",
	             "avg_sources", "avg_latency_ms"],
	            promote_rows,
	            empty_msg="no candidates — everyone's already a preset, "
	                      "or nothing crosses the threshold",
	        ),
	        hint="queries asked ≥ 3× that aren't already saved as presets — "
	             "click POST /v1/admin/queries/promote to ship them all",
	    )

	    # Heatmap
	    heat_block = _card(
	        f'Query × doc affinity matrix ({len(matrix_queries)} × '
	        f'{len(matrix_docs)})',
	        _heatmap(matrix_queries, matrix_docs, matrix_pairs),
	        hint="darker = more often cited together. cells reveal "
	             "which docs carry which questions",
	    )

	    # Normalise before testing: a fractional value such as 0.5 would pass
	    # a "> 0" check yet render content="0", i.e. a tight reload loop.
	    refresh = int(refresh_sec)
	    if refresh > 0:
	        meta_html = f'<meta http-equiv="refresh" content="{refresh}">'
	        refresh_note = f'Auto-refresh every {refresh}s.'
	    else:
	        meta_html = ""
	        refresh_note = ""

	    return f"""<!doctype html>
	<html lang="en">
	<head>
	<meta charset="utf-8">
	<title>tau-rag · query analytics</title>
	{meta_html}
	<style>
	body {{
	font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", system-ui,
	sans-serif;
	margin: 0; padding: 24px;
	background: #f9fafb; color: #1f2937;
	}}
	.wrap {{ max-width: 1200px; margin: 0 auto; }}
	h1 {{ margin: 0 0 4px 0; font-size: 22px; }}
	.sub {{ color: #6b7280; font-size: 13px; margin: 0 0 20px 0; }}
	</style>
	</head>
	<body>
	<div class="wrap">
	<h1>🔎 tau-rag · query analytics</h1>
	<div class="sub">v1.89 fingerprints + v1.90 promote candidates +
	v1.96 query × doc affinity, combined into one view.
	{refresh_note}
	</div>
	{banner}
	{top_block}
	{promote_block}
	{heat_block}
	</div>
	</body>
	</html>"""


	# Public API: only the page renderer is exported; the underscore-prefixed
	# helpers above are internal building blocks.
	__all__ = ["render_query_analytics_ui"]