"""FastAPI app.

Run:  uvicorn tau_rag.api.fastapi_app:app --reload
"""
from __future__ import annotations

from contextlib import asynccontextmanager
from typing import Any, Dict, List, Optional

try:
    from fastapi import FastAPI, HTTPException
    from fastapi.exceptions import RequestValidationError
    from pydantic import BaseModel
except Exception as e:  # pragma: no cover
    raise RuntimeError(
        "FastAPI not installed. `pip install tau-rag[api]`."
    ) from e

from ..core.config import Config
from ..core.types import Document, Query, Strategy
from ..pipeline import Pipeline
from .models import (
    BatchQueryRequest,
    ChatRequest,
    DocumentBody,
    DocumentsRequest,
    FeedbackRequest,
    QueryPresetBody,
    QueryRequest,
    SearchRequest,
)
from .routers import (
    admin_content_router,
    admin_controls_router,
    admin_ops_router,
    admin_runtime_router,
    chat_router,
    documents_router,
    public_router,
    system_router,
)
from .errors import (
    ErrorCode, Limits, build_error_body,
    validate_query_text, validate_doc_list, validate_k,
)


# ────────────────────────────────────────────────────────────────────
# Production gate — refuse to boot when external LLM keys are set.
# legal-eye is verbatim-from-precedent (see CLAUDE.md "No external LLM");
# a leaked key in env would silently change answer behavior in code paths
# that probe the env.
#
# v2.89 — INVERTED: this assertion is now **default-on**. A developer
# who legitimately needs LLM keys in their local env (for unrelated
# tools) can opt out with TAU_RAG_ALLOW_EXTERNAL_LLM=true. Production
# environments (HF Space, etc.) MUST NOT set that flag.
# ────────────────────────────────────────────────────────────────────
import os as _os  # noqa: E402

# Explicit opt-out: only the case-insensitive values "1", "true" and "yes"
# disable the gate. An unset or empty variable leaves the gate active.
_LE_ALLOW_LLM = (
    (_os.environ.get("TAU_RAG_ALLOW_EXTERNAL_LLM") or "").lower()
    in ("1", "true", "yes")
)

# Import-time check: with the gate active, a non-empty value in either
# provider key variable aborts module import (and therefore app boot).
if not _LE_ALLOW_LLM and any(
    _os.environ.get(key_name)
    for key_name in ("ANTHROPIC_API_KEY", "OPENAI_API_KEY")
):
    raise RuntimeError(
        "legal-eye refuses to boot with ANTHROPIC_API_KEY or "
        "OPENAI_API_KEY in env — the product is verbatim-from-"
        "precedent and external LLM access is a brand/regulatory "
        "breaking change. Unset both keys, OR if you are running "
        "local dev with these keys present for unrelated tools, "
        "set TAU_RAG_ALLOW_EXTERNAL_LLM=true to bypass. Production "
        "environments must NEVER set the bypass flag."
    )

# ASGI application object. The middleware and exception handlers defined
# below in this module are registered on it via decorators.
app = FastAPI(title="TAU-RAG", version="2.0.0")


# ---------------------------------------------------- CORS + security
from fastapi.middleware.cors import CORSMiddleware     # noqa: E402
from starlette.responses import Response  # for response_class=Response
from .security import cors_config_from_env, apply_security_headers  # noqa: E402

# CORS settings are read once at import time. CORSMiddleware is installed
# only when the config's "allow_origins" entry is truthy; the whole mapping
# is then forwarded to the middleware as keyword arguments.
_cors_cfg = cors_config_from_env()
if _cors_cfg["allow_origins"]:
    app.add_middleware(CORSMiddleware, **_cors_cfg)


@app.middleware("http")
async def _security_headers_middleware(request, call_next):
    """Run the downstream handler, then stamp security headers on its response.

    The header set itself is decided by ``apply_security_headers``, which
    mutates the response's header mapping in place.
    """
    downstream_response = await call_next(request)
    apply_security_headers(downstream_response.headers)
    return downstream_response


# ---------------------------------------------------- global error handlers
def _rid_from(request) -> Optional[str]:
    """Return ``request.state.request_id``, or ``None`` if any link is missing.

    Tolerates a ``None`` request, a request without ``state`` and a state
    without ``request_id``.
    """
    state = getattr(request, "state", None)
    return getattr(state, "request_id", None)


def _maybe_translate_body(body, request) -> Dict[str, Any]:
    """v2.26 — localize ``body["error"]["message"]`` from Accept-Language.

    Best-effort by design: the body is returned untouched when it is not an
    error envelope, when there is no request or no ``accept-language``
    header, or when anything in the translation path raises. On success the
    nested error dict is mutated in place and the same ``body`` object is
    returned.
    """
    try:
        error_part = body.get("error") if isinstance(body, dict) else None
        if not isinstance(error_part, dict) or request is None:
            return body

        accept_language = request.headers.get("accept-language")
        if not accept_language:
            return body

        # Imported lazily so the i18n module is only touched on this path.
        from ..middleware.i18n import get_i18n

        error_code = error_part.get("code")
        original_message = error_part.get("message", "")
        localized = get_i18n().translate(
            error_code, original_message, accept_language)
        if localized != original_message:
            error_part["message"] = localized
    except Exception:
        # Translation must never turn an error response into a failure.
        pass
    return body


@app.exception_handler(HTTPException)
async def _http_exc_handler(request, exc: HTTPException):
    """Render an ``HTTPException`` as the canonical JSON error envelope.

    The HTTP status picks the canonical error code (statuses not in the table
    fall back to ``INTERNAL_ERROR``). A string ``detail`` becomes the message;
    a dict ``detail`` is passed through as structured details under the
    generic message; any other ``detail`` is dropped. Headers carried by the
    exception are preserved and ``X-Request-ID`` is added when known.
    """
    status_to_code = {
        400: ErrorCode.VALIDATION_ERROR,
        401: ErrorCode.UNAUTHORIZED,
        403: ErrorCode.ADMIN_REQUIRED,
        404: ErrorCode.NOT_FOUND,
        413: ErrorCode.PAYLOAD_TOO_LARGE,
        422: ErrorCode.VALIDATION_ERROR,
        429: ErrorCode.RATE_LIMITED,
    }
    error_code = status_to_code.get(exc.status_code, ErrorCode.INTERNAL_ERROR)

    detail = exc.detail
    if isinstance(detail, str):
        message, structured = detail, None
    elif isinstance(detail, dict):
        message, structured = "request failed", detail
    else:
        message, structured = "request failed", None

    rid = _rid_from(request)
    payload = _maybe_translate_body(
        build_error_body(error_code, message, rid, structured),
        request,
    )

    out_headers = dict(exc.headers or {})
    if rid:
        out_headers["X-Request-ID"] = rid
    return JSONResponse(
        status_code=exc.status_code, content=payload, headers=out_headers)


@app.exception_handler(RequestValidationError)
async def _validation_exc_handler(request, exc: RequestValidationError):
    """Render request-validation failures as a 422 canonical error envelope.

    The individual validation errors are exposed under
    ``details["errors"]``; the message is translated via Accept-Language
    when possible, and ``X-Request-ID`` is echoed when known.
    """
    # Function-scope import so this handler does not depend on a new
    # module-level import.
    from fastapi.encoders import jsonable_encoder

    rid = _rid_from(request)
    body = build_error_body(
        ErrorCode.VALIDATION_ERROR,
        "request failed validation",
        rid,
        # exc.errors() may carry values json.dumps cannot serialize (e.g.
        # exception objects under "ctx" or bytes under "input"). Without
        # encoding, rendering this 422 would itself raise. FastAPI's default
        # handler encodes the errors the same way.
        details={"errors": jsonable_encoder(exc.errors())},
    )
    body = _maybe_translate_body(body, request)
    headers = {"X-Request-ID": rid} if rid else {}
    return JSONResponse(status_code=422, content=body, headers=headers)


@app.exception_handler(Exception)
async def _unhandled_exc_handler(request, exc: Exception):
    """Catch-all: turn any unhandled exception into a 500 error envelope.

    The message is ``"<ExceptionType>: <text>"`` capped at 300 characters.
    ``X-Request-ID`` is echoed when known. No Accept-Language translation is
    applied here.

    NOTE(review): the exception text is returned to the client as-is —
    confirm that internal details cannot leak through it.
    """
    rid = _rid_from(request)
    summary = f"{type(exc).__name__}: {exc}"[:300]
    payload = build_error_body(ErrorCode.INTERNAL_ERROR, summary, rid)
    out_headers = {"X-Request-ID": rid} if rid else {}
    return JSONResponse(status_code=500, content=payload, headers=out_headers)

# ----------------------------------------------------------------- middleware
from fastapi import Request, UploadFile, File, Form
from fastapi.responses import JSONResponse
from collections import Counter

from ..middleware import get_cache, get_limiter
from ..middleware.auth import get_auth
from ..middleware.ratelimit import RateLimitExceeded
from ..middleware.observability import (
    get_obs, generate_request_id, RequestLog, _hash_prefix,
)
from ..middleware.maintenance import get_maintenance
from ..middleware.pii_redaction import get_pii_redactor
from ..middleware.slow_queries import get_slow_tracker, SlowRecord
from ..middleware.quota import get_quota_tracker
from ..middleware.idempotency import get_idempotency_store
from ..middleware.request_timeout import get_timeout_guard
from ..middleware.body_limit import get_body_limit_guard
from ..middleware.deprecation import (
    get_deprecation_registry, build_headers as _build_dep_headers,
)
from ..middleware.cost import get_cost_tracker
from ..middleware.compression import get_compression_tracker
from ..middleware.ip_allowlist import get_ip_allowlist
from ..middleware.query_complexity import get_query_complexity
from ..middleware.key_labels import get_key_labels
from ..middleware.i18n import get_i18n
from ..middleware.slo import get_slo_tracker
from ..middleware.prom_histograms import get_prom_histograms
from ..middleware.cost_alerts import get_cost_alerts
from ..middleware.async_jobs import get_job_queue
from ..middleware.etag import (
    compute_etag as _compute_etag,
    matches_if_none_match as _matches_inm,
    get_etag_tracker,
)
from ..middleware.hmac_signing import get_hmac_signing
from ..middleware.traceparent import (
    get_traceparent_manager,
    parse_traceparent as _parse_tp,
    build_traceparent as _build_tp,
)
from ..middleware.baggage import (
    get_baggage_manager,
    parse_baggage as _parse_baggage,
    build_baggage as _build_baggage,
)
from ..middleware.autocomplete import get_autocomplete
import asyncio as _asyncio
import time as _time
# Also expose as plain `time` so module-level expressions and
# new handlers can use `time.time()` without per-function imports.
import time


@app.middleware("http")
async def auth_and_ratelimit_middleware(request: Request, call_next):
    """Gatekeeper middleware: admission checks, dispatch, response decoration.

    Checks run in this order; each can short-circuit with a canonical JSON
    error body (always carrying ``X-Request-ID``):

      1. body-size limit (Content-Length pre-check)          -> 413
      2. maintenance / drain mode (admins and probes exempt) -> 503
      3. auth: admin scope on /v1/admin/, API key on
         protected paths when auth is required               -> 401
      4. per-key IP allowlist                                -> 403
      5. per-key HMAC signature                              -> 401
      6. per-key daily quota                                 -> 429
      7. rate limit on protected paths                       -> 429
      8. query-complexity pre-check                          -> 400
      9. Idempotency-Key replay                              -> cached response
     10. dispatch, optionally under a wall-clock timeout     -> 504

    After dispatch the response gets ``X-Request-ID``, optional
    ``traceparent`` / ``baggage`` / deprecation headers, and (for eligible
    2xx responses) is stored for idempotent replay. Every outcome is
    recorded through ``_log``.

    NOTE(review): this middleware is registered after
    ``_security_headers_middleware``. Under Starlette's "last added is
    outermost" ordering, the early rejections returned here presumably
    bypass ``apply_security_headers`` (and CORS) — confirm whether that is
    intended.
    """
    import json as _json

    t0 = _time.time()
    path = request.url.path
    protected = path.startswith(
        ("/v1/generate", "/v1/chat", "/v1/documents"))
    admin_only = path.startswith("/v1/admin/")

    api_key = request.headers.get("x-api-key")
    # X-Request-ID — honor client-supplied or generate our own, then stash
    # it on request.state so handlers can correlate if they want.
    request_id = request.headers.get("x-request-id") or generate_request_id()
    request.state.request_id = request_id

    def _install_replay(body: bytes) -> None:
        """Put an already-read body back so the downstream handler sees it."""

        async def _receive() -> Dict[str, Any]:
            return {"type": "http.request", "body": body,
                    "more_body": False}

        request._receive = _receive   # type: ignore[attr-defined]

    # v1.99 — optional body capture for replay. Opt-in via env var
    # TAU_RAG_OBS_CAPTURE_BODY=1. Only captures bodies for the replayable
    # endpoints (search/generate/chat) to keep log size bounded and avoid
    # picking up admin request bodies with keys.
    # v2.24 — also triggered when query-complexity scoring is enabled, so
    # the check has something to score.
    #
    # Two copies are kept on purpose:
    #   raw_request_body — the exact bytes received; used for HMAC
    #                      verification and complexity scoring.
    #   captured_body    — truncated, lossily decoded, PII-redacted text;
    #                      used ONLY for logging / cost estimation.
    raw_request_body: Optional[bytes] = None
    captured_body: Optional[str] = None
    _REPLAY_CAPTURE_PATHS = ("/v1/search", "/v1/generate", "/v1/chat")
    _needs_body = (
        _os.environ.get("TAU_RAG_OBS_CAPTURE_BODY") == "1"
        or get_query_complexity().is_enabled()
    )
    if (_needs_body
            and request.method == "POST"
            and path.startswith(_REPLAY_CAPTURE_PATHS)):
        try:
            raw_request_body = await request.body()
            # Install the replay BEFORE decoding/redacting, so a redactor
            # failure cannot leave the body consumed without a replay.
            _install_replay(raw_request_body)
        except Exception:
            raw_request_body = None
        if raw_request_body is not None:
            try:
                # Truncate to 4KB — real queries are under 1KB, legal
                # texts rarely reach 2KB. Very-long payloads get flagged
                # but replay won't work on them (acceptable).
                # v2.8 — PII redaction runs BEFORE the text can reach the
                # observability log. No-op when disabled.
                captured_body = get_pii_redactor().redact(
                    raw_request_body[:4096].decode("utf-8", errors="replace"))
            except Exception:
                # Never log an unredacted body if redaction itself failed.
                captured_body = None

    def _log(status: int, error: Optional[str] = None) -> None:
        """Record this request in observability, slow-query, SLO, histogram
        and cost trackers. Secondary trackers are best-effort."""
        extra: Dict[str, Any] = {}
        if error:
            extra["error"] = error
        if captured_body is not None:
            extra["body"] = captured_body
        # v2.25 — attach per-key labels (tenant, tier, region, ...) so
        # they auto-propagate to log rows / dashboards. No-op when the
        # key has no labels configured.
        if api_key:
            try:
                labels = get_key_labels().get_labels(api_key)
                if labels:
                    extra["labels"] = labels
            except Exception:
                pass
        latency_ms = (_time.time() - t0) * 1000.0
        get_obs().record(RequestLog(
            ts=_time.time(),
            request_id=request_id,
            method=request.method,
            path=path,
            status=status,
            latency_ms=latency_ms,
            key_hash_prefix=_hash_prefix(api_key),
            client_ip=(request.client.host if request.client else None),
            user_agent=request.headers.get("user-agent"),
            event_type="request",
            extra=extra,
        ))
        # v2.9 — also feed the slow-query tracker.
        try:
            get_slow_tracker().maybe_record(SlowRecord(
                ts=_time.time(),
                request_id=request_id,
                method=request.method,
                path=path,
                status=status,
                latency_ms=latency_ms,
                error=error,
            ))
        except Exception:
            pass
        # v2.30 — feed every request into SLO samplers.
        try:
            get_slo_tracker().record_request(status, latency_ms)
        except Exception:
            pass
        # v2.33 — observe in Prometheus histogram per path.
        try:
            get_prom_histograms().observe(path, latency_ms)
        except Exception:
            pass
        # v2.21 — cost tracking per API key, successful requests only.
        if 200 <= status < 300 and api_key:
            try:
                req_bytes = len(captured_body) if captured_body else 0
                get_cost_tracker().record(
                    raw_key=api_key, path=path,
                    req_bytes=req_bytes,
                    # response size unknown at this middleware layer
                    # without consuming the body; estimate req only.
                    resp_bytes=0,
                )
                # v2.32 — check cost alert thresholds; fire a webhook and
                # an audit record for each alert returned.
                from ..middleware.cost import (
                    _hash_prefix as _cost_hash_prefix,
                )
                kp = _cost_hash_prefix(api_key)
                if kp:
                    usage = get_cost_tracker().usage_for_key(kp)
                    alerts = get_cost_alerts().check_and_alert(
                        kp, float(usage.get("cost_usd", 0.0)))
                    if alerts:
                        from ..middleware import (
                            get_webhook_dispatcher,
                        )
                        for a in alerts:
                            try:
                                get_webhook_dispatcher().dispatch(a)
                            except Exception:
                                pass
                            get_obs().audit(
                                "cost.alert_fired",
                                key_prefix=a["key_prefix"],
                                threshold_usd=a["threshold_usd"],
                                current_cost_usd=a["current_cost_usd"],
                            )
            except Exception:
                pass

    def _reject(status: int, code, message: str, *, error: str,
                details: Optional[Dict[str, Any]] = None,
                extra_headers: Optional[Dict[str, str]] = None,
                translate: bool = False) -> JSONResponse:
        """Log a rejection and build its canonical JSON error response."""
        _log(status, error=error)
        body = build_error_body(
            code, message, request_id=request_id, details=details)
        if translate:
            body = _maybe_translate_body(body, request)
        headers = {"X-Request-ID": request_id}
        if extra_headers:
            headers.update(extra_headers)
        return JSONResponse(status_code=status, content=body,
                            headers=headers)

    # v2.16 — request body size limit. Cheap Content-Length pre-check
    # so large bodies don't traverse auth/quota just to get rejected.
    _body_guard = get_body_limit_guard()
    if _body_guard.is_enabled():
        _body_guard.record_check()
        cl_header = request.headers.get("content-length")
        if cl_header:
            try:
                cl: Optional[int] = int(cl_header)
            except ValueError:
                # Unparseable Content-Length: skip the pre-check.
                cl = None
            if cl is not None and cl > _body_guard.max_bytes:
                _body_guard.record_reject()
                return _reject(
                    413, ErrorCode.PAYLOAD_TOO_LARGE,
                    f"body too large: {cl} > {_body_guard.max_bytes}",
                    error="body_too_large",
                    details={"content_length": cl,
                             "max_bytes": _body_guard.max_bytes},
                )

    # 1. Auth check (only if TAU_RAG_REQUIRE_AUTH is set OR admin path)
    auth = get_auth()
    # v2.7 — maintenance / drain mode. Admin traffic always flows (so
    # operators can turn it off again); everyone else gets 503 +
    # Retry-After. Checked AFTER the auth object is available so we can
    # ask ``is_admin(key)`` but BEFORE rate limiting — otherwise a drained
    # pod would count rejected requests against the limiter.
    # v2.11 — k8s probes (/livez, /readyz) must always reach the handler
    # so the probe reflects true readiness; blocking them here would mask
    # other unreadiness signals during drain.
    _PROBE_PATHS = ("/livez", "/readyz")
    maint = get_maintenance()
    if (maint.is_enabled() and not admin_only
            and not auth.is_admin(api_key)
            and path not in _PROBE_PATHS):
        snap = maint.snapshot()
        return _reject(
            503, ErrorCode.INTERNAL_ERROR,
            "service is in maintenance mode",
            error="maintenance",
            details={
                "reason": snap["reason"],
                "retry_after": int(snap["retry_after"]),
                "maintenance_since_sec": round(snap["duration_sec"], 2),
            },
            extra_headers={"Retry-After": str(int(snap["retry_after"]))},
        )

    if admin_only:
        if not auth.is_admin(api_key):
            return _reject(
                401, ErrorCode.ADMIN_REQUIRED,
                "admin scope required",
                error="admin_required",
                details={"hint": "pass X-API-Key with admin scope"},
                translate=True,
            )
    elif protected and auth.required:
        scope = ("write" if request.method in ("POST", "PUT", "DELETE")
                 else "read")
        if not auth.validate(api_key, scope=scope):
            return _reject(
                401, ErrorCode.UNAUTHORIZED,
                "missing or invalid X-API-Key",
                error="unauthorized",
                details={"required_scope": scope},
                translate=True,
            )

    # v2.23 — per-API-key IP allowlist. Runs right after auth, before
    # quota/rate-limit: blocks stolen-key replay from unknown IPs before
    # they consume any per-key counters. No-op for keys with no allowlist
    # configured (opt-in per key).
    if api_key:
        client_ip_for_acl = (request.client.host
                             if request.client else None)
        if not get_ip_allowlist().is_allowed(api_key, client_ip_for_acl):
            return _reject(
                403, ErrorCode.UNAUTHORIZED,
                "request IP not in allowlist for this key",
                error="ip_not_allowed",
                details={"client_ip": client_ip_for_acl},
            )

    # v2.35 — HMAC signature verification. For keys registered in the
    # HMAC store, the client MUST present valid X-Signature and
    # X-Timestamp headers. Unsigned keys pass through.
    if api_key:
        hmac_store = get_hmac_signing()
        from ..middleware.hmac_signing import _key_prefix as _hm_prefix
        if hmac_store.requires_signing(_hm_prefix(api_key)):
            sig = request.headers.get("x-signature")
            ts = request.headers.get("x-timestamp")
            if raw_request_body is None and request.method == "POST":
                # Ensure the body is available for HMAC even if the other
                # capture flags are off.
                try:
                    raw_request_body = await request.body()
                    _install_replay(raw_request_body)
                except Exception:
                    raw_request_body = None
            # Verify against the exact received bytes. The log copy
            # (captured_body) is truncated, lossily decoded and possibly
            # PII-redacted, so a signature computed over it would not
            # match what the client signed.
            ok, reason = hmac_store.verify(
                raw_key=api_key,
                method=request.method,
                path=path,
                body=raw_request_body or b"",
                signature=sig,
                timestamp=ts,
            )
            if not ok:
                return _reject(
                    401, ErrorCode.UNAUTHORIZED,
                    f"HMAC verification failed: {reason}",
                    error=f"hmac_{reason}",
                    details={"reason": reason},
                )

    # 2. v2.12 — Per-API-key daily quota. Runs BEFORE rate limit so the
    # per-second limiter doesn't deduct tokens for requests that will be
    # rejected anyway. Skipped for requests without a key, whitelisted
    # clients (same whitelist as the rate limiter) and admin-only paths.
    if api_key and not admin_only:
        client_ip = request.client.host if request.client else None
        whitelist = get_limiter().whitelist
        if api_key not in whitelist and client_ip not in whitelist:
            qc = get_quota_tracker().check_and_increment(api_key)
            if not qc.ok:
                return _reject(
                    429, ErrorCode.RATE_LIMITED,
                    f"daily quota exceeded: {qc.used}/{qc.limit}",
                    error="quota_exceeded",
                    details={
                        "quota": "daily",
                        "limit": qc.limit,
                        "used": qc.used,
                        "reset_in_sec": qc.reset_in_sec,
                        "key_prefix": qc.key_prefix,
                    },
                    extra_headers={
                        "Retry-After": str(qc.reset_in_sec),
                        "X-Quota-Limit": str(qc.limit),
                        "X-Quota-Used": str(qc.used),
                    },
                )

    # 3. Rate limit (skip admin — already auth'd)
    if protected:
        try:
            client_key = (
                api_key
                or (request.client.host if request.client else "unknown")
            )
            # v1.73 — pass path so the limiter can apply per-endpoint
            # overrides.
            get_limiter().acquire(client_key, path=path)
        except RateLimitExceeded as e:
            return _reject(
                429, ErrorCode.RATE_LIMITED,
                f"rate limit exceeded for {e.key!r}",
                error="rate_limited",
                details={"retry_after": round(e.retry_after, 3),
                         "key": e.key},
                extra_headers={"Retry-After": f"{e.retry_after:.1f}"},
            )

    # v2.24 — query complexity pre-check. For POSTs on the query
    # endpoints, score the query and reject with 400 if it exceeds the
    # configured thresholds.
    _COMPLEXITY_PATHS = ("/v1/generate", "/v1/chat", "/v1/search")
    if (raw_request_body is not None
            and request.method == "POST"
            and path.startswith(_COMPLEXITY_PATHS)
            and get_query_complexity().is_enabled()):
        complexity_reject = None
        try:
            # Parse the full raw body, not the 4KB log copy. A truncated
            # copy is invalid JSON for any body over 4KB, which silently
            # skipped this check for exactly the oversized queries it is
            # meant to catch.
            parsed_body = _json.loads(raw_request_body)
            q = (parsed_body.get("query", "")
                 if isinstance(parsed_body, dict) else "")
            if q:
                scored = get_query_complexity().score(q)
                if scored.over_limit:
                    complexity_reject = (
                        scored.reason or "query too complex",
                        {
                            "n_tokens": scored.n_tokens,
                            "n_chars": scored.n_chars,
                            "unique_ratio": scored.unique_ratio,
                            "score": scored.score,
                        },
                    )
        except Exception:
            # Best-effort: malformed JSON or a scorer failure must not
            # block the request; the handler will validate the body.
            complexity_reject = None
        if complexity_reject is not None:
            return _reject(
                400, ErrorCode.VALIDATION_ERROR,
                complexity_reject[0],
                error="query_too_complex",
                details=complexity_reject[1],
            )

    # v2.13 — Idempotency-Key check BEFORE dispatch. Scoped to
    # (api_key_prefix, idempotency_key) so two clients using the same
    # key don't collide. POST only, whitelisted paths only.
    _IDEMPOTENT_PATHS = ("/v1/generate", "/v1/chat", "/v1/search")
    idem_key = request.headers.get("idempotency-key")
    idem_eligible = bool(
        idem_key
        and request.method == "POST"
        and path.startswith(_IDEMPOTENT_PATHS)
    )
    if idem_eligible:
        idem_scope = _hash_prefix(api_key) or (
            request.client.host if request.client else "anon")
        cached = get_idempotency_store().get(idem_scope, idem_key)
        if cached is not None:
            _log(cached.status, error="idempotent_hit")
            headers = {
                "X-Request-ID": request_id,
                "X-Idempotency-Hit": "1",
                "X-Idempotency-Key": idem_key,
                **cached.headers_extra,
            }
            return JSONResponse(
                status_code=cached.status,
                content=cached.body,
                headers=headers,
            )

    # 3. Dispatch + log
    # v2.4 — set request_id on the tracer so pipeline spans created during
    # handler execution get tagged. Tracing is optional: any failure here
    # just disables the tagging.
    try:
        from ..observability.tracing import get_tracer
        _t = get_tracer()
        _t.set_request_id(request_id)
    except Exception:
        _t = None
    # v2.14 + v2.20 — wall-clock timeout enforcement with per-endpoint
    # overrides resolved by the guard; 0 = no enforcement.
    _guard = get_timeout_guard()
    _guard.record_request()
    _to_prefix, _to_ms = _guard.resolve(path)
    try:
        if _to_ms > 0:
            try:
                response = await _asyncio.wait_for(
                    call_next(request),
                    timeout=_to_ms / 1000.0,
                )
            except _asyncio.TimeoutError:
                _guard.record_timeout(path=path)
                return _reject(
                    504, ErrorCode.INTERNAL_ERROR,
                    f"request exceeded {_to_ms:.0f}ms timeout",
                    error="request_timeout",
                    details={"timeout_ms": _to_ms,
                             "path": path,
                             "matched_prefix": _to_prefix or "(global)"},
                )
        else:
            response = await call_next(request)
    except Exception as e:
        # Log the failure, then let the app-level exception handlers
        # produce the actual response.
        _log(500, error=f"{type(e).__name__}: {e}")
        raise
    finally:
        if _t is not None:
            try:
                _t.set_request_id(None)
            except Exception:
                pass
    response.headers["X-Request-ID"] = request_id

    # v2.48 — W3C traceparent propagation. If enabled, parse the inbound
    # traceparent header (record stats) and emit one on the response. A
    # valid inbound header keeps its trace_id; otherwise a fresh
    # traceparent is built so clients still get a header.
    try:
        _tp_mgr = get_traceparent_manager()
        if _tp_mgr.is_enabled():
            inbound = request.headers.get("traceparent")
            parsed_tp = None
            if inbound:
                parsed_tp = _parse_tp(inbound)
                _tp_mgr.record_parsed(parsed_tp is not None)
            if parsed_tp:
                response.headers["traceparent"] = _build_tp(
                    trace_id=parsed_tp["trace_id"], parent_id=None,
                    sampled=True,
                )
            else:
                response.headers["traceparent"] = _build_tp()
            _tp_mgr.record_emitted()
    except Exception:
        pass

    # v2.52 — W3C Baggage: parse inbound, record, and re-emit on the
    # response for downstream propagation.
    try:
        _bg_mgr = get_baggage_manager()
        if _bg_mgr.is_enabled():
            inbound_bg = request.headers.get("baggage")
            if inbound_bg:
                items = _parse_baggage(inbound_bg)
                if items:
                    _bg_mgr.record_parsed(items)
                    response.headers["baggage"] = _build_baggage(items)
                    _bg_mgr.record_emitted()
    except Exception:
        pass

    # v2.17 — attach deprecation / sunset headers if this path is in the
    # deprecation registry. Applied after the handler has run.
    try:
        _dep_entry = get_deprecation_registry().match(path)
        if _dep_entry is not None:
            for k, v in _build_dep_headers(_dep_entry).items():
                response.headers[k] = v
    except Exception:
        pass

    # v2.13 — cache successful responses for idempotency replay.
    # Consumes body_iterator and reconstructs a response so downstream
    # still sees the full body.
    if idem_eligible and 200 <= response.status_code < 300:
        try:
            chunks = []
            async for chunk in response.body_iterator:   # type: ignore[attr-defined]
                chunks.append(chunk)
            body_bytes = b"".join(chunks)
            try:
                body_json = _json.loads(body_bytes.decode("utf-8"))
            except Exception:
                body_json = None
            if body_json is not None:
                try:
                    get_idempotency_store().set(
                        idem_scope, idem_key,
                        response.status_code, body_json,
                    )
                except Exception:
                    # The store is best-effort. The body iterator is
                    # already drained, so the response MUST still be
                    # rebuilt below or the client would get an empty body.
                    pass
            from fastapi.responses import Response as _Resp
            response = _Resp(
                content=body_bytes,
                status_code=response.status_code,
                headers=dict(response.headers),
                media_type=response.media_type,
            )
        except Exception:
            pass
    _log(response.status_code)
    return response


# ----------------------------------------------------------------- startup
def _pipeline_from_env() -> Pipeline:
    """Build the shared pipeline from environment configuration, so the
    same container image can run different flavors (no_llm, hebrew_dense,
    mock, ...).

    Two modes, checked in this order:
      1. TAU_RAG_CONFIG_JSON=path/to/config.json — load Config from JSON.
         Wins over TAU_RAG_PRESET. Useful for tau_native + custom verify
         thresholds without touching code.
      2. TAU_RAG_PRESET=<name> — pick a built-in preset from the map below.
         "tau_native" builds a Hebrew-legal config with the local TAU model
         as the generator, equivalent to configs/hebrew_legal_local.json.

    Both variables are whitespace-stripped before use. An unset or blank
    TAU_RAG_PRESET selects "no_llm"; an unrecognized name also falls back
    to "no_llm" after printing a warning that lists the valid names.

    Returns:
        The Pipeline produced by ``Pipeline.from_config``.
    """
    import os

    json_path = os.environ.get("TAU_RAG_CONFIG_JSON", "").strip()
    if json_path:
        cfg = Config.from_json(json_path)
        return Pipeline.from_config(cfg)

    # Strip like TAU_RAG_CONFIG_JSON above: a value such as "mock\n" (env
    # files, shell quoting) must select `mock`, not silently degrade to the
    # fallback. A blank value is treated the same as "unset".
    preset = os.environ.get("TAU_RAG_PRESET", "").strip() or "no_llm"

    def _tau_native_cfg() -> Config:
        # no_llm retrieval stack + the local TAU generator, reranker off.
        c = Config.no_llm()
        c.generation.provider = "tau_native"
        c.generation.language = "he"
        c.rerank.enabled = False
        return c

    presets = {
        "mock":         Config.mock,
        "default":      Config.default,
        "hebrew_legal": Config.hebrew_legal,
        "no_llm":       Config.no_llm,
        # no_llm_lite: same as no_llm but WITHOUT BM25 (saves ~5GB RAM on
        # 732k corpus; trades ~31% top-5 recall for memory headroom).
        # Use TAU_RAG_PRESET=no_llm_lite to switch.
        "no_llm_lite":  Config.no_llm_lite,
        "hebrew_dense": Config.hebrew_dense,
        "tau_native":   _tau_native_cfg,
    }
    # v4.x — fallback to `no_llm` (BM25 + gematria + hilbert + graph +
    # reranker), NOT `mock` (Jaccard-only, no rerank). The original
    # fallback made an invalid preset name silently degrade retrieval
    # to a token-overlap baseline that scored ~5 points worse on
    # diagnose_v10. `no_llm` is dependency-free yet uses the full
    # retrieval stack — same as the default used when the variable is unset.
    if preset not in presets:
        print(f"[tau-rag] unknown preset {preset!r}, "
              f"falling back to 'no_llm' "
              f"(valid: {sorted(presets)})")
    cfg = presets.get(preset, Config.no_llm)()
    return Pipeline.from_config(cfg)


# One shared pipeline, built once at import time from the environment by
# _pipeline_from_env(). Swap the config for production.
_pipeline: Pipeline = _pipeline_from_env()

# v4.x — Register `_pipeline` as the global singleton in pipeline.py so
# `get_pipeline()` (used by /v1/query, /v1/data/load_jsonl, and many
# other endpoints) returns this same instance instead of lazily building
# a SECOND pipeline. Before this line, the two were independent, which
# caused community-doc promotions and per-session uploads added via
# `_pipeline.add_documents(...)` to be invisible to search (silently
# routed to a different instance). All the fixes that switched to
# `get_pipeline()` for promote/replay continue to work; this also fixes
# the symmetric problem for endpoints that still use `_pipeline.*`
# directly (get_document, list_documents, index_stats, etc.).
try:
    from ..pipeline import set_pipeline as _set_global_pipeline
    _set_global_pipeline(_pipeline)
    print("[tau-rag] pipeline singleton unified (fastapi_app + pipeline.py)")
except Exception as _unify_err:
    # Best-effort: a failure is only logged so the module still imports.
    # NOTE(review): in that case `_pipeline` and `get_pipeline()` may again
    # be two different instances — confirm whether this should be fatal.
    print(f"[tau-rag] pipeline singleton unify failed: {_unify_err}")

# Auto-restore from snapshot if TAU_RAG_SNAPSHOT_PATH is set and the file
# exists. `_snap_path` stays a module global because the periodic
# auto-snapshot setup further down reuses it as the save target.
import os as _os
_snap_path = _os.environ.get("TAU_RAG_SNAPSHOT_PATH")
if _snap_path and _os.path.exists(_snap_path):
    try:
        # replace=False is passed through to load_snapshot.
        # NOTE(review): presumably this merges into, rather than replaces,
        # the current index — confirm against Pipeline.load_snapshot.
        _restore_summary = _pipeline.load_snapshot(_snap_path, replace=False)
        print(f"[tau-rag] restored from snapshot: {_restore_summary}")
    except Exception as _e:
        # A broken snapshot must not prevent startup; log and continue with
        # whatever the pipeline currently holds.
        print(f"[tau-rag] snapshot restore failed: {_e}")


# Periodic auto-snapshot — fires every N seconds as a crash-proofing measure.
# Enabled only when BOTH TAU_RAG_SNAPSHOT_PATH and
# TAU_RAG_SNAPSHOT_INTERVAL are set and the interval parses to a float > 0.
from ..snapshot import AutoSnapshotter, set_autosnapshotter, get_autosnapshotter  # noqa: E402

_snap_interval = _os.environ.get("TAU_RAG_SNAPSHOT_INTERVAL")
if _snap_path and _snap_interval:
    try:
        _iv = float(_snap_interval)
        if _iv > 0:
            # on_save forwards the snapshotter's summary as keyword fields
            # of a "snapshot.auto_periodic" audit event (so the summary is
            # expected to be a mapping). get_obs is resolved lazily, when
            # the callback actually fires.
            _auto = AutoSnapshotter(
                _pipeline, _snap_path, interval_sec=_iv,
                on_save=lambda s: get_obs().audit(
                    "snapshot.auto_periodic", **s),
            )
            _auto.start()
            # Registered so shutdown code can fetch and stop it via
            # get_autosnapshotter().
            set_autosnapshotter(_auto)
            print(f"[tau-rag] periodic auto-snapshot every {_iv}s → {_snap_path}")
    except Exception as _e:
        # Covers a non-numeric interval as well as snapshotter failures.
        print(f"[tau-rag] periodic snapshot setup failed: {_e}")


# Periodic metrics history sampler (v1.78) — optional, enabled by setting
# TAU_RAG_METRICS_HISTORY_INTERVAL_SEC to a positive number of seconds.
# TAU_RAG_METRICS_HISTORY_CAPACITY (default 720) sizes the history buffer.
# Any failure (bad number, import error, ...) is logged and ignored.
try:
    _metrics_iv_raw = _os.environ.get("TAU_RAG_METRICS_HISTORY_INTERVAL_SEC")
    if _metrics_iv_raw:
        _metrics_iv = float(_metrics_iv_raw)
        if _metrics_iv > 0:
            from ..middleware import (
                MetricsHistory, MetricsHistorySampler,
                get_metrics_history, set_metrics_sampler,
            )
            _mcap = int(_os.environ.get(
                "TAU_RAG_METRICS_HISTORY_CAPACITY", "720"))
            # Replace the default history with one sized from env.
            # Capacity is clamped to at least 10 samples.
            _h = MetricsHistory(max_samples=max(10, _mcap))
            from ..middleware import set_metrics_history
            set_metrics_history(_h)
            _sampler = MetricsHistorySampler(
                _h, interval_s=_metrics_iv, sample_on_start=True,
            )
            _sampler.start()
            # Registered so shutdown code can stop it.
            set_metrics_sampler(_sampler)
            # NOTE: this logs the raw env value, not the clamped capacity
            # actually passed to MetricsHistory above.
            print(f"[tau-rag] metrics history sampler every "
                  f"{_metrics_iv}s cap={_mcap}")
except Exception as _e:
    print(f"[tau-rag] metrics history sampler setup failed: {_e}")


# Background analytics retention scheduler (v1.93) — optional, enabled
# by TAU_RAG_ANALYTICS_TTL_DAYS (positive number of days; fractions are
# accepted). TAU_RAG_ANALYTICS_PRUNE_INTERVAL_SEC sets how often the
# scheduler runs (default 3600 seconds). Any setup failure is logged and
# ignored.
try:
    _ttl_raw = _os.environ.get("TAU_RAG_ANALYTICS_TTL_DAYS")
    if _ttl_raw:
        _ttl_days = float(_ttl_raw)
        if _ttl_days > 0:
            _prune_iv = float(_os.environ.get(
                "TAU_RAG_ANALYTICS_PRUNE_INTERVAL_SEC", "3600"))
            from ..middleware import (
                AnalyticsRetentionScheduler,
                set_retention_scheduler,
            )

            def _analytics_prune_cb(summary):
                # Audit hook passed to the scheduler as on_prune. Emits an
                # audit event only when something was removed, or —
                # failing that — when the summary carries an error; a
                # no-op prune stays silent. Never raises.
                try:
                    if summary.get("total_removed", 0) > 0:
                        get_obs().audit(
                            "analytics.prune.auto",
                            ttl_seconds=summary.get("ttl_seconds"),
                            total_removed=summary.get("total_removed"),
                        )
                    elif summary.get("error"):
                        get_obs().audit(
                            "analytics.prune.auto.error",
                            error=summary["error"],
                        )
                except Exception:
                    pass

            _retention = AnalyticsRetentionScheduler(
                ttl_seconds=_ttl_days * 86400.0,  # days → seconds
                interval_s=_prune_iv,
                on_prune=_analytics_prune_cb,
            )
            _retention.start()
            # Registered so shutdown code can stop it.
            set_retention_scheduler(_retention)
            print(f"[tau-rag] analytics retention scheduler: "
                  f"ttl={_ttl_days}d  interval={_prune_iv}s")
except Exception as _e:
    print(f"[tau-rag] retention scheduler setup failed: {_e}")


# Background alert evaluator (v1.81) — optional, enabled by setting
# TAU_RAG_ALERT_EVAL_INTERVAL_SEC to a positive number of seconds. Any
# setup failure is logged and ignored.
try:
    _alert_iv_raw = _os.environ.get("TAU_RAG_ALERT_EVAL_INTERVAL_SEC")
    if _alert_iv_raw:
        _alert_iv = float(_alert_iv_raw)
        if _alert_iv > 0:
            from ..middleware import (
                AlertScheduler, get_alert_store, get_metrics_history,
                set_alert_scheduler,
            )

            def _alert_fire_cb(verdict):
                # Audit hook passed to the scheduler as on_fire. Reads the
                # "rule", "reason", "latest_value" and "n_samples" keys of
                # the verdict; a missing key (or any audit failure) is
                # swallowed so the scheduler is never broken by it.
                try:
                    get_obs().audit(
                        "alert.fired",
                        rule=verdict["rule"],
                        reason=verdict["reason"],
                        latest_value=verdict["latest_value"],
                        n_samples=verdict["n_samples"],
                    )
                except Exception:
                    pass

            # The metrics history is looked up at this point, i.e. after
            # the sampler block above may have replaced the default one.
            _asched = AlertScheduler(
                get_alert_store(), get_metrics_history(),
                interval_s=_alert_iv, on_fire=_alert_fire_cb,
                evaluate_on_start=True,
            )
            _asched.start()
            # Registered so shutdown code can stop it.
            set_alert_scheduler(_asched)
            print(f"[tau-rag] alert scheduler every {_alert_iv}s")
except Exception as _e:
    print(f"[tau-rag] alert scheduler setup failed: {_e}")


# Auto-warmup on startup if env requests it (v1.56). Only the exact value
# TAU_RAG_WARMUP=1 enables it.
if _os.environ.get("TAU_RAG_WARMUP") == "1":
    try:
        # warmup() is optional on the pipeline; call it only when present.
        _fn = getattr(_pipeline, "warmup", None)
        if callable(_fn):
            _fn()
        # NOTE: the flag is set whenever no exception was raised — including
        # the case where the pipeline has no warmup() method at all.
        _pipeline._warmed = True   # type: ignore[attr-defined]
        print("[tau-rag] auto-warmup complete")
    except Exception as _e:
        print(f"[tau-rag] auto-warmup failed: {_e}")


# v4.x — Auto-load main corpus.jsonl on startup. Eliminates the manual
# `curl /v1/data/load_jsonl` step after every restart that was costing
# us ~5 points on diagnose_v10 because the queries hit an empty pipeline
# and silently fell through to live-only retrievers.
#
# Looks at TAU_RAG_AUTOLOAD_CORPUS first; otherwise tries
# tau_rag/runtime/corpus.jsonl as a sensible default. Set
# TAU_RAG_AUTOLOAD_CORPUS=0 to disable.
def _expected_fingerprint_from_files() -> str:
    """Fingerprint the on-disk corpus without going through the pipeline.

    Reads the ``id`` field of every JSON line in runtime/corpus.jsonl,
    runtime/parquet_cases.jsonl and runtime/community_corpus.jsonl
    (missing files are skipped; unparsable lines and records without a
    truthy id are ignored). The result is used to decide whether the
    persisted retriever state can be reused instead of rebuilding.

    Returns:
        ``"n=<count>;h=<first 12 hex chars of md5 over the sorted ids>"``,
        or ``""`` when no id was found at all.
    """
    import hashlib
    from pathlib import Path as _PP
    import json as _ej

    runtime_dir = _PP(__file__).resolve().parent.parent / "runtime"
    collected = []
    for fname in ("corpus.jsonl", "parquet_cases.jsonl",
                  "community_corpus.jsonl"):
        src = runtime_dir / fname
        if not src.exists():
            continue
        try:
            with src.open("r", encoding="utf-8") as fh:
                for raw in fh:
                    raw = raw.strip()
                    if not raw:
                        continue
                    try:
                        doc_id = _ej.loads(raw).get("id")
                        if doc_id:
                            collected.append(str(doc_id))
                    except Exception:
                        # Malformed line / non-object record: ignore it.
                        continue
        except Exception:
            # Unreadable file: keep whatever ids were gathered so far.
            pass

    if not collected:
        return ""
    joined = "|".join(sorted(collected))
    digest = hashlib.md5(joined.encode("utf-8")).hexdigest()
    return f"n={len(collected)};h={digest[:12]}"


# Build progress state — shared with public_system_status
# Populated during corpus autoload, read by the UI to render a live
# progress bar during long rebuilds (300k+ docs takes 15-20 min).
_build_progress: Dict[str, Any] = {"phase": "idle"}


def _build_progress_set(key: str, value: Any) -> None:
    """Store one progress field in the module-level state dict."""
    _build_progress.update({key: value})


def public_build_progress() -> Dict[str, Any]:
    """Return a shallow copy of the build state (read by /v1/system/status),
    so callers cannot mutate the live dict."""
    return _build_progress.copy()


def _autoload_corpus_on_startup() -> None:
    """Load the main corpus JSONL into the shared pipeline at startup.

    Steps, in order:
      1. Return at once when TAU_RAG_AUTOLOAD_CORPUS=0 or when the corpus
         file is missing. The file is TAU_RAG_AUTOLOAD_CORPUS_PATH if set,
         otherwise <package>/runtime/corpus.jsonl. These checks run before
         any project import, matching the parquet / scraped loaders.
      2. If TAU_RAG_RETRIEVER_PERSISTENCE is "1" (the default), try to
         restore persisted retriever state whose fingerprint matches the
         JSONL files on disk; a successful restore skips the rebuild.
      3. Skip when the pipeline already holds more than 50 docs (e.g.
         from a snapshot restore).
      4. Otherwise parse the JSONL line by line, stamp metadata.domain /
         metadata.domain_scores on docs that lack a domain (when the
         classifier can be imported), and index everything through
         ``pipe.add_documents``.

    Progress is published through ``_build_progress_set``: phase goes
    parsing → indexing → done, or "failed" with an "error" field. The
    phase always reaches a terminal value once parsing has started, even
    when the file contains no usable record.

    Errors raised while restoring or loading are caught and printed; they
    do not propagate to the importer.
    """
    flag = _os.environ.get("TAU_RAG_AUTOLOAD_CORPUS", "")
    if flag == "0":
        return

    from pathlib import Path as _P

    explicit = _os.environ.get("TAU_RAG_AUTOLOAD_CORPUS_PATH")
    if explicit:
        path = _P(explicit).expanduser().resolve()
    else:
        # Default: tau_rag/runtime/corpus.jsonl (relative to this module)
        here = _P(__file__).resolve().parent.parent
        path = here / "runtime" / "corpus.jsonl"
    # is_file() is False for a missing path as well as for a directory.
    if not path.is_file():
        return

    import json as _alj
    from ..core.types import Document as _Doc
    from ..pipeline import get_pipeline as _get_pipe

    # ---- Try persistent retriever indexes first ----
    # If we saved indexes from a previous run AND the corpus on disk
    # hasn't changed, skip the entire chunking + indexing pipeline.
    # Each retriever now exposes state_dict()/load_state_dict() so the
    # generic pickle-test path that used to hang on the BM25 inverted
    # index is no longer reached. Set TAU_RAG_RETRIEVER_PERSISTENCE=0
    # to disable (e.g. while debugging, or if disk is read-only).
    if _os.environ.get("TAU_RAG_RETRIEVER_PERSISTENCE", "1") == "1":
        try:
            from ..storage import (load_all_retrievers,
                                       fingerprint_corpus as _fpc)
            pipe = _get_pipe()
            # Compute the EXPECTED fingerprint from the JSONL files only
            # (not the in-memory state, which is empty). Cheap: count lines
            # and hash the IDs.
            expected = _expected_fingerprint_from_files()
            if expected:
                manifest = load_all_retrievers(pipe,
                                                  expected_fingerprint=expected)
                if manifest is not None:
                    n_loaded = manifest.get("n_loaded", 0)
                    n_docs = manifest.get("indexed_docs", {}).get("n_docs", 0)
                    print(f"[tau-rag] retrievers loaded from disk: "
                          f"{n_loaded} retrievers, {n_docs} docs "
                          f"(fingerprint match — skipping rebuild)")
                    return
        except Exception as e:
            # Fall through to the full rebuild below.
            print(f"[tau-rag] retriever persistence load failed: {e}")

    try:
        pipe = _get_pipe()
        # Skip if pipeline already has docs (e.g. from snapshot restore)
        existing = len(getattr(pipe, "_indexed_docs", []) or [])
        if existing > 50:
            print(f"[tau-rag] corpus auto-load skipped: pipeline already "
                  f"has {existing} docs")
            return
        # v2 — domain classifier: stamp each doc with metadata.domain +
        # metadata.domain_scores so the frontend can filter / browse by
        # legal domain (contracts, companies, family, ...). Cheap to run
        # at load time.
        try:
            from ..domain_classifier import classify as _classify_dom
        except Exception:
            _classify_dom = None  # graceful degradation — feature off if import breaks

        # ---- Build progress tracker ----
        # Progress is stored in module globals so /v1/system/status can
        # read it during the long rebuild. Three phases: parsing → indexing
        # → done. ETA computed from the average rate so far.
        import time as _t_prog
        _build_progress_set("phase", "parsing")
        _build_progress_set("started_at", int(_t_prog.time()))
        # Cheap pre-count to set total (for ETA). Reading lines is fast
        # even on 350k docs — the heavy work is parsing JSON + classifying.
        try:
            with path.open("r", encoding="utf-8") as _cnt:
                n_total_lines = sum(1 for _ in _cnt)
        except Exception:
            n_total_lines = 0
        _build_progress_set("n_total", n_total_lines)
        _build_progress_set("source", path.name)

        docs = []
        bad = 0
        _t_parse_start = _t_prog.time()
        with path.open("r", encoding="utf-8") as f:
            for i, line in enumerate(f):
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = _alj.loads(line)
                    md = dict(rec.get("metadata") or {})
                    text = str(rec.get("text", ""))
                    if _classify_dom and "domain" not in md:
                        try:
                            res = _classify_dom(text)
                            if res.get("top"):
                                md["domain"] = res["top"]
                                md["domain_scores"] = res.get("scores", {})
                        except Exception:
                            # Classification is optional; keep the doc.
                            pass
                    docs.append(_Doc(
                        id=str(rec.get("id", "")),
                        text=text,
                        metadata=md,
                    ))
                except Exception:
                    bad += 1
                # Update progress every 1000 docs (cheap)
                if (i + 1) % 1000 == 0:
                    elapsed = _t_prog.time() - _t_parse_start
                    # rate > 0 here: at least 1000 lines over a positive
                    # (clamped) elapsed time.
                    rate = (i + 1) / max(elapsed, 0.001)
                    # Clamp at 0: the pre-count may have failed (total 0),
                    # which would otherwise publish a negative ETA.
                    remaining = max(n_total_lines - (i + 1), 0)
                    eta = remaining / rate
                    _build_progress_set("n_done", i + 1)
                    _build_progress_set("rate_per_s", round(rate, 1))
                    _build_progress_set("eta_s", int(eta))
        if not docs:
            # Nothing indexable (empty file or only bad lines). Still move
            # to a terminal phase so status readers do not show a
            # "parsing" progress bar forever.
            _build_progress_set("phase", "done")
            _build_progress_set("n_done", 0)
            _build_progress_set("n_chunks", 0)
            _build_progress_set("eta_s", 0)
            print(f"[tau-rag] corpus auto-load: no valid docs "
                  f"(bad: {bad}) in {path.name}")
            return

        _build_progress_set("phase", "indexing")
        _build_progress_set("n_done", len(docs))
        # Default chunker: "legal_hebrew" — section-aware splitting on
        # Hebrew section / chapter / sub-clause markers, so a query asking
        # what a given section says hits a chunk whose entire content IS
        # that section.
        # Empirically benchmarked on diagnose_v10 with the no_llm
        # preset (BM25 + gematria + hilbert + graph + reranker):
        #   • legal_hebrew → section 1.00, concept 1.00, applied 0.47,
        #                    generalize 0.90  (≈10.2/12, best)
        #   • fixed        → section 1.00, concept 1.00, applied 0.40,
        #                    generalize 0.90  (≈10.0/12)
        # Override with TAU_RAG_AUTOLOAD_CHUNKER=fixed (or sentence)
        # if a different corpus shape is being indexed.
        chunker_name = _os.environ.get(
            "TAU_RAG_AUTOLOAD_CHUNKER", "legal_hebrew")
        _t_idx_start = _t_prog.time()
        n = pipe.add_documents(docs, chunker=chunker_name)
        idx_dt = _t_prog.time() - _t_idx_start
        _build_progress_set("phase", "done")
        _build_progress_set("n_chunks", n)
        _build_progress_set("indexing_s", round(idx_dt, 1))
        _build_progress_set("eta_s", 0)
        print(f"[tau-rag] corpus auto-load: {len(docs)} docs, "
              f"{n} chunks via {chunker_name} (bad: {bad}) "
              f"from {path.name} ({idx_dt:.1f}s)")
    except Exception as _e:
        _build_progress_set("phase", "failed")
        _build_progress_set("error", str(_e))
        print(f"[tau-rag] corpus auto-load failed: {_e}")


# Runs at import time: the module does not finish importing until the main
# corpus has been restored, loaded, skipped, or has failed to load.
_autoload_corpus_on_startup()


# v2 — Parquet judgments auto-load (Hebrew supreme-court rulings sampled
# from LawDBHeb/cases_clean.parquet via tau_rag/scripts/ingest_parquet_cases.py).
# Disabled with TAU_RAG_AUTOLOAD_PARQUET=0.
def _autoload_parquet_cases_on_startup() -> None:
    """Index runtime/parquet_cases.jsonl into the shared pipeline.

    No-op when TAU_RAG_AUTOLOAD_PARQUET=0 or when the file is missing or
    empty. Each JSON line becomes a Document (id / text / metadata); docs
    without a ``domain`` in their metadata get one from the domain
    classifier when it can be imported. Lines that cannot be turned into
    a Document are counted as "bad" and skipped. After a successful add,
    the pipeline's citation-network and outcome-stats caches are reset
    (if those attributes exist). Load errors are printed, never raised.

    NOTE(review): unlike the main-corpus loader, nothing here checks
    whether the pipeline already holds these docs (e.g. after a persisted
    retriever restore) — confirm that add_documents de-duplicates by id.
    """
    if _os.environ.get("TAU_RAG_AUTOLOAD_PARQUET", "1") == "0":
        return
    import json as _pj
    from pathlib import Path as _PP
    from ..core.types import Document as _DocP
    from ..pipeline import get_pipeline as _get_pipe_p

    pkg_root = _PP(__file__).resolve().parent.parent
    path = pkg_root / "runtime" / "parquet_cases.jsonl"
    if not (path.exists() and path.stat().st_size != 0):
        return
    try:
        # Reuse the domain classifier so docs without a stamped domain
        # still get one, exactly like _autoload_corpus_on_startup does.
        try:
            from ..domain_classifier import classify as _cls_p
        except Exception:
            _cls_p = None

        def _record_to_doc(raw):
            # Build one Document from a JSONL line; any exception raised
            # here marks the line as bad in the caller.
            rec = _pj.loads(raw)
            meta = dict(rec.get("metadata") or {})
            body = str(rec.get("text", ""))
            if _cls_p and "domain" not in meta:
                try:
                    verdict = _cls_p(body)
                    if verdict.get("top"):
                        meta["domain"] = verdict["top"]
                        meta["domain_scores"] = verdict.get("scores", {})
                except Exception:
                    pass  # classification is optional
            return _DocP(id=str(rec.get("id", "")), text=body, metadata=meta)

        pipe = _get_pipe_p()
        docs = []
        bad = 0
        with path.open("r", encoding="utf-8") as f:
            for raw in map(str.strip, f):
                if not raw:
                    continue
                try:
                    docs.append(_record_to_doc(raw))
                except Exception:
                    bad += 1
        if not docs:
            return

        chunker = _os.environ.get("TAU_RAG_AUTOLOAD_CHUNKER", "legal_hebrew")
        n = pipe.add_documents(docs, chunker=chunker)
        print(f"[tau-rag] parquet cases auto-load: {len(docs)} docs, "
              f"{n} chunks via {chunker} (bad: {bad}) "
              f"from {path.name}")
        # Invalidate citation network cache — it'll be rebuilt on
        # next /v1/judgments/.../network or /v1/citations/popular call
        try:
            for cache_attr in ("_citation_network_cache",
                               "_outcome_stats_cache"):
                if hasattr(pipe, cache_attr):
                    setattr(pipe, cache_attr, None)
        except Exception:
            pass
    except Exception as _e:
        print(f"[tau-rag] parquet cases auto-load failed: {_e}")


# Runs at import time, after the main corpus auto-load above.
_autoload_parquet_cases_on_startup()


# v5 — Scraped content auto-load (court verdicts + legislation + WhatsApp
# threads + dover press releases). Written by tau_rag.scrapers — same
# {id, text, metadata} shape as parquet_cases, so we reuse the loader.
def _autoload_scraped_on_startup() -> None:
    """Index runtime/scraped/scraped_corpus.jsonl into the shared pipeline.

    No-op when TAU_RAG_AUTOLOAD_SCRAPED=0 or when the file is missing or
    empty. Records use the same {id, text, metadata} shape as the parquet
    cases: each line becomes a Document, docs lacking ``metadata.domain``
    are classified when the domain classifier can be imported, and lines
    that fail to convert are counted as "bad". Load errors are printed,
    never raised.
    """
    if _os.environ.get("TAU_RAG_AUTOLOAD_SCRAPED", "1") == "0":
        return
    import json as _sj
    from pathlib import Path as _PS
    from ..core.types import Document as _DocS
    from ..pipeline import get_pipeline as _get_pipe_s

    pkg_root = _PS(__file__).resolve().parent.parent
    path = pkg_root / "runtime" / "scraped" / "scraped_corpus.jsonl"
    if not (path.exists() and path.stat().st_size != 0):
        return
    try:
        try:
            from ..domain_classifier import classify as _cls_s
        except Exception:
            _cls_s = None  # classifier unavailable — load without domains

        def _stamp_domain(meta, body):
            # Best-effort domain stamping; classifier errors are ignored.
            if not _cls_s or "domain" in meta:
                return
            try:
                verdict = _cls_s(body)
                if verdict.get("top"):
                    meta["domain"] = verdict["top"]
                    meta["domain_scores"] = verdict.get("scores", {})
            except Exception:
                pass

        pipe = _get_pipe_s()
        docs = []
        bad = 0
        with path.open("r", encoding="utf-8") as f:
            for raw in f:
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    rec = _sj.loads(raw)
                    meta = dict(rec.get("metadata") or {})
                    body = str(rec.get("text", ""))
                    _stamp_domain(meta, body)
                    docs.append(_DocS(
                        id=str(rec.get("id", "")),
                        text=body,
                        metadata=meta,
                    ))
                except Exception:
                    bad += 1
        if not docs:
            return

        chunker = _os.environ.get("TAU_RAG_AUTOLOAD_CHUNKER", "legal_hebrew")
        n = pipe.add_documents(docs, chunker=chunker)
        print(f"[tau-rag] scraped corpus auto-load: {len(docs)} docs, "
              f"{n} chunks via {chunker} (bad: {bad}) "
              f"from {path.name}")
    except Exception as _e:
        print(f"[tau-rag] scraped corpus auto-load failed: {_e}")


# Runs at import time, after the main corpus and parquet auto-loads above.
_autoload_scraped_on_startup()


# v4.x — Community corpus auto-load on startup.
# Approved submissions live in two places:
#   1. SQLite (uploads_store.db) — the source of truth.
#   2. runtime/community_corpus.jsonl — append-only, replayable.
# On every startup we replay the JSONL into the live pipeline so all
# previously-approved community docs are searchable, then run a bulk
# promote pass to backfill anything that was approved while the server
# was down (or to populate the JSONL on first run).
# Disabled with TAU_RAG_AUTOLOAD_COMMUNITY=0.
#
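# IMPORTANT: use pipeline.get_pipeline() here — NOT the local _pipeline.
# Historically fastapi_app's _pipeline was a DIFFERENT instance from the
# one /v1/query (and the rest of the app) reaches via get_pipeline(); the
# two were never registered with each other, so docs added to the wrong
# one were silently invisible to search. The set_pipeline(_pipeline) call
# earlier in this module now unifies them, but that step is best-effort
# (a failure is only logged), so get_pipeline() remains the only accessor
# guaranteed to return the instance search uses.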
# Runs unless TAU_RAG_AUTOLOAD_COMMUNITY is exactly "0". Two passes against
# the shared pipeline: (1) replay the community corpus, then (2) bulk-promote
# anything still unpromoted. Each pass only logs when it did something.
if _os.environ.get("TAU_RAG_AUTOLOAD_COMMUNITY", "1") != "0":
    try:
        from ..upload import promote as _up_promote
        from ..pipeline import get_pipeline as _get_pipe
        _shared_pipe = _get_pipe()
        # Pass 1 — replay previously approved docs.
        _replay = _up_promote.replay_community_corpus(_shared_pipe)
        if _replay.get("loaded"):
            print(f"[tau-rag] community corpus replay: "
                  f"{_replay['loaded']} docs, "
                  f"{_replay.get('n_chunks', 0)} chunks")
        # Pass 2 — backfill approvals that were never promoted.
        _bulk = _up_promote.bulk_promote_unpromoted(_shared_pipe)
        if _bulk.get("n_promoted"):
            # Reads n_pending / n_chunks_added by subscript, so the summary
            # is expected to carry them whenever n_promoted is truthy.
            print(f"[tau-rag] community corpus backfill: "
                  f"promoted {_bulk['n_promoted']} of "
                  f"{_bulk['n_pending']} pending "
                  f"({_bulk['n_chunks_added']} chunks)")
        if _bulk.get("n_failed"):
            print(f"[tau-rag] community corpus backfill: "
                  f"{_bulk['n_failed']} failures")
    except Exception as _e:
        # Community docs are optional for startup; log and continue.
        print(f"[tau-rag] community auto-load failed: {_e}")

# Persist retriever indexes after the full corpus is loaded.
# Runs in a background thread so it never blocks startup. Each retriever
# now has state_dict()/load_state_dict() — the slow generic pickle.dumps
# probe is no longer needed. Set TAU_RAG_RETRIEVER_PERSISTENCE=0 to
# disable (debugging, ephemeral filesystems, etc).
if _os.environ.get("TAU_RAG_RETRIEVER_PERSISTENCE", "1") == "1":
    try:
        from ..pipeline import get_pipeline as _gp_persist
        from ..storage import save_all_retrievers as _save_retr
        from ..storage.retriever_persistence import schedule_save as _schedule_save
        import threading as _th_persist

        def _do_persist():
            """Save all retriever indexes once. Skipped when the pipeline
            holds 100 docs or fewer; failures are printed, never raised."""
            try:
                _shared_pipe = _gp_persist()
                _existing_n = len(getattr(_shared_pipe, "_indexed_docs", []) or [])
                if _existing_n > 100:
                    import time as _ttp
                    _t0 = _ttp.time()
                    _manifest = _save_retr(_shared_pipe)
                    _dt = _ttp.time() - _t0
                    n_saved = sum(1 for r in _manifest.get("retrievers", {}).values()
                                    if r.get("saved"))
                    print(f"[tau-rag] retriever indexes persisted: "
                          f"{n_saved} saved in {_dt:.1f}s")
            except Exception as _e:
                print(f"[tau-rag] retriever persistence save failed: {_e}")
        # Run in background — never block startup
        _th_persist.Thread(target=_do_persist, daemon=True,
                             name="retriever-persist").start()

        # ---- Auto-save trigger + OOM safety net on add_documents() ----
        # Wrap pipe.add_documents to:
        #   1. Refuse new work if system memory is critically high
        #      (default >90% — set TAU_RAG_OOM_THRESHOLD_PCT to override).
        #      Returns 0 docs added without crashing the process. The
        #      caller sees an empty result and can retry after some
        #      memory pressure has cleared.
        #   2. Schedule a debounced background save after each
        #      successful call. Multiple rapid adds within 60s coalesce
        #      into a single save — a 5k-doc batch triggers ONE save.
        # Configurable via TAU_RAG_PERSIST_DEBOUNCE_S (default 60s)
        # and TAU_RAG_OOM_THRESHOLD_PCT (default 90).
        try:
            _shared_pipe = _gp_persist()
            _orig_add = _shared_pipe.add_documents
            _debounce_s = float(_os.environ.get(
                "TAU_RAG_PERSIST_DEBOUNCE_S", "60"))
            _oom_threshold = float(_os.environ.get(
                "TAU_RAG_OOM_THRESHOLD_PCT", "90"))

            def _check_memory_pressure() -> Optional[float]:
                """Return current memory % if usage is critical, else None.
                Silent-fail to None if psutil isn't installed."""
                try:
                    import psutil as _psm
                    pct = _psm.virtual_memory().percent
                    return pct if pct >= _oom_threshold else None
                except Exception:
                    return None

            def _add_documents_with_save(*args, **kwargs):
                """Drop-in replacement for pipe.add_documents: refuses
                (returns 0) under memory pressure, otherwise delegates and
                schedules a debounced save."""
                pct = _check_memory_pressure()
                if pct is not None:
                    print(f"[tau-rag] add_documents REFUSED — system memory "
                          f"at {pct:.1f}% (>= {_oom_threshold}% threshold). "
                          f"Free RAM or restart before continuing.")
                    return 0   # signal "no docs added"
                result = _orig_add(*args, **kwargs)
                try:
                    _schedule_save(_shared_pipe, delay_seconds=_debounce_s)
                except Exception:
                    pass   # never break add_documents on save scheduling
                return result
            _shared_pipe.add_documents = _add_documents_with_save
            print(f"[tau-rag] auto-save trigger installed "
                  f"(debounce={_debounce_s}s, oom_threshold={_oom_threshold}%)")
        except Exception as _e:
            print(f"[tau-rag] auto-save trigger install failed: {_e}")

        # ---- Graceful shutdown: flush pending save on SIGTERM/SIGINT ----
        # Without this, killing the server within `_debounce_s` of a recent
        # add_documents() loses any state that was queued for save.
        # The handler flushes once and then hands the signal on to whatever
        # handler was installed before ours. A handler that merely returned
        # would swallow SIGTERM / Ctrl+C whenever it is the active handler,
        # leaving the process running.
        try:
            from ..storage.retriever_persistence import flush_pending_save
            import signal as _signal, atexit as _atexit
            _shutdown_done = {"value": False}
            # signum → handler that was active before ours was installed.
            _prev_sig_handlers = {}

            def _graceful_shutdown(*args):
                """Flush pending persistence (first call only), then pass
                the signal on.

                Called with (signum, frame) as a signal handler and with
                no arguments from atexit. After flushing:
                  * previous handler was a Python callable → call it
                    (e.g. the default SIGINT handler raises
                    KeyboardInterrupt);
                  * previous disposition was SIG_DFL → raise
                    SystemExit(128 + signum) so the process terminates
                    and atexit hooks still run;
                  * SIG_IGN / unknown → return, as before.
                """
                if not _shutdown_done["value"]:
                    _shutdown_done["value"] = True
                    try:
                        pipe = _gp_persist()
                        n_docs = len(getattr(pipe, "_indexed_docs", []) or [])
                        if n_docs > 100:
                            print(f"[tau-rag] graceful shutdown — flushing "
                                  f"persistence ({n_docs:,} docs)...")
                            flush_pending_save(pipe, timeout_s=120.0)
                    except Exception as _e:
                        print(f"[tau-rag] shutdown flush failed: {_e}")

                # atexit invocation (no signal number): nothing to forward.
                if not args or not isinstance(args[0], int):
                    return
                signum = args[0]
                prev = _prev_sig_handlers.get(signum)
                if callable(prev):
                    prev(*args)
                elif prev == _signal.SIG_DFL:
                    raise SystemExit(128 + signum)

            # SIGTERM = systemd / Docker stop. SIGINT = Ctrl+C.
            # We DON'T install for SIGKILL — it's uncatchable by design.
            # signal.signal() returns the previously installed handler;
            # keep it so _graceful_shutdown can chain to it.
            try:
                _prev_sig_handlers[_signal.SIGTERM] = _signal.signal(
                    _signal.SIGTERM, _graceful_shutdown)
            except (ValueError, OSError):
                pass   # not main thread (uvicorn workers)
            try:
                _prev_sig_handlers[_signal.SIGINT] = _signal.signal(
                    _signal.SIGINT, _graceful_shutdown)
            except (ValueError, OSError):
                pass
            # atexit as a final safety net (e.g. sys.exit() in code path)
            _atexit.register(_graceful_shutdown)
            print("[tau-rag] graceful shutdown handlers installed "
                  "(SIGTERM, SIGINT, atexit)")
        except Exception as _e:
            print(f"[tau-rag] shutdown handler install failed: {_e}")
    except Exception as _e:
        print(f"[tau-rag] retriever persistence start failed: {_e}")


# Seed admin API key from env var on startup (dev convenience).
# If TAU_RAG_SEED_ADMIN_KEY is set, register it as an admin key so
# curl/browser can use it immediately. In production, use a secret
# manager — don't commit the env value to source control.
def _seed_admin_key_from_env() -> None:
    """Register the key from ``TAU_RAG_SEED_ADMIN_KEY`` as an admin API key.

    Does nothing when the variable is unset or blank. Otherwise the key is
    hashed and an ``APIKey`` with the ``admin``, ``read`` and ``write``
    scopes is stored under that hash, replacing any entry already there
    (so calling this repeatedly is harmless).

    Failures are printed rather than raised. A failure to persist the key
    store is reported separately: the in-memory registration stays in place,
    but the operator is told that it was not saved.
    """
    import os
    seed = os.environ.get("TAU_RAG_SEED_ADMIN_KEY", "").strip()
    if not seed:
        return
    try:
        from ..middleware.auth import (
            get_auth, _hash_key, APIKey)
        store = get_auth()
        h = _hash_key(seed)
        # Register as admin scope — idempotent (overwrite if exists)
        store._keys[h] = APIKey(
            key_hash=h,
            label="seed-admin-from-env",
            scopes={"admin", "read", "write"},
        )
        try:
            store._save()
        except Exception as save_err:
            # Keep going: the key is registered in memory. Report the
            # failure instead of hiding it so it is visible in the logs.
            print(
                "[tau-rag] Seeded admin key could not be persisted "
                f"({type(save_err).__name__}: {save_err}) — "
                "kept in memory only")
        print(
            "[tau-rag] Seeded admin key from "
            "TAU_RAG_SEED_ADMIN_KEY "
            f"(hash={h[:12]}...) — use it as X-API-Key")
    except Exception as e:
        print(f"[tau-rag] Failed to seed admin key: {e}")


# Auto-snapshot on shutdown — pairs with auto-restore on startup above.
def _save_snapshot_on_shutdown() -> None:
    """Stop the background workers, then write a final snapshot if configured.

    Workers are stopped in this order: auto-snapshotter, metrics sampler,
    alert scheduler, analytics retention scheduler. Each one is stopped and
    its registry slot cleared only when its getter returns a truthy object.
    Errors while stopping the three middleware workers are ignored.

    The snapshot is written only when ``TAU_RAG_SNAPSHOT_PATH`` is set; a
    failure there is printed, not raised.
    """
    def _halt(fetch, assign):
        # Stop the currently registered worker (if any) and clear its slot.
        worker = fetch()
        if worker:
            worker.stop()
            assign(None)

    # The periodic snapshot thread goes first so it cannot race the final save.
    _halt(get_autosnapshotter, set_autosnapshotter)

    # v1.78 — metrics sampler
    try:
        from ..middleware import get_metrics_sampler, set_metrics_sampler
        _halt(get_metrics_sampler, set_metrics_sampler)
    except Exception:
        pass

    # v1.81 — alert scheduler
    try:
        from ..middleware import get_alert_scheduler, set_alert_scheduler
        _halt(get_alert_scheduler, set_alert_scheduler)
    except Exception:
        pass

    # v1.93 — analytics retention scheduler
    try:
        from ..middleware import (
            get_retention_scheduler, set_retention_scheduler,
        )
        _halt(get_retention_scheduler, set_retention_scheduler)
    except Exception:
        pass

    target = _os.environ.get("TAU_RAG_SNAPSHOT_PATH")
    if target:
        try:
            info = _pipeline.save_snapshot(target)
            print(f"[tau-rag] shutdown snapshot saved: {info}")
            get_obs().audit("snapshot.auto_save_on_shutdown", **info)
        except Exception as _e:
            print(f"[tau-rag] shutdown snapshot failed: {_e}")


# v2.99.84 — lightweight ping endpoint. Used by:
#   - keep-warm loop (faster than /readyz)
#   - frontend on page load (TLS handshake + container wake-up)
# Returns instantly without touching the pipeline.
@app.get("/v1/ping")
def ping():  # type: ignore
    """Reply with ``ok`` and the current ``_wa_time.time()`` value; no pipeline access."""
    now = _wa_time.time()
    return {"ok": True, "ts": now}


# v2.99.84 — in-memory dashboard cache with a short TTL. It avoids
# re-reading the entire JSONL file on every refresh (every modal open
# triggers one).
# Keys are (days, top_k); values are (response, expires_ts).
_WA_DASH_CACHE: Dict[tuple, tuple] = {}   # (days, top_k) → (response, expires_ts)
# Cache entry lifetime — 5 seconds, per the note above.
_WA_DASH_TTL   = 5.0


async def _keep_warm_loop():
    """Day 48 — background task that keeps the pipeline warm.

    HF Spaces (CPU-basic tier) doesn't stop the container, but after
    a few minutes of idle the heavy retrieval pipeline drops out of
    OS page cache + Python LRU caches. First request after idle then
    takes ~20s vs ~1s warm.

    This task fires every TAU_RAG_KEEPWARM_SEC seconds (default 180
    = 3 min; lowered from 240 in v2.99.84 for a safer margin under the
    idle window) and runs a trivial retrieve to keep the hot path
    resident. Errors are swallowed — keep-warm should never crash the app.

    Disable by setting TAU_RAG_KEEPWARM_SEC to 0 (or any value <= 0).
    A value that is not an integer is reported and replaced by the
    default instead of killing the task with a ValueError.
    """
    import asyncio
    default_interval = 180
    raw_interval = _os.environ.get(
        "TAU_RAG_KEEPWARM_SEC", str(default_interval))
    try:
        interval = int(raw_interval)
    except ValueError:
        # A typo in the env var must not silently disable keep-warm:
        # without this the task would die on startup with nobody awaiting it.
        print(f"[tau-rag] invalid TAU_RAG_KEEPWARM_SEC={raw_interval!r} — "
              f"using default {default_interval}s")
        interval = default_interval
    if interval <= 0:
        return
    # Wait 60s after boot before starting — let real startup finish first.
    await asyncio.sleep(60)
    while True:
        try:
            # Imported inside the try so that an import failure is swallowed
            # like any other keep-warm error.
            from ..pipeline import get_pipeline
            from ..core.types import Query
            pipe = get_pipeline()
            # Trivial query — touches retrievers + tokenizer caches.
            await asyncio.to_thread(pipe.retrievers.search, Query(text="חוזה"), k=2)
        except Exception:
            pass  # swallow — never crash the app from keep-warm
        await asyncio.sleep(interval)


async def _eager_prewarm():
    """Load the pipeline and run a few warm-up searches during startup.

    The pipeline is obtained via ``get_pipeline`` in a worker thread, then
    six short Hebrew queries (one per legal domain) are sent to
    ``pipe.retrievers.search`` with ``k=2``, also in worker threads. The
    intent is that the first real request does not pay the cold-start
    cost of loading retrievers and shards.

    A failing warm-up query is ignored so the remaining ones still run.
    Any other failure (for example the pipeline cannot be built) is
    printed and swallowed: startup continues and the first request pays
    the cold-start cost instead.

    Set ``TAU_RAG_EAGER_PREWARM=0`` to skip the whole step (e.g. in dev).
    """
    import asyncio, time as _t
    prewarm_flag = _os.environ.get("TAU_RAG_EAGER_PREWARM", "1")
    if prewarm_flag == "0":
        return
    started = _t.time()
    try:
        from ..pipeline import get_pipeline
        from ..core.types import Query
        pipeline = await asyncio.to_thread(get_pipeline)
        # One query per domain, ordered roughly by expected traffic share.
        warmup_texts = (
            "חוזה",                  # contracts (most common)
            "אפרופים פרשנות",       # supreme-court anchor cluster
            "פיטורים שימוע",        # labor shard
            "רשלנות נזיקין",        # torts shard
            "חזקת חפות",            # criminal shard
            "סבירות מנהלית",        # administrative shard
        )
        for text in warmup_texts:
            try:
                await asyncio.to_thread(
                    pipeline.retrievers.search, Query(text=text), k=2)
            except Exception:
                # A single failing query must not stop the others.
                continue
        elapsed = _t.time() - started
        print(f"[eager-prewarm] {len(warmup_texts)} domains warm "
              f"after {elapsed:.1f}s")
    except Exception as exc:
        # Degraded start is preferred over no service at all.
        print(f"[eager-prewarm] FAILED ({type(exc).__name__}: {exc}) — "
              f"first request will pay cold-start cost")


@asynccontextmanager
async def _app_lifespan(_app: FastAPI):
    """Application lifespan: seed, prewarm, keep warm; clean up on exit.

    Startup (before the ``yield``): seed the admin key from the
    environment, run the eager prewarm to completion, then schedule the
    keep-warm loop as a background task on the running event loop.

    Shutdown (after the ``yield``): cancel the keep-warm task, wait for
    it to finish while ignoring whatever it raises, then run the
    shutdown snapshot routine.
    """
    import asyncio

    _seed_admin_key_from_env()
    # Day 48 — prewarm finishes before control is yielded, so the app does
    # not start serving until the warm-up attempt is done.
    await _eager_prewarm()
    # The local variable keeps a reference to the task for the whole
    # lifetime of the app, so it is not garbage-collected while running.
    warm_task = asyncio.create_task(_keep_warm_loop())
    try:
        yield
    finally:
        warm_task.cancel()
        try:
            await warm_task
        except asyncio.CancelledError:
            pass
        except Exception:
            pass
        _save_snapshot_on_shutdown()


# Install the lifespan handler, then mount every router in a fixed order.
app.router.lifespan_context = _app_lifespan

for _router in (
    system_router,
    documents_router,
    public_router,
    chat_router,
    admin_content_router,
    admin_controls_router,
    admin_ops_router,
    admin_runtime_router,
):
    app.include_router(_router)
del _router


# ------------------------------------------------------------------ schemas
# Request models for the public HTTP surface live in api.models.


# ------------------------------------------------------------------ routes
# Inline HTML/JS playground page with three tabs: documents, single query
# and chat. Returned by playground() and used by root() as the fallback when
# the static index.html is missing. The embedded script posts to
# /v1/documents, /v1/generate and /v1/chat, and reads/deletes
# /v1/sessions/<id>.
# NOTE: this is NOT a raw string — the `\\n` inside the script is emitted to
# the browser as `\n`, which is what the JavaScript regex needs.
_PLAYGROUND_HTML = """<!DOCTYPE html>
<html lang="he" dir="rtl"><head><meta charset="UTF-8"><title>TAU-RAG</title>
<style>
body{background:#0e1117;color:#e6edf3;font-family:"Segoe UI","Heebo",Arial,sans-serif;
  margin:0;padding:28px;max-width:900px;margin:0 auto;line-height:1.7}
h1{font-size:26px}h2{font-size:18px;color:#cdd9e5;margin-top:24px;
  border-bottom:1px dashed #30363d;padding-bottom:4px}
a{color:#58a6ff}.card{background:#161b22;border:1px solid #30363d;
  border-radius:12px;padding:14px 18px;margin:10px 0}
textarea,input{width:100%;background:#0d1117;color:#e6edf3;border:1px solid #30363d;
  border-radius:8px;padding:10px;font-family:inherit;font-size:14px;box-sizing:border-box}
textarea{min-height:90px}
button{background:#1f6feb;color:#fff;border:none;border-radius:8px;padding:10px 18px;
  font-size:14px;cursor:pointer;margin-top:8px}button:hover{background:#388bfd}
button.ghost{background:transparent;border:1px solid #30363d;color:#9da7b3}
button.ghost:hover{background:#161b22;color:#e6edf3}
pre{background:#11161d;border:1px solid #30363d;border-radius:8px;padding:12px;
  overflow-x:auto;direction:ltr;text-align:left;font-size:13px;white-space:pre-wrap}
.muted{color:#9da7b3;font-size:13px}.pill{display:inline-block;padding:2px 10px;
  border-radius:999px;border:1px solid #2ea043;color:#aee9c5;font-size:12px}
.tabs{display:flex;gap:8px;margin:16px 0 0;border-bottom:1px solid #30363d}
.tab{padding:8px 16px;cursor:pointer;border-radius:8px 8px 0 0;color:#9da7b3;
  border:1px solid transparent}
.tab.active{background:#161b22;border:1px solid #30363d;border-bottom:1px solid #161b22;
  color:#e6edf3;margin-bottom:-1px}
.tabpanel{display:none}.tabpanel.active{display:block}
.bubble{margin:10px 0;padding:12px 16px;border-radius:14px;max-width:85%;line-height:1.6}
.bubble.user{background:#1f2d3f;margin-left:auto;border:1px solid #30405a}
.bubble.bot{background:#161b22;border:1px solid #30363d}
.bubble .src{font-size:11px;color:#9da7b3;margin-top:6px}
.bubble.followup{border-right:3px solid #a371f7}
.row{display:flex;gap:8px;align-items:center}
.row input{flex:1}
.badge{font-size:11px;padding:2px 8px;border-radius:999px;border:1px solid #30363d;
  margin-right:6px;color:#9da7b3}
</style></head><body>
<h1>🔎 TAU-RAG — Hebrew legal RAG</h1>
<p class="muted">Pipeline alive at <code>/v1/*</code>. Swagger UI:
<a href="/docs">/docs</a> · ReDoc: <a href="/redoc">/redoc</a></p>

<div class="tabs">
  <div class="tab active" onclick="showTab('tab-docs')">📄 Documents</div>
  <div class="tab" onclick="showTab('tab-ask')">❓ Single query</div>
  <div class="tab" onclick="showTab('tab-chat')">💬 Chat</div>
</div>

<div id="tab-docs" class="tabpanel active">
<h2>Add documents</h2>
<div class="card">
<textarea id="docs">[
  {"id":"labor-5","text":"המעביד חייב לשלם לעובד תוספת 25%, למעט עבודה בשבת.","metadata":{}},
  {"id":"labor-7","text":"אסור למעביד לפטר עובד בשל סירובו לעבוד שעות נוספות.","metadata":{}},
  {"id":"labor-12","text":"חופשת מחלה תינתן בתנאי שהמציא אישור רפואי.","metadata":{}},
  {"id":"family-1","text":"לבני זוג הזכות להתגרש על פי דין, כפוף לאישור בית הדין הרבני.","metadata":{}}
]</textarea>
<button onclick="addDocs()">POST /v1/documents</button>
<pre id="docsOut"></pre>
</div>
</div>

<div id="tab-ask" class="tabpanel">
<h2>Ask a question</h2>
<div class="card">
<input id="q" value="מה חובות המעביד?">
<button onclick="ask()">POST /v1/generate</button>
<pre id="ansOut"></pre>
</div>
</div>

<div id="tab-chat" class="tabpanel">
<h2>Chat with session memory</h2>
<div class="card">
<div class="row">
  <span class="badge">session</span>
  <input id="sid" value="browser-1" style="max-width:180px">
  <button class="ghost" onclick="newSid()">new</button>
  <button class="ghost" onclick="clearSession()">clear server-side</button>
  <button class="ghost" onclick="loadHistory()">reload history</button>
</div>
<div id="chatLog" style="margin-top:14px;min-height:120px"></div>
<div class="row" style="margin-top:10px">
  <input id="chatQ" placeholder="שאל שאלה… (Enter לשליחה)"
    onkeydown="if(event.key==='Enter') sendChat()">
  <button onclick="sendChat()">שלח</button>
</div>
</div>
</div>

<script>
function showTab(id){
  document.querySelectorAll('.tab').forEach(t=>t.classList.remove('active'));
  document.querySelectorAll('.tabpanel').forEach(t=>t.classList.remove('active'));
  document.getElementById(id).classList.add('active');
  event.target.classList.add('active');
  if(id==='tab-chat') loadHistory();
}
async function addDocs(){
  const docs = JSON.parse(document.getElementById('docs').value);
  const r = await fetch('/v1/documents',{method:'POST',
    headers:{'Content-Type':'application/json'},
    body: JSON.stringify({documents: docs})});
  document.getElementById('docsOut').textContent = JSON.stringify(await r.json(),null,2);
}
async function ask(){
  const q = document.getElementById('q').value;
  const r = await fetch('/v1/generate',{method:'POST',
    headers:{'Content-Type':'application/json'},
    body: JSON.stringify({query:q,k:10,rerank_k:5,strategy:"hybrid",lang:"he"})});
  document.getElementById('ansOut').textContent = JSON.stringify(await r.json(),null,2);
}
function newSid(){
  document.getElementById('sid').value = 'browser-' + Math.random().toString(36).slice(2,7);
  document.getElementById('chatLog').innerHTML = '';
}
function sid(){return document.getElementById('sid').value.trim() || 'default';}
function addBubble(who, text, extra){
  const div = document.createElement('div');
  div.className = 'bubble ' + who + (extra && extra.followup ? ' followup' : '');
  div.innerHTML = text.replace(/</g,'&lt;').replace(/\\n/g,'<br>');
  if(extra && extra.sources){
    const s = document.createElement('div');
    s.className = 'src';
    s.textContent = 'sources: ' + JSON.stringify(extra.sources) +
      (extra.omega !== undefined ? '  ·  Ω=' + extra.omega.toFixed(2) : '');
    div.appendChild(s);
  }
  document.getElementById('chatLog').appendChild(div);
  div.scrollIntoView({behavior:'smooth', block:'end'});
}
async function sendChat(){
  const q = document.getElementById('chatQ').value.trim();
  if(!q) return;
  addBubble('user', q);
  document.getElementById('chatQ').value = '';
  const r = await fetch('/v1/chat',{method:'POST',
    headers:{'Content-Type':'application/json'},
    body: JSON.stringify({query:q, session_id: sid(), lang:'he'})});
  const data = await r.json();
  addBubble('bot', data.answer, {sources:data.sources, omega:data.signals.omega});
}
async function clearSession(){
  await fetch('/v1/sessions/' + encodeURIComponent(sid()), {method:'DELETE'});
  document.getElementById('chatLog').innerHTML = '';
}
async function loadHistory(){
  try{
    const r = await fetch('/v1/sessions/' + encodeURIComponent(sid()));
    if(!r.ok) return;
    const info = await r.json();
    document.getElementById('chatLog').innerHTML = '';
    (info.turns || []).forEach(t => {
      addBubble('user', t.query);
      addBubble('bot', t.answer, {sources: t.sources});
    });
  }catch(e){}
}
loadHistory();
</script></body></html>"""


def _static_file(name: str) -> Optional[str]:
    """Return the UTF-8 text of ``static/<name>`` or ``None``.

    The ``static`` directory is looked up next to the parent directory of
    this module's own directory. ``None`` is returned when the path is
    not a regular file or when reading it fails for any reason.
    """
    import os
    package_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    target = os.path.join(package_root, "static", name)
    if os.path.isfile(target):
        try:
            with open(target, "r", encoding="utf-8") as handle:
                content = handle.read()
        except Exception:
            # Unreadable file is treated the same as a missing one.
            content = None
        return content
    return None


def root():
    """Return the end-user chat UI read from ``static/index.html``.

    When that file cannot be read, the inline playground page is
    returned instead.
    """
    from fastapi.responses import HTMLResponse
    page = _static_file("index.html")
    return HTMLResponse(_PLAYGROUND_HTML if page is None else page)


def admin_console():
    """Return the admin dashboard read from ``static/admin.html``.

    Responds with a small HTML notice and status 404 when the file
    cannot be read.
    """
    from fastapi.responses import HTMLResponse
    page = _static_file("admin.html")
    if page is not None:
        return HTMLResponse(page)
    missing_notice = (
        "<h1>Admin console not available</h1>"
        "<p>tau_rag/static/admin.html is missing.</p>"
    )
    return HTMLResponse(missing_notice, status_code=404)


def playground():
    """Return the built-in inline playground page (kept for backward compatibility)."""
    from fastapi.responses import HTMLResponse
    response = HTMLResponse(_PLAYGROUND_HTML)
    return response


def favicon():
    """Return an empty 204 response so browsers stop logging a favicon 404."""
    from fastapi.responses import Response
    no_content = Response(status_code=204)
    return no_content


def health():
    """Return a static health payload: ``ok`` flag plus the version string."""
    payload = dict(ok=True, version="2.0.0")
    return payload


def version_manifest():
    """Build + runtime version info. Unauthenticated so anyone (including
    deploy scripts, monitoring, and teammates debugging) can check what's
    actually running. Does not expose secrets, just structural metadata.

    Returns a dict with these keys:
      * ``version`` — hard-coded version string
      * ``preset`` — value of ``TAU_RAG_PRESET`` or ``"unknown"``
      * ``pipeline`` — retriever names, generator/fusion/rerank settings,
        verifier class name, last chunker
      * ``build`` — Python version, platform string, FastAPI version
      * ``git`` — commit/branch/dirty, or ``{"available": False}`` when any
        git command fails (not a checkout, git missing, timeout)
      * ``features`` — feature flags, mostly derived from env variables

    Robustness: a malformed numeric env value or a failing span-exporter
    lookup degrades that single field instead of failing the endpoint.
    ``snapshot_interval`` becomes ``None``, the two boost flags become
    ``False`` and ``span_exporter_type`` becomes ``"unknown"``.
    """
    import platform as _plat
    import sys as _sys
    import subprocess as _sp

    def _env_float(name: str) -> Optional[float]:
        # Parse env var *name* as float; None when unset, empty or malformed.
        raw = _os.environ.get(name)
        if not raw:
            return None
        try:
            return float(raw)
        except ValueError:
            return None

    def _git(*args: str) -> str:
        # Run `git <args>` with a 1 s timeout, stderr discarded; return
        # the stripped stdout. Raises on any failure (handled by caller).
        return _sp.check_output(
            ["git", *args], stderr=_sp.DEVNULL, timeout=1).decode().strip()

    # Pipeline structure
    retr_multi = getattr(_pipeline, "retrievers", None)
    retriever_members = (
        sorted(getattr(retr_multi, "retrievers", {}).keys())
        if retr_multi is not None else []
    )
    cfg = _pipeline.config
    preset = _os.environ.get("TAU_RAG_PRESET", "unknown")

    # Build info — keep it safe to serialize
    try:
        import fastapi as _fastapi
        fastapi_v = getattr(_fastapi, "__version__", "unknown")
    except Exception:
        fastapi_v = "unknown"

    # Git metadata — optional; silent fallback if not in a git checkout.
    # NOTE(review): git runs in the process working directory and is
    # invoked on every call — consider caching if this endpoint is hot.
    git_info: Dict[str, Any] = {}
    try:
        commit = _git("rev-parse", "HEAD")
        branch = _git("rev-parse", "--abbrev-ref", "HEAD")
        dirty = _git("status", "--porcelain")
        git_info = {
            "commit": commit,
            "commit_short": commit[:8],
            "branch": branch,
            "dirty": bool(dirty),
        }
    except Exception:
        git_info = {"available": False}

    # Numeric env flags, parsed defensively (a typo must not cause a 500).
    cocitation_boost = _env_float("TAU_RAG_GRAPH_COCITATION_BOOST")
    query_doc_boost = _env_float("TAU_RAG_QUERY_DOC_BOOST")

    # Span exporter class name — best-effort, same reasoning as above.
    try:
        span_exporter_type = type(
            __import__("tau_rag.observability.span_exporters",
                       fromlist=["get_span_exporter"])
            .get_span_exporter()).__name__
    except Exception:
        span_exporter_type = "unknown"

    # Enabled feature flags (from env) — helps debug "is it off/on in prod?"
    features = {
        "auth_required":   _os.environ.get("TAU_RAG_REQUIRE_AUTH") == "1",
        "auto_warmup":     _os.environ.get("TAU_RAG_WARMUP") == "1",
        "snapshot_path":   bool(_os.environ.get("TAU_RAG_SNAPSHOT_PATH")),
        # 0 / unset / malformed all report as None
        "snapshot_interval": _env_float("TAU_RAG_SNAPSHOT_INTERVAL") or None,
        "synonyms_path":   bool(_os.environ.get("TAU_RAG_SYNONYMS_PATH")),
        "hsts":            _os.environ.get("TAU_RAG_HSTS") == "1",
        "csp":             bool(_os.environ.get("TAU_RAG_CSP")),
        "cors_origins":    int(bool(_os.environ.get("TAU_RAG_CORS_ORIGINS"))),
        "log_stdout":      _os.environ.get("TAU_RAG_LOG_STDOUT") == "1",
        "log_file":        bool(_os.environ.get("TAU_RAG_LOG_PATH")),
        "audit_webhook":   bool(_os.environ.get("TAU_RAG_AUDIT_WEBHOOK_URL")),
        "endpoint_rate_limits": bool(
            _os.environ.get("TAU_RAG_ENDPOINT_RATE_LIMITS")),
        "audit_export":    True,
        "log_stream":      True,
        "key_rotation":    True,
        "snapshot_diff":   True,
        "metrics_history": bool(
            _os.environ.get("TAU_RAG_METRICS_HISTORY_INTERVAL_SEC")),
        "webhook_circuit_breaker": True,
        "alert_rules":     True,
        "alert_scheduler": bool(
            _os.environ.get("TAU_RAG_ALERT_EVAL_INTERVAL_SEC")),
        "doc_stats":       True,
        "retriever_attribution": True,
        "cocitation_graph": True,
        "content_health": True,
        "content_health_ui": True,
        "eval_latency_gate": True,
        "content_health_history": True,
        "query_fingerprints": True,
        "preset_promote_candidates": True,
        "preset_auto_promote": True,
        "analytics_retention": True,
        "analytics_retention_scheduler": bool(
            _os.environ.get("TAU_RAG_ANALYTICS_TTL_DAYS")),
        "doc_freshness": True,
        "doc_update_priorities": True,
        "query_doc_affinity": True,
        "analytics_dump_restore": True,
        "query_analytics_ui": True,
        "query_replay": True,
        "replay_body_capture": (
            _os.environ.get("TAU_RAG_OBS_CAPTURE_BODY") == "1"),
        "v2_stable_api": True,
        "about_endpoint": True,
        "semantic_cache": (
            _os.environ.get("TAU_RAG_SEMANTIC_CACHE") == "1"),
        # The parsed float when set and valid, otherwise False.
        "graph_cocitation_boost": (
            False if cocitation_boost is None else cocitation_boost),
        "query_doc_boost": (
            False if query_doc_boost is None else query_doc_boost),
        "request_spans": True,
        "span_timeline_ui": True,
        "limiter_backend_protocol": True,
        "maintenance_mode": True,
        "pii_redaction": True,
        "pii_redaction_enabled": (
            _os.environ.get("TAU_RAG_PII_REDACT") == "1"),
        "slow_query_detection": True,
        "readiness_registry": True,
        "daily_quota": True,
        "idempotency_key": True,
        "request_timeout": True,
        "log_rotation": True,
        "body_limit": True,
        "deprecation_headers": True,
        "feature_flags_registry": True,
        "sigterm_autodrain": True,
        "per_endpoint_timeouts": True,
        "cost_tracking": True,
        "response_compression": True,
        "ip_allowlist": True,
        "query_complexity": True,
        "key_labels": True,
        "i18n_errors": True,
        "request_bundle": True,
        "label_aggregation": True,
        "ops_dashboard_ui": True,
        "slo_tracking": True,
        "async_jobs": True,
        "cost_alerts": True,
        "prometheus_histograms": True,
        "etag_conditional_get": True,
        "hmac_request_signing": True,
        "batch_queries": True,
        "webhook_retry_dlq": True,
        "hebrew_normalization": True,
        "audit_search": True,
        "soft_delete": True,
        "query_coalescing": True,
        "tenant_flag_overrides": True,
        "response_redaction": True,
        "config_snapshot": True,
        "shadow_pipeline": True,
        "scheduled_tasks": True,
        "pipeline_stage_breakers": True,
        "w3c_traceparent": True,
        "jwt_auth": True,
        "bulk_import": True,
        "doc_acl": True,
        "w3c_baggage": True,
        "canary_routing": True,
        "autocomplete": True,
        "eval_gate": True,
        "near_dup_detection": True,
        "query_intent": True,
        "doc_versioning": True,
        "concurrency_limit": True,
        "language_detection": True,
        "xss_sanitizer": True,
        "resource_pool": True,
        "cache_warmup": True,
        "lazy_init": True,
        "pipeline_failover": True,
        "intent_rerank": True,
        "stage_budgets": True,
        "graceful_degradation": True,
        "query_rewrite": True,
        "score_calibration": True,
        "answer_postprocess": True,
        "retriever_health": True,
        "stream_throttle": True,
        "context_sizer": True,
        "answer_confidence": True,
        "legal_entities": True,
        "conversation_summarizer": True,
        "diversity_ranker": True,
        "synonym_expansion": True,
        "feedback_learning": True,
        "embedding_compression": True,
        "phrase_detection": True,
        "meaning_preservation": True,
        "retrieval_explain": True,
        "smart_chunking": True,
        "query_decomposition": True,
        "cost_estimator": True,
        "adaptive_k": True,
        "pipeline_trace": True,
        "ab_experiments": True,
        "tenant_quotas": True,
        "multihop_retrieval": True,
        "snippet_extraction": True,
        "answer_grounding": True,
        "corpus_drift": True,
        "prometheus_metrics": True,
        "request_replay": True,
        "composite_health": True,
        "platform_manifest": True,
        "self_diagnose": True,
        "changelog_generator": True,
        "html_dashboard": True,
        "compliance_bundle": True,
        "middleware_sdk": True,
        "eval_harness": True,
        "session_persistence": True,
        "event_bus": True,
        "graph_retriever": True,
        "federated_retrieval": True,
        "graph_builder": True,
        "answer_templates": True,
        "clarification_planner": True,
        "citation_parser": True,
        "spell_correct": True,
        "retrieval_budget": True,
        "doc_summarizer": True,
        "privilege_filter": True,
        "time_travel": True,
        "fact_extractor": True,
        "answer_consensus": True,
        "authority_ranker": True,
        "doc_comparator": True,
        "similar_docs": True,
        "query_lifecycle": True,
        "timeline_builder": True,
        "corpus_contradictions": True,
        "anonymizer": True,
        "doc_lineage": True,
        "result_explainer": True,
        "audit_anomaly": True,
        "llm_router": True,
        "citation_expander": True,
        "answer_quality_gate": True,
        "coverage_monitor": True,
        "followup_rewriter": True,
        "reasoning_chain": True,
        "injection_detector": True,
        "cost_forecaster": True,
        "crosslingual_bridge": True,
        "diversity_enforcer": True,
        "session_exporter": True,
        "fact_consistency": True,
        "issue_spotter": True,
        "doc_classifier": True,
        "cache_invalidator": True,
        "query_analytics": True,
        "corpus_router": True,
        "stream_chunker": True,
        "kg_extractor": True,
        "citation_network": True,
        "template_extractor": True,
        "slow_query_analyzer": True,
        "confidence_calibrator": True,
        "preview_generator": True,
        "retrieval_agreement": True,
        "answer_source_balance": True,
        "doc_staleness": True,
        "retrieval_coverage": True,
        "answer_hedging": True,
        "query_routing_optimizer": True,
        "answer_specificity": True,
        "rank_stability": True,
        "session_topic_tracker": True,
        "snippet_dedup": True,
        "citation_normalizer": True,
        "query_precision_classifier": True,
        "answer_format_validator": True,
        "retrieval_delta": True,
        "query_paraphrase": True,
        "doc_quality": True,
        "answer_coverage_gap": True,
        "query_throughput": True,
        "query_normalizer": True,
        "chunk_overlap": True,
        "answer_numeric_consistency": True,
        "error_budget": True,
        "request_fingerprint": True,
        "circuit_breaker": True,
        "span_exporter_protocol": True,
        "span_exporter_type": span_exporter_type,
    }

    return {
        "version":   "2.0.0",
        "preset":    preset,
        "pipeline": {
            "retriever_members":  retriever_members,
            "generator_provider": getattr(cfg.generation, "provider", "unknown"),
            "fusion_method":      getattr(cfg.fusion, "method", "unknown"),
            "rerank_method":      (getattr(cfg.rerank, "method", None)
                                   if getattr(cfg, "rerank", None) else None),
            "verifier":           type(_pipeline.verifier).__name__,
            "chunker":            getattr(_pipeline, "_chunker_last", "fixed"),
        },
        "build": {
            "python":     _sys.version.split()[0],
            "platform":   _plat.platform(),
            "fastapi":    fastapi_v,
        },
        "git":      git_info,
        "features": features,
    }


# ------------------------------------------------------ ops-ready endpoints
from .metrics import render_prometheus, check_readiness   # noqa: E402


def livez():
    """Liveness probe: plain-text ``ok`` with status 200 whenever the process answers."""
    from fastapi.responses import PlainTextResponse
    alive = PlainTextResponse("ok", status_code=200)
    return alive


def readyz(require_warmed: bool = False):
    """Readiness probe combining the pipeline check and the registry checks.

    ``check_readiness`` is run against the pipeline (``require_warmed`` is
    forwarded as a bool; pass ``?require_warmed=1`` to use it for
    deployment gating), then the pluggable readiness registry (v2.11) is
    evaluated.

    Returns ``{"ok": True, "detail": ..., "checks": ...}`` when both
    sources report ready.

    Raises:
        HTTPException: status 503 when either source is not ready. The
            detail carries the registry's checks and pass/fail counts;
            its ``detail`` entry holds the pipeline detail only when the
            pipeline check itself failed, otherwise ``None``.
    """
    pipeline_ok, pipeline_detail = check_readiness(
        _pipeline, require_warmed=bool(require_warmed))
    from ..middleware.readiness import get_readiness_registry
    report = get_readiness_registry().evaluate()

    if pipeline_ok and report["ready"]:
        return {"ok": True, "detail": pipeline_detail,
                "checks": report["checks"]}

    # Not ready — merge what both sources reported into one 503 payload.
    failure = {
        "detail": None if pipeline_ok else pipeline_detail,
        "checks": report["checks"],
        "n_passed": report["n_passed"],
        "n_failed": report["n_failed"],
    }
    raise HTTPException(status_code=503, detail=failure)


def admin_readiness():
    """Return the readiness registry's full evaluation (v2.11).

    Unlike ``readyz`` this function never raises a 503 itself — it simply
    hands back whatever the registry's ``evaluate()`` produces, which
    makes it suitable for dashboards that display health.
    """
    from ..middleware.readiness import get_readiness_registry
    registry = get_readiness_registry()
    return registry.evaluate()


def admin_warmup(request: Request):
    """Warm the pipeline and mark it as warmed.

    Calls ``_pipeline.warmup()`` when the pipeline has a callable of that
    name, sets ``_pipeline._warmed = True`` (the flag that lets
    ``/readyz?require_warmed=1`` pass), and records a ``pipeline.warmup``
    audit event with the elapsed time in milliseconds.

    Returns ``{"warmed": True, "elapsed_ms": <float>}``.

    Raises:
        HTTPException: status 500 when any step fails; the message is
            truncated to 200 characters.
    """
    import time as _t
    started = _t.time()
    try:
        warm = getattr(_pipeline, "warmup", None)
        if callable(warm):
            warm()
        _pipeline._warmed = True   # type: ignore[attr-defined]
        elapsed_ms = round((_t.time() - started) * 1000.0, 2)
        # NOTE(review): the raw X-API-Key header value is handed to the
        # audit sink here — confirm the sink does not store it in clear text.
        get_obs().audit(
            "pipeline.warmup",
            actor_key=request.headers.get("x-api-key"),
            request_id=getattr(request.state, "request_id", None),
            elapsed_ms=elapsed_ms,
        )
        return {"warmed": True, "elapsed_ms": elapsed_ms}
    except Exception as e:
        message = f"warmup failed: {type(e).__name__}: {e}"
        raise HTTPException(status_code=500, detail=message[:200])


def metrics():
    """Render the metrics page in Prometheus text exposition format.

    Counts active and revoked API keys, gathers stats from the
    observability, cache and rate-limiter singletons, and returns the
    rendered text as ``text/plain; version=0.0.4``.
    """
    from fastapi.responses import PlainTextResponse
    n_active = 0
    n_revoked = 0
    for entry in get_auth().list_keys():
        if entry.get("revoked"):
            n_revoked += 1
        else:
            n_active += 1
    exposition = render_prometheus(
        obs_stats=get_obs().stats(),
        cache_stats=get_cache().stats(),
        limiter_stats=get_limiter().stats(),
        auth_keys=n_active,
        auth_keys_revoked=n_revoked,
        version="2.0.0",
    )
    return PlainTextResponse(
        exposition, media_type="text/plain; version=0.0.4")


def add_documents(req: DocumentsRequest):
    """Validate the submitted documents and add them to the pipeline.

    Returns ``{"added_chunks": <pipeline result>, "documents": <count>}``.

    Raises:
        HTTPException: status 413 when validation raises ``OverflowError``,
            status 422 when it raises ``ValueError``; the exception text
            becomes the detail.
    """
    try:
        validate_doc_list(req.documents)
    except (OverflowError, ValueError) as e:
        status = 413 if isinstance(e, OverflowError) else 422
        raise HTTPException(status_code=status, detail=str(e))
    documents = [
        Document(id=item.id, text=item.text, metadata=item.metadata)
        for item in req.documents
    ]
    added = _pipeline.add_documents(documents)
    return {"added_chunks": added, "documents": len(documents)}


# ---- document lifecycle endpoints ----------------------------------------
def list_documents(
    request: Request,
    q: Optional[str] = None,
    limit: int = 50,
    offset: int = 0,
    preview_chars: int = 200,
):
    """List or search indexed documents via ``_pipeline.search_documents``.

    Query params:
      * ``q`` — substring (case-insensitive) over text + id
      * ``limit`` / ``offset`` — pagination (default 50 / 0); a ``limit``
        outside 1..10000 falls back to 50, a negative ``offset`` to 0
      * ``preview_chars`` — first N chars returned per doc (default 200)
      * ``metadata.<key>=<value>`` — filter on flat metadata keys; when a
        key is repeated the last value wins

    The pipeline result is returned with an extra ``count`` key mirroring
    ``matched`` for v1.38 clients.
    """
    prefix = "metadata."
    # Collect every 'metadata.<key>' query param into a flat filter dict.
    meta_filter: Dict[str, str] = {
        name[len(prefix):]: value
        for name, value in request.query_params.multi_items()
        if name.startswith(prefix)
    }

    if not 1 <= limit <= 10_000:
        limit = 50
    offset = max(offset, 0)

    page = _pipeline.search_documents(
        q=q,
        metadata=meta_filter if meta_filter else None,
        limit=limit,
        offset=offset,
        preview_chars=preview_chars,
    )
    # Back-compat alias expected by v1.38 clients.
    page["count"] = page["matched"]
    return page


def export_documents(request: Request):
    """Export indexed documents as JSONL (one ``{id,text,metadata}`` per line).

    Filters read from the query string:
      * ``?q=<text>`` — case-insensitive substring over text or id
      * ``?metadata.<key>=<value>`` — flat metadata filter (repeatable;
        values are compared as strings)

    There is no pagination: every matching document in
    ``_pipeline._indexed_docs`` is emitted. The response is
    ``application/x-ndjson`` with a ``Content-Disposition`` attachment name
    of ``tau-rag-documents.jsonl`` and an ``X-Document-Count`` header.
    """
    from fastapi.responses import PlainTextResponse
    import json as _json

    prefix = "metadata."
    needle = (request.query_params.get("q") or "").strip().lower()
    meta_filter: Dict[str, str] = {
        name[len(prefix):]: value
        for name, value in request.query_params.multi_items()
        if name.startswith(prefix)
    }

    def _keep(doc) -> bool:
        # Text filter: needle must occur in the text or in the id.
        if needle:
            in_text = needle in (doc.text or "").lower()
            in_id = needle in (doc.id or "").lower()
            if not (in_text or in_id):
                return False
        # Metadata filter: every requested key must match as a string.
        if meta_filter:
            meta = doc.metadata or {}
            return all(
                str(meta.get(key)) == str(wanted)
                for key, wanted in meta_filter.items()
            )
        return True

    _pipeline._ensure_doc_log()
    lines: List[str] = [
        _json.dumps(
            {
                "id":       doc.id,
                "text":     doc.text,
                "metadata": doc.metadata or {},
            },
            ensure_ascii=False,
        )
        for doc in _pipeline._indexed_docs
        if _keep(doc)
    ]

    # Each record is newline-terminated; an empty export is an empty body.
    payload = "".join(line + "\n" for line in lines)
    response_headers = {
        "Content-Disposition":
        'attachment; filename="tau-rag-documents.jsonl"',
        "X-Document-Count": str(len(lines)),
    }
    return PlainTextResponse(
        payload,
        media_type="application/x-ndjson",
        headers=response_headers,
    )


def index_stats():
    """Return corpus-level statistics as produced by ``_pipeline.index_stats()``.

    NOTE(review): the previous docstring described the payload as doc count,
    text-length distribution, metadata histogram/coverage and retriever set,
    computed without a full-text scan — confirm against the pipeline.
    """
    stats = _pipeline.index_stats()
    return stats


def admin_duplicates():
    """Report duplicate-document groups from ``_pipeline.find_duplicates()``.

    Groups are listed largest first. Returns ``{n_groups, n_duplicate_docs,
    total_docs, groups: [{hash, members}]}`` where ``n_duplicate_docs`` is
    the total member count across groups and ``total_docs`` is the size of
    ``_pipeline.list_documents()``.

    NOTE(review): the previous docstring stated that groups are keyed by a
    sha256 of whitespace-collapsed, case-folded content, contain ≥2 members,
    and that this route is declared before ``/v1/documents/{doc_id}`` so
    ``/duplicates`` is not matched as a doc id — confirm in the pipeline
    and router.
    """
    def _by_size_desc(entry):
        _digest, members = entry
        return -len(members)

    ordered = sorted(_pipeline.find_duplicates().items(), key=_by_size_desc)
    listing = [{"hash": digest, "members": members}
               for digest, members in ordered]
    duplicate_total = sum(len(group["members"]) for group in listing)
    corpus_size = len(_pipeline.list_documents())
    return {
        "n_groups":         len(listing),
        "n_duplicate_docs": duplicate_total,
        "total_docs":       corpus_size,
        "groups":           listing,
    }


# ---- Per-document citation stats (v1.82) --------------------------------
# Placed BEFORE /v1/documents/{doc_id} so FastAPI matches these fixed
# paths first — same ordering trick as /duplicates above.
def admin_docs_stats_summary():
    """Return the per-document stats store's ``summary()`` rollup (v1.82)."""
    from ..middleware import get_doc_stats
    store = get_doc_stats()
    return store.summary()


def admin_docs_stats_top_cited(n: int = 10):
    """Return ``{"top": [...]}`` with the doc-stats store's ``top_cited``
    rows, limited to ``n`` (v1.82)."""
    from ..middleware import get_doc_stats
    rows = get_doc_stats().top_cited(n=int(n))
    return {"top": rows}


def admin_docs_stats_unused(
    min_retrieved: int = 1,
    max_cite_rate: float = 0.0,
):
    """Return ``{"unused": [...]}`` from the doc-stats store (v1.82).

    Both thresholds are coerced (``int`` / ``float``) and forwarded to
    ``unused()``; the defaults ask for docs retrieved at least once with a
    cite rate of 0, i.e. likely retrieval false-positives.
    """
    from ..middleware import get_doc_stats
    rows = get_doc_stats().unused(
        min_retrieved=int(min_retrieved),
        max_cite_rate=float(max_cite_rate),
    )
    return {"unused": rows}


def admin_docs_stats_reset(request: Request):
    """Clear all per-doc counters and audit the reset (v1.82).

    The pre-reset ``summary()`` is captured first, echoed in the response
    as ``before``, and its ``n_docs`` / ``total_cited`` values are attached
    to the ``doc.stats.reset`` audit event.
    """
    from ..middleware import get_doc_stats
    snapshot = get_doc_stats().summary()
    get_doc_stats().clear()
    audit_fields = {
        "actor_key":        request.headers.get("x-api-key"),
        "request_id":       getattr(request.state, "request_id", None),
        "prev_n_docs":      snapshot["n_docs"],
        "prev_total_cited": snapshot["total_cited"],
    }
    get_obs().audit("doc.stats.reset", **audit_fields)
    return {"reset": True, "before": snapshot}


# ---- Per-retriever attribution (v1.83) ----------------------------------
def admin_retriever_stats():
    """Return the retriever-attribution store's ``summary()`` together with
    its full ``all_stats()`` listing (v1.83)."""
    from ..middleware import get_retriever_attribution
    attribution = get_retriever_attribution()
    rollup = attribution.summary()
    per_retriever = attribution.all_stats()
    return {"summary": rollup, "stats": per_retriever}


def admin_retriever_ranking():
    """Return ``{"ranking": [...]}`` from the retriever-attribution store's
    ``ranking()`` (v1.83).

    NOTE(review): the previous docstring described the order as
    cite_rate × log(1 + n_contributions) — confirm in the store.
    """
    from ..middleware import get_retriever_attribution
    ordered = get_retriever_attribution().ranking()
    return {"ranking": ordered}


def admin_retriever_stats_reset(request: Request):
    """Clear per-retriever counters and audit the reset (v1.83).

    The pre-reset ``summary()`` is returned as ``before``; its
    ``n_retrievers`` / ``total_cited`` values go into the
    ``retriever.stats.reset`` audit event.
    """
    from ..middleware import get_retriever_attribution
    attribution = get_retriever_attribution()
    snapshot = attribution.summary()
    attribution.clear()
    audit_fields = {
        "actor_key":         request.headers.get("x-api-key"),
        "request_id":        getattr(request.state, "request_id", None),
        "prev_n_retrievers": snapshot["n_retrievers"],
        "prev_total_cited":  snapshot["total_cited"],
    }
    get_obs().audit("retriever.stats.reset", **audit_fields)
    return {"reset": True, "before": snapshot}


# ---- Co-citation graph (v1.84) ------------------------------------------
def admin_cocitation_summary():
    """Return the co-citation store's ``summary()`` rollup (v1.84)."""
    from ..middleware import get_cocitation
    graph = get_cocitation()
    return graph.summary()


def admin_cocitation_top(n: int = 20):
    """Return ``{"top": [...]}`` with the co-citation store's ``top_pairs``
    rows, limited to ``n`` (v1.84)."""
    from ..middleware import get_cocitation
    pairs = get_cocitation().top_pairs(n=int(n))
    return {"top": pairs}


def admin_cocitation_reset(request: Request):
    """Clear the co-citation graph and audit the reset (v1.84).

    The pre-reset ``summary()`` is returned as ``before``; its ``n_pairs``
    / ``total_count`` values go into the ``cocitation.reset`` audit event.
    """
    from ..middleware import get_cocitation
    graph = get_cocitation()
    snapshot = graph.summary()
    graph.clear()
    audit_fields = {
        "actor_key":        request.headers.get("x-api-key"),
        "request_id":       getattr(request.state, "request_id", None),
        "prev_n_pairs":     snapshot["n_pairs"],
        "prev_total_count": snapshot["total_count"],
    }
    get_obs().audit("cocitation.reset", **audit_fields)
    return {"reset": True, "before": snapshot}


def admin_content_health(
    top_n: int = 5,
    unused_min_retrieved: int = 3,
):
    """Consolidated corpus health report (v1.85) — merges v1.82 doc
    stats, v1.83 retriever attribution, v1.84 co-citation into a single
    answer. Cross-cutting insights: ``dead_docs`` (indexed but not tracked
    by doc stats), ``isolated_docs`` (tracked but absent from every
    co-citation pair).

    Query params:
      * ``top_n`` — rows to include in top-cited / top-pairs / ranking
                    sections (default 5).
      * ``unused_min_retrieved`` — passed to doc_stats.unused() to
                                    filter retrieval false-positives.

    Returns a dict with the derived ``score`` / ``coverage`` /
    ``cite_rate`` / ``connectivity`` values (rounded to 4 places), corpus
    counts, the per-store sections, and the ``dead_docs`` /
    ``isolated_docs`` id lists.
    """
    from ..middleware import (
        get_doc_stats, get_retriever_attribution, get_cocitation,
    )
    doc_store = get_doc_stats()
    ra_store = get_retriever_attribution()
    cc_store = get_cocitation()

    doc_summary = doc_store.summary()
    ra_summary = ra_store.summary()
    cc_summary = cc_store.summary()

    # Corpus set — every indexed doc id.
    all_docs = {d["id"] for d in _pipeline.list_documents()}
    # Every doc id the stats store tracks; the huge n asks for "all rows".
    touched_docs = {
        d["doc_id"] for d in doc_store.top_cited(n=10 ** 9)
    }

    # Dead docs: indexed but unknown to the stats store.
    dead = sorted(all_docs - touched_docs)

    # Isolated docs: tracked, but never part of any co-citation pair.
    partnered = set()
    for pair in cc_store.top_pairs(n=10 ** 9):
        partnered.add(pair["a"])
        partnered.add(pair["b"])
    isolated = sorted(touched_docs - partnered)

    # Derived corpus-level health score.
    #   * coverage = fraction of indexed docs that are tracked
    #   * cite_rate = global doc-level cite rate
    #   * connectivity = fraction of touched docs that are non-isolated
    n_all = max(1, len(all_docs))
    n_touched = len(touched_docs)
    # Only tracked docs that are still indexed count toward coverage.
    # The stats store can hold ids that are no longer in the index, and
    # counting those would push coverage (and the score) above 1.0.
    coverage = len(all_docs & touched_docs) / n_all
    cite_rate = doc_summary.get("global_cite_rate", 0.0)
    connectivity = (
        (n_touched - len(isolated)) / max(1, n_touched)
        if n_touched else 0.0
    )
    # Equal-weight geometric mean — any dimension collapsing to 0
    # drags the whole score down. Helps operators see a single knob.
    score = (coverage * cite_rate * connectivity) ** (1 / 3) if (
        coverage > 0 and cite_rate > 0 and connectivity > 0
    ) else 0.0

    return {
        "score":       round(score, 4),
        "coverage":    round(coverage, 4),
        "cite_rate":   round(cite_rate, 4),
        "connectivity": round(connectivity, 4),
        "corpus": {
            "n_indexed":     len(all_docs),
            "n_touched":     n_touched,
            "n_dead":        len(dead),
            "n_isolated":    len(isolated),
        },
        "top_cited":   doc_store.top_cited(n=int(top_n)),
        "top_noisy":   doc_store.unused(
            min_retrieved=int(unused_min_retrieved),
            max_cite_rate=0.0,
        ),
        "retrievers":  {
            "summary": ra_summary,
            "ranking": ra_store.ranking()[:int(top_n)],
        },
        "cocitation":  {
            "summary":   cc_summary,
            "top_pairs": cc_store.top_pairs(n=int(top_n)),
        },
        "dead_docs":     dead,
        "isolated_docs": isolated,
    }


# ---- Query fingerprint analytics (v1.89) --------------------------------
def admin_query_stats_summary():
    """Return the query-stats store's ``summary()`` rollup (v1.89)."""
    from ..middleware import get_query_stats
    store = get_query_stats()
    return store.summary()


def admin_query_stats_top(n: int = 10):
    """Return ``{"top": [...]}`` with the query-stats store's ``top`` rows,
    limited to ``n`` (v1.89)."""
    from ..middleware import get_query_stats
    rows = get_query_stats().top(n=int(n))
    return {"top": rows}


def admin_query_stats_recent(
    since: Optional[float] = None,
    n: int = 10,
):
    """Return ``{"recent": [...]}`` from the query-stats store (v1.89).

    ``since`` (a Unix timestamp, or ``None`` for no age bound) and ``n``
    are forwarded to the store's ``recent()``.
    """
    from ..middleware import get_query_stats
    rows = get_query_stats().recent(since=since, n=int(n))
    return {"recent": rows}


def admin_query_promote_candidates(
    min_count: int = 3,
    min_sources: float = 0.0,
    max_avg_latency_ms: Optional[float] = None,
    n: int = 20,
):
    """Return query fingerprints that are strong candidates for
    promotion to saved presets (v1.90).

    Heuristic: a query is a good preset candidate when it's been
    asked often enough to justify the saved-search slot, returns a
    useful number of sources on average, doesn't already have a
    preset, and (optionally) isn't slower than some threshold.

    Query params:
      * ``min_count``          — minimum observed occurrences
                                  (default 3).
      * ``min_sources``        — minimum ``avg_sources`` per response.
                                  Filters out popular queries that
                                  find nothing useful (0 = no filter).
      * ``max_avg_latency_ms`` — optional ceiling on average latency.
                                  Omit to not filter.
      * ``n``                  — cap on rows returned; ``n <= 0`` yields
                                  an empty candidate list.

    A candidate row is the query-stats row plus a derived
    ``suggested_preset_name`` that ops can accept as-is, and
    ``already_preset`` (always ``False`` — matching presets are filtered
    out). ``n_existing_presets`` in the response counts distinct
    canonical preset queries.
    """
    from ..middleware import get_query_stats
    from ..middleware.query_stats import _canonicalize
    from ..presets import get_preset_store

    query_store = get_query_stats()
    preset_store = get_preset_store()

    # Pre-index the presets by canonical query text so O(P) setup turns
    # the per-candidate check into O(1).
    existing_canonical: set = {
        _canonicalize(p.get("query", "")) for p in preset_store.list_all()
    }

    # Coerce the thresholds once instead of on every row.
    cap = int(n)
    count_floor = int(min_count)
    sources_floor = float(min_sources)
    latency_ceiling = (
        None if max_avg_latency_ms is None else float(max_avg_latency_ms)
    )

    candidates: List[Dict[str, Any]] = []
    for row in query_store.top(n=10 ** 9):
        # Check the cap before appending so n <= 0 returns nothing; the
        # former post-append check always let one row through.
        if len(candidates) >= cap:
            break
        if row["count"] < count_floor:
            continue
        if row["avg_sources"] < sources_floor:
            continue
        if (latency_ceiling is not None
                and row["avg_latency_ms"] > latency_ceiling):
            continue
        if _canonicalize(row["sample"]) in existing_canonical:
            continue
        candidates.append({
            **row,
            # Preset name derived from the sample query text.
            "suggested_preset_name": _suggest_preset_name(row["sample"]),
            "already_preset": False,
        })

    return {
        "candidates":       candidates,
        "n_candidates":     len(candidates),
        "min_count":        count_floor,
        "min_sources":      sources_floor,
        "n_existing_presets": len(existing_canonical),
    }


class PresetPromoteRequest(BaseModel):
    # Request body for POST /v1/admin/queries/promote.
    #
    # Explicit list of fingerprints to promote. When this is not None the
    # handler runs in explicit mode and the auto-mode filters below are
    # ignored — fingerprints take precedence.
    fingerprints:       Optional[List[str]] = None
    # Auto-mode: pick candidates via filters (same as v1.90 endpoint).
    # `limit` caps how many matching queries are selected.
    min_count:          int = 3
    min_sources:        float = 0.0
    max_avg_latency_ms: Optional[float] = None
    limit:              int = 20
    # Common preset knobs — applied to every created preset
    k:                  int = 10
    rerank_k:           int = 5
    strategy:           str = "hybrid"
    lang:               str = "he"
    # Naming
    name_prefix:        str = ""      # optional prefix (e.g. "promoted-")
    # Safety
    dry_run:            bool = False  # preview without creating


@app.post("/v1/admin/queries/promote")
def admin_queries_promote(req: PresetPromoteRequest, request: Request):
    """Auto-promote query-stats candidates to saved presets (v1.91).

    Two modes:
      * **Explicit**: pass ``fingerprints=[...]`` to promote specific
        queries by their v1.89 fingerprints.
      * **Filtered**: pass the same filter params as
        ``/v1/admin/queries/promote-candidates`` (v1.90) and we'll
        promote the top ``limit`` that match. ``limit <= 0`` selects
        nothing.

    In both modes we skip queries whose canonical text already has a
    preset, and we deduplicate against name collisions (adding a ``-2``,
    ``-3`` suffix). ``dry_run=True`` returns the planned actions
    without touching the preset store.

    Returns::

        {
          created:  [{name, query, fingerprint, count}, ...],
          skipped:  [{fingerprint, reason}, ...],
          dry_run:  bool,
          n_created: int,
          n_skipped: int,
        }

    One ``preset.auto_promoted`` audit event is emitted per created
    preset so the change flows through the webhook (v1.71). A failing
    ``preset_store.put`` is reported under ``skipped`` instead of aborting
    the whole batch.
    """
    from ..middleware import get_query_stats
    from ..middleware.query_stats import _canonicalize
    from ..presets import get_preset_store, QueryPreset

    query_store = get_query_stats()
    preset_store = get_preset_store()

    # Pre-index existing presets by canonical query AND by name so we
    # can skip duplicates in both dimensions.
    existing_canonical: set = set()
    existing_names: set = set()
    for p in preset_store.list_all():
        existing_canonical.add(_canonicalize(p.get("query", "")))
        existing_names.add(p.get("name", ""))

    # ---- pick candidates
    rows: List[Dict[str, Any]] = []
    if req.fingerprints is not None:
        # Explicit mode: unknown fingerprints become marker rows so they
        # are reported under `skipped` in the plan loop.
        for fp in req.fingerprints:
            s = query_store.get(fp)
            if s is None:
                rows.append({"fingerprint": fp, "_missing": True})
            else:
                rows.append(s.to_dict())
    else:
        # Filter mode — mirror the v1.90 logic
        limit = int(req.limit)
        count_floor = int(req.min_count)
        sources_floor = float(req.min_sources)
        latency_ceiling = (
            None if req.max_avg_latency_ms is None
            else float(req.max_avg_latency_ms)
        )
        for row in query_store.top(n=10 ** 9):
            # Check the cap before appending so limit <= 0 promotes
            # nothing; the former post-append check always selected (and
            # outside dry_run, created) one preset.
            if len(rows) >= limit:
                break
            if row["count"] < count_floor:
                continue
            if row["avg_sources"] < sources_floor:
                continue
            if (latency_ceiling is not None
                    and row["avg_latency_ms"] > latency_ceiling):
                continue
            if _canonicalize(row["sample"]) in existing_canonical:
                continue
            rows.append(row)

    # ---- plan
    created: List[Dict[str, Any]] = []
    skipped: List[Dict[str, Any]] = []
    used_names = set(existing_names)

    for row in rows:
        fp = row.get("fingerprint", "")
        if row.get("_missing"):
            skipped.append({"fingerprint": fp,
                            "reason": "fingerprint not found in query_stats"})
            continue
        sample = row.get("sample", "")
        canonical = _canonicalize(sample)
        if not canonical:
            skipped.append({"fingerprint": fp,
                            "reason": "empty query after canonicalization"})
            continue
        if canonical in existing_canonical:
            skipped.append({"fingerprint": fp,
                            "reason": "already a preset (same canonical)"})
            continue
        base_name = _suggest_preset_name(sample)
        if req.name_prefix:
            base_name = f"{req.name_prefix}{base_name}"
        # Dedupe against already-used names: base, base-2, base-3, ...
        name = base_name
        suffix = 2
        while name in used_names:
            name = f"{base_name}-{suffix}"
            suffix += 1
        if not req.dry_run:
            try:
                preset_store.put(QueryPreset(
                    name=name, query=sample,
                    k=int(req.k), rerank_k=int(req.rerank_k),
                    strategy=req.strategy, lang=req.lang,
                    notes=f"auto-promoted from traffic (fp={fp}, "
                          f"count={row.get('count')})",
                ))
            except Exception as e:
                # Best-effort batch: record the failure and keep going.
                skipped.append({"fingerprint": fp,
                                "reason": f"put failed: "
                                          f"{type(e).__name__}: {e}"})
                continue
            get_obs().audit(
                "preset.auto_promoted",
                actor_key=request.headers.get("x-api-key"),
                request_id=getattr(request.state, "request_id", None),
                name=name, fingerprint=fp,
                count=row.get("count"),
                avg_sources=row.get("avg_sources"),
            )
        # Reserve the name and canonical text (in dry runs too) so later
        # rows in the same batch cannot collide with this one.
        used_names.add(name)
        existing_canonical.add(canonical)
        created.append({
            "name":        name,
            "query":       sample,
            "fingerprint": fp,
            "count":       row.get("count"),
        })

    return {
        "created":    created,
        "skipped":    skipped,
        "n_created":  len(created),
        "n_skipped":  len(skipped),
        "dry_run":    bool(req.dry_run),
    }


def _suggest_preset_name(sample: str) -> str:
    """Turn a raw user query into a safe preset id: lowercase, ASCII
    where possible, hyphens for whitespace, strip punctuation, cap
    length. Keeps non-ASCII runs (Hebrew letters) as-is when they
    can't be transliterated, so the result is still recognizable."""
    import re
    s = (sample or "").strip().lower()
    # Drop typical punctuation
    s = re.sub(r"[\?\!\.\,\:\;\(\)\[\]\{\}\"'`]", "", s)
    s = re.sub(r"\s+", "-", s)
    # Clip to 48 chars — leaves room for a namespace prefix
    if len(s) > 48:
        s = s[:48].rstrip("-")
    return s or "preset"


@app.post("/v1/admin/queries/stats/reset")
def admin_query_stats_reset(request: Request):
    """Clear the query fingerprint store and audit the reset (v1.89).

    The pre-reset ``summary()`` is returned as ``before``; its
    ``n_unique_queries`` / ``n_events`` values go into the
    ``query.stats.reset`` audit event.
    """
    from ..middleware import get_query_stats
    fingerprints = get_query_stats()
    snapshot = fingerprints.summary()
    fingerprints.clear()
    audit_fields = {
        "actor_key":     request.headers.get("x-api-key"),
        "request_id":    getattr(request.state, "request_id", None),
        "prev_n_unique": snapshot["n_unique_queries"],
        "prev_n_events": snapshot["n_events"],
    }
    get_obs().audit("query.stats.reset", **audit_fields)
    return {"reset": True, "before": snapshot}


class AnalyticsPruneRequest(BaseModel):
    # Request body for POST /v1/admin/analytics/prune.
    #
    # TTL: the handler requires at least one of these. When both are set,
    # older_than_seconds is used and older_than_days is ignored.
    older_than_days:   Optional[float] = None
    older_than_seconds: Optional[float] = None
    # Which stores to prune. Every store is pruned by default; set a flag
    # to false to skip that store.
    doc_stats:         bool = True
    retriever_attribution: bool = True
    cocitation:        bool = True
    query_stats:       bool = True


@app.get("/v1/admin/analytics/prune/scheduler")
def admin_retention_scheduler_status():
    """Report the background retention scheduler state (v1.93).

    With no scheduler configured the response is
    ``{"enabled": False, "is_running": False}``; otherwise it is
    ``{"enabled": True}`` merged with the scheduler's ``status()`` dict.
    """
    from ..middleware import get_retention_scheduler
    scheduler = get_retention_scheduler()
    if scheduler is not None:
        return {"enabled": True, **scheduler.status()}
    return {"enabled": False, "is_running": False}


@app.post("/v1/admin/analytics/prune")
def admin_analytics_prune(
    req: AnalyticsPruneRequest,
    request: Request,
):
    """Prune stale entries across all analytics stores (v1.92).

    Each selected store's ``prune(ttl_seconds)`` is called with the
    resolved TTL. Pass ``older_than_days`` OR ``older_than_seconds``; one
    is required, and ``older_than_seconds`` wins when both are given.
    Per-store flags let ops target a subset (e.g. prune only
    ``query_stats`` while keeping doc history).

    Returns ``{ttl_seconds, total_removed, per_store}`` where
    ``per_store`` maps each pruned store to
    ``{n_removed, n_remaining_after}``. An ``analytics.prune`` audit event
    is emitted.

    Raises:
        HTTPException: 400 when no TTL is supplied, or when the TTL is
            not a positive number (zero, negative, or NaN).
    """
    import math
    from ..middleware import (
        get_doc_stats, get_retriever_attribution,
        get_cocitation, get_query_stats,
    )
    # Resolve TTL seconds
    if req.older_than_seconds is not None:
        ttl_s = float(req.older_than_seconds)
    elif req.older_than_days is not None:
        ttl_s = float(req.older_than_days) * 86400.0
    else:
        raise HTTPException(
            status_code=400,
            detail={"error": "either older_than_days or "
                             "older_than_seconds is required"},
        )
    # NaN compares False against everything, so `ttl_s <= 0` alone would
    # let it through to the destructive prune() calls.
    if math.isnan(ttl_s) or ttl_s <= 0:
        raise HTTPException(
            status_code=400,
            detail={"error": "TTL must be positive"},
        )

    # (enabled flag, result key, store getter, summary key holding the
    # remaining-row count) — processed in this order.
    targets = (
        (req.doc_stats, "doc_stats",
         get_doc_stats, "n_docs"),
        (req.retriever_attribution, "retriever_attribution",
         get_retriever_attribution, "n_retrievers"),
        (req.cocitation, "cocitation",
         get_cocitation, "n_pairs"),
        (req.query_stats, "query_stats",
         get_query_stats, "n_unique_queries"),
    )

    results: Dict[str, Dict[str, int]] = {}
    total_removed = 0
    for enabled, label, get_store, size_key in targets:
        if not enabled:
            continue
        store = get_store()
        n = store.prune(ttl_s)
        results[label] = {
            "n_removed":         n,
            "n_remaining_after": store.summary()[size_key],
        }
        total_removed += n

    get_obs().audit(
        "analytics.prune",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        older_than_seconds=ttl_s,
        total_removed=total_removed,
    )
    return {
        "ttl_seconds":   ttl_s,
        "total_removed": total_removed,
        "per_store":     results,
    }


def about():
    """Return the static architectural overview payload (v2.0).

    The payload is a fixed dict: ``name``, ``version``, ``tagline``, then
    the ``layers``, ``patterns``, ``stability`` and ``counts`` sections.
    Nothing is computed at request time.
    """
    retrieval = {
        "retrievers": ["bm25", "gematria", "hilbert", "graph"],
        "fusion":     "rank-based or weighted",
        "rerank":     "optional cross-encoder or score-based",
        "chunker":    "fixed | sentence | legal_hebrew",
    }
    observability_stack = {
        "push":        "webhook + breaker (v1.71/79)",
        "batch":       "/v1/admin/audit/export (v1.74)",
        "pull_stream": "/v1/admin/logs/stream SSE (v1.75)",
        "history":     "metrics + content health (v1.78/88)",
        "alerts":      "rules + scheduler (v1.80/81)",
    }
    content_analytics = {
        "doc_stats":            "v1.82",
        "retriever_attribution": "v1.83",
        "cocitation":           "v1.84",
        "query_stats":          "v1.89",
        "doc_freshness":        "v1.94",
        "query_doc_affinity":   "v1.96",
    }
    analytics_cross_cuts = {
        "content_health":      "v1.85/86",
        "update_priorities":   "v1.95",
        "query_analytics_ui":  "v1.98",
        "dump_restore":        "v1.97",
    }
    debugging = {
        "request_ids":  "X-Request-ID on every response",
        "replay":       "v1.99 re-execute by request_id",
    }

    # Section order below is the order serialized to clients.
    layers = {
        "retrieval":            retrieval,
        "observability_stack":  observability_stack,
        "content_analytics":    content_analytics,
        "analytics_cross_cuts": analytics_cross_cuts,
        "debugging":            debugging,
    }
    patterns = {
        "side_channel_stores":
            "singleton+inject pattern; pipeline hook silent-fail; "
            "admin CRUD; persistence opt-in",
        "daemons":
            "AutoSnapshotter / MetricsHistorySampler / "
            "AlertScheduler / AnalyticsRetentionScheduler — "
            "start/stop/is_running/status + Event.wait + silent-fail",
        "quiet_on_zero":
            "schedulers emit audits only on state change",
        "html_dashboards":
            "inline CSS, zero JS, zero CDN, escape-safe, "
            "meta-refresh for wall screens",
    }
    stability = {
        "api_stability": "v2.0 marks /v1/* as stable — additive "
                          "changes only; breaking changes → /v2/*",
        "deprecation_policy": "6-month notice; features.* flags "
                               "track active capabilities",
    }
    counts = {
        "endpoints":        "80+",
        "tests":            "1096+",
        "side_channels":    6,
        "daemons":          4,
        "html_dashboards":  4,
    }

    overview = {
        "name":    "tau-rag",
        "version": "2.0.0",
        "tagline": "Unified Hebrew-legal RAG with structure-preserving "
                   "verification + TAU-Ω signals",
    }
    overview["layers"] = layers
    overview["patterns"] = patterns
    overview["stability"] = stability
    overview["counts"] = counts
    return overview


@app.get("/v1/admin/requests/{request_id}/spans/ui", response_class=Response, include_in_schema=False)
def admin_request_spans_ui(request_id: str, refresh: int = 0):
    """HTML timeline view of a request's spans (v2.5).

    Fetches the span payload by calling ``admin_request_spans`` directly
    (so its 404 for unknown ids propagates) and renders it with
    ``render_span_timeline``. ``refresh`` is passed through as
    ``refresh_sec``; a falsy value becomes 0.
    """
    from fastapi.responses import HTMLResponse
    from .span_timeline_ui import render_span_timeline

    payload = admin_request_spans(request_id)
    page = render_span_timeline(
        request_id=payload["request_id"],
        n_spans=payload["n_spans"],
        total_ms=payload["total_ms"],
        spans=payload["spans"],
        refresh_sec=int(refresh or 0),
    )
    return HTMLResponse(page)


@app.get("/v1/admin/requests/{request_id}/spans")
def admin_request_spans(request_id: str):
    """Return in-memory trace spans recorded for ``request_id`` (v2.4).

    Spans are looked up with the tracer's ``spans_for_request_id``.

    Returns::

        {
          "request_id": str,
          "n_spans":    int,
          "total_ms":   float (longest root-span duration),
          "spans":      [{name, trace_id, span_id, parent_id,
                           duration_ms, attrs}, ...],
        }

    A span with a falsy ``end_ts`` reports a duration of 0. ``total_ms``
    is the largest duration among spans whose ``parent_id`` is ``None``.

    Raises:
        HTTPException: 404 when the tracer has no spans for this id.
    """
    from ..observability.tracing import get_tracer

    recorded = get_tracer().spans_for_request_id(request_id)
    if not recorded:
        raise HTTPException(
            status_code=404,
            detail={"error": "no spans found for request_id",
                    "hint": "spans are in-memory — oldest get evicted "
                            "past the 5000-span cap"},
        )

    rows = []
    longest_root_ms = 0.0
    for span in recorded:
        if span.end_ts:
            elapsed_ms = (span.end_ts - span.start_ts) * 1000.0
        else:
            elapsed_ms = 0.0
        # Only parentless (root) spans contribute to the total.
        if span.parent_id is None:
            longest_root_ms = max(longest_root_ms, elapsed_ms)
        rows.append({
            "name":        span.name,
            "trace_id":    span.trace_id,
            "span_id":     span.span_id,
            "parent_id":   span.parent_id,
            "duration_ms": round(elapsed_ms, 2),
            "attrs":       span.attrs,
        })

    return {
        "request_id": request_id,
        "n_spans":    len(rows),
        "total_ms":   round(longest_root_ms, 2),
        "spans":      rows,
    }


@app.post("/v1/admin/replay/{request_id}")
def admin_replay(request_id: str, request: Request):
    """Re-execute a previously-logged request against the current
    pipeline (v1.99). Requires body capture to have been on at the
    time of the original request (``TAU_RAG_OBS_CAPTURE_BODY=1``).

    Returns::

        {
          "request_id":          original id,
          "replay_request_id":   new id,
          "path":                /v1/search | /v1/generate | /v1/chat,
          "original_body":       captured body (truncated to 4KB),
          "query":               parsed query text,
          "replay": {
            "sources":  [doc_id, ...],
            "answer":   str|None,
            "passed":   bool|None,
            "omega":    float|None,
            "timing_ms": dict,
          },
          "note": optional human-readable comparison hint.
        }

    Raises:
        HTTPException: 404 when no logged request carries ``request_id``;
            400 when the row has no captured body, the body is not valid
            JSON, the body is not a JSON object, it has no ``query``
            field, or ``k`` / ``rerank_k`` are not integers.

    Useful for:
      * regression debug — "did our new chunker break this query?"
      * eval gold augmentation — turn a real user query into a gold case.
      * postmortem analysis — replay after a bad deploy to prove harm.
    """
    import json as _json

    # Find the original row in the obs ring buffer. The tail is walked
    # in reverse so the last matching entry returned by tail() wins.
    row = None
    for entry in reversed(get_obs().tail(n=10 ** 9, event_type="request")):
        if entry.get("request_id") == request_id:
            row = entry
            break
    if row is None:
        raise HTTPException(
            status_code=404,
            detail={"error": "request_id not found in obs log",
                    "hint": "ensure the request was recorded — "
                            "obs log is a ring buffer; older rows "
                            "may have been evicted"},
        )
    body_txt = (row.get("extra") or {}).get("body")
    if not body_txt:
        raise HTTPException(
            status_code=400,
            detail={"error": "no captured body on this request",
                    "hint": "set TAU_RAG_OBS_CAPTURE_BODY=1 and re-run "
                            "the original request to enable replay"},
        )
    try:
        payload = _json.loads(body_txt)
    except Exception as e:
        raise HTTPException(
            status_code=400,
            detail={"error": "captured body is not valid JSON",
                    "detail": f"{type(e).__name__}: {e}"}) from e
    # Valid JSON is not necessarily an object: a captured "[]" or "42"
    # would otherwise crash on ``payload.get`` and surface as a 500.
    if not isinstance(payload, dict):
        raise HTTPException(
            status_code=400,
            detail={"error": "captured body is not a JSON object",
                    "detail": f"got {type(payload).__name__}"})
    path = row.get("path") or ""

    # Support the 3 replayable endpoints
    from ..core.types import Query, Strategy
    q_text = payload.get("query")
    if not q_text:
        raise HTTPException(status_code=400,
                            detail={"error": "captured body has no 'query' field"})
    strategy_name = (payload.get("strategy") or "hybrid").lower()
    try:
        strategy = Strategy(strategy_name)
    except Exception:
        # Unknown strategy names fall back to the hybrid default.
        strategy = Strategy.HYBRID
    # The captured body is whatever the client sent, so the numeric
    # knobs may be garbage; report that as a client error, not a crash.
    try:
        k = int(payload.get("k") or 10)
        rerank_k = int(payload.get("rerank_k") or 5)
    except (TypeError, ValueError) as e:
        raise HTTPException(
            status_code=400,
            detail={"error": "captured body has non-integer 'k' or 'rerank_k'",
                    "detail": f"{type(e).__name__}: {e}"}) from e
    q = Query(
        text=q_text,
        lang=payload.get("lang") or "he",
        filters=payload.get("filters") or {},
        strategy=strategy,
        k=k,
        rerank_k=rerank_k,
    )

    # Generate a fresh replay_request_id and mark this as a replay in
    # obs so the new run is traceable.
    replay_id = generate_request_id()
    resp = _pipeline.run(q)

    # Extract the interesting bits. omega is best-effort: any failure
    # reading or converting it leaves it as None.
    omega = None
    try:
        omega = float(resp.signals.omega) if resp.signals else None
    except Exception:
        pass
    verif = getattr(resp, "verification", None)

    get_obs().audit(
        "replay.executed",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        original_request_id=request_id,
        replay_request_id=replay_id,
        path=path,
    )

    return {
        "request_id":        request_id,
        "replay_request_id": replay_id,
        "path":              path,
        "original_body":     body_txt,
        "query":             q_text,
        "replay": {
            "sources":   list(resp.sources or []),
            "answer":    resp.answer,
            "omega":     omega,
            "passed":    (bool(getattr(verif, "passed", False))
                           if verif else None),
            "timing_ms": dict(resp.timing_ms or {}),
        },
        "note": ("compare 'replay.sources' to whatever the original "
                 "response had — differences reveal pipeline drift "
                 "since the original run"),
    }


def admin_query_analytics_ui(
    top_n: int = 10,
    matrix_queries: int = 6,
    matrix_docs: int = 6,
    min_count: int = 3,
    refresh: int = 0,
):
    """HTML dashboard for query analytics (v1.98).

    Merges v1.89 (query_stats), v1.90 (promote candidates), and
    v1.96 (query × doc affinity) into one visual page. Same design
    language as the v1.86 content-health UI.

    Query params:
      * ``top_n``          — rows to show in 'top queries' (default 10).
      * ``matrix_queries`` — rows in the affinity heatmap (default 6).
      * ``matrix_docs``    — cols in the affinity heatmap (default 6).
      * ``min_count``      — promote-candidate threshold (default 3).
      * ``refresh``        — auto-refresh seconds (0 = off).

    Returns:
        An ``HTMLResponse`` holding the rendered dashboard.
    """
    from fastapi.responses import HTMLResponse
    from ..middleware import (
        get_query_stats, get_query_doc_affinity,
    )
    from ..middleware.query_stats import _canonicalize
    from ..presets import get_preset_store
    from .query_analytics_ui import render_query_analytics_ui

    q_store = get_query_stats()
    qda = get_query_doc_affinity()
    preset_store = get_preset_store()

    summary = q_store.summary()
    top_qs = q_store.top(n=int(top_n))

    # Promote candidates — reuse same logic as v1.90 endpoint: frequent
    # queries that return sources and are not already a preset.
    existing_canonical = {
        _canonicalize(p.get("query", "")) for p in preset_store.list_all()
    }
    count_threshold = int(min_count)
    promote = []
    for row in q_store.top(n=10 ** 9):
        if row["count"] < count_threshold:
            continue
        if row.get("avg_sources", 0.0) < 1.0:
            continue
        if _canonicalize(row["sample"]) in existing_canonical:
            continue
        promote.append({
            **row,
            "suggested_preset_name": _suggest_preset_name(row["sample"]),
        })
        if len(promote) >= 10:
            break

    # Build affinity matrix grid. The per-query doc rows are fetched
    # once and reused for both the column vote and the cell counts, so
    # the two views come from the same snapshot of the store.
    matrix_q_rows = q_store.top(n=int(matrix_queries))
    per_query_docs = [
        (q["fingerprint"],
         qda.top_docs_for_query(q["fingerprint"], n=10 ** 9))
        for q in matrix_q_rows
    ]

    # Pick the top docs across the shown queries
    doc_votes: Dict[str, int] = {}
    for _fp, doc_rows in per_query_docs:
        for r in doc_rows:
            doc_votes[r["doc_id"]] = doc_votes.get(r["doc_id"], 0) + r["count"]
    top_doc_ids = [d for d, _ in sorted(doc_votes.items(),
                                         key=lambda kv: -kv[1])]
    top_doc_ids = top_doc_ids[:int(matrix_docs)]
    shown_docs = set(top_doc_ids)   # O(1) membership for the cell filter

    # Pre-compute the (fp, doc_id) → count map for the rendered subset
    matrix_pairs: Dict[tuple, int] = {}
    for fp, doc_rows in per_query_docs:
        for r in doc_rows:
            if r["doc_id"] in shown_docs:
                matrix_pairs[(fp, r["doc_id"])] = r["count"]

    html = render_query_analytics_ui(
        summary=summary,
        top_queries=top_qs,
        promote_candidates=promote,
        matrix_queries=matrix_q_rows,
        matrix_docs=top_doc_ids,
        matrix_pairs=matrix_pairs,
        refresh_sec=int(refresh or 0),
    )
    return HTMLResponse(html)


def admin_content_health_ui(
    top_n: int = 5,
    unused_min_retrieved: int = 3,
    refresh: int = 0,
):
    """Render the corpus health report as a self-contained HTML page
    (v1.86).

    The data is whatever ``admin_content_health`` returns for the same
    ``top_n`` / ``unused_min_retrieved`` (the ``/v1/admin/content/health``
    endpoint, v1.85). ``?refresh=N`` is forwarded to the renderer as the
    meta-refresh interval in seconds; a falsy value is passed as 0.
    """
    from fastapi.responses import HTMLResponse
    from .content_health_ui import render_content_health_ui

    report = admin_content_health(
        top_n=top_n,
        unused_min_retrieved=unused_min_retrieved,
    )
    refresh_sec = int(refresh or 0)
    page = render_content_health_ui(report, refresh_sec=refresh_sec)
    return HTMLResponse(page)


# ---- Doc freshness tracking (v1.94) -------------------------------------
def admin_doc_freshness_summary():
    """Return the doc-freshness store's summary rollup (v1.94):
    n_docs, oldest/newest added_at, median age, total modifications."""
    from ..middleware import get_doc_freshness

    freshness_store = get_doc_freshness()
    return freshness_store.summary()


def admin_doc_freshness_stale(older_than_days: float = 90.0):
    """List docs whose last activity (modified or added) is older than
    ``older_than_days`` (v1.94).

    The store returns them oldest-first so a content audit can start
    at the top. The threshold is echoed back as a float alongside the
    rows.
    """
    from ..middleware import get_doc_freshness

    threshold = float(older_than_days)
    stale_rows = get_doc_freshness().stale(older_than_days=threshold)
    return {"older_than_days": threshold, "stale": stale_rows}


def admin_doc_freshness_reset(request: Request):
    """Clear the freshness side-channel store and audit the wipe
    (v1.94). Intended for use after a large corpus reload, when the old
    timestamps no longer mean anything.

    Returns ``{"reset": True, "before": <summary taken before the wipe>}``.
    """
    from ..middleware import get_doc_freshness

    freshness = get_doc_freshness()
    snapshot = freshness.summary()   # captured before clearing, for the audit trail
    freshness.clear()

    audit_fields = {
        "actor_key":   request.headers.get("x-api-key"),
        "request_id":  getattr(request.state, "request_id", None),
        "prev_n_docs": snapshot["n_docs"],
    }
    get_obs().audit("doc.freshness.reset", **audit_fields)
    return {"reset": True, "before": snapshot}


# ---- Query × doc affinity (v1.96) ---------------------------------------
# ---- Unified analytics dump/restore (v1.97) -----------------------------
# Schema version stamped into every analytics dump as its "version"
# field; restore rejects (HTTP 400) any dump whose version differs.
_ANALYTICS_DUMP_VERSION = 1


def admin_analytics_dump():
    """Export all 6 side-channel analytics stores in one JSON payload
    (v1.97) — a versioned snapshot for migration and offline analysis.

    Stores included (one top-level key each):
      * v1.82 doc_stats
      * v1.83 retriever_attribution
      * v1.84 cocitation
      * v1.89 query_stats
      * v1.94 doc_freshness
      * v1.96 query_doc_affinity

    ``version`` carries ``_ANALYTICS_DUMP_VERSION`` — a single integer
    so restore can refuse incompatible schemas. ``exported_at`` is the
    ``time.time()`` reading taken while building the dump.
    """
    import time as _t
    from ..middleware import (
        get_doc_stats, get_retriever_attribution,
        get_cocitation, get_query_stats, get_doc_freshness,
        get_query_doc_affinity,
    )

    # doc_stats — raw counters per tracked doc. top_cited(n=10**9) is
    # relied on to hand back every row.
    doc_rows = [
        {
            "doc_id":             entry["doc_id"],
            "n_retrieved":        entry.get("n_retrieved", 0),
            "n_cited":            entry.get("n_cited", 0),
            "first_seen_at":      entry.get("first_seen_at"),
            "last_retrieved_at":  entry.get("last_retrieved_at"),
            "last_cited_at":      entry.get("last_cited_at"),
        }
        for entry in get_doc_stats().top_cited(n=10 ** 9)
    ]

    # retriever_attribution — every field except "cite_rate".
    ra_rows = []
    for entry in get_retriever_attribution().all_stats():
        trimmed = dict(entry.items())
        trimmed.pop("cite_rate", None)
        ra_rows.append(trimmed)

    # cocitation — pairs are exported exactly as the store reports them.
    cc = get_cocitation()
    cc_pairs = cc.top_pairs(n=10 ** 9)

    # query_stats — raw sums only.
    qs_fields = (
        "fingerprint", "sample", "count", "first_seen_at",
        "last_seen_at", "sum_sources", "sum_latency_ms",
    )
    qs_rows = [
        {field: entry[field] for field in qs_fields}
        for entry in get_query_stats().top(n=10 ** 9)
    ]

    # doc_freshness — there is no top() here, so stale(older_than_days=0)
    # is used to enumerate every row (oldest-first); taken as-is.
    fs = get_doc_freshness()
    now = _t.time()
    fs_rows = [
        {
            "doc_id":           entry["doc_id"],
            "added_at":         entry["added_at"],
            "last_modified_at": entry.get("last_modified_at"),
            "n_modifications":  entry.get("n_modifications", 0),
        }
        for entry in fs.stale(older_than_days=0, now=now)
    ]

    # query_doc_affinity — walk the per-query index and list each
    # query's docs, instead of scanning every (query, doc) combination.
    qda = get_query_doc_affinity()
    qda_summary = qda.summary()
    qda_pairs: List[Dict[str, Any]] = []
    for fp in list(qda._by_query.keys()):       # noqa: SLF001
        qda_pairs.extend(
            {
                "fingerprint": fp,
                "doc_id":      entry["doc_id"],
                "count":       entry["count"],
                "last_seen":   entry.get("last_seen"),
            }
            for entry in qda.top_docs_for_query(fp, n=10 ** 9)
        )

    cc_n_events = cc.summary().get("n_events", 0)
    qda_n_events = qda_summary.get("n_events", 0)

    return {
        "version":   _ANALYTICS_DUMP_VERSION,
        "exported_at": now,
        "doc_stats": {
            "rows":    doc_rows,
            "n_rows":  len(doc_rows),
        },
        "retriever_attribution": {
            "rows":    ra_rows,
            "n_rows":  len(ra_rows),
        },
        "cocitation": {
            "pairs":   cc_pairs,
            "n_pairs": len(cc_pairs),
            "n_events": cc_n_events,
        },
        "query_stats": {
            "rows":    qs_rows,
            "n_rows":  len(qs_rows),
        },
        "doc_freshness": {
            "rows":    fs_rows,
            "n_rows":  len(fs_rows),
        },
        "query_doc_affinity": {
            "pairs":      qda_pairs,
            "n_pairs":    len(qda_pairs),
            "n_events":   qda_n_events,
        },
    }


class AnalyticsRestoreRequest(BaseModel):
    # Request body for POST /v1/admin/analytics/restore.
    #
    # dump    — the dump payload to load; the restore handler reads its
    #           "version" key and one sub-dict per analytics store.
    # replace — True: each store is swapped for a fresh empty one before
    #           loading; False: rows are loaded on top of existing data.
    dump:         Dict[str, Any]
    replace:      bool = True   # default: wipe before restore


@app.post("/v1/admin/analytics/restore")
def admin_analytics_restore(req: AnalyticsRestoreRequest, request: Request):
    """Rebuild the 6 analytics stores from a v1.97 dump.

    ``replace=True`` (default) swaps each store for a fresh empty one
    before loading — gives exact-match state after restore.
    ``replace=False`` merges on top of existing data: when a
    fingerprint / doc_id / retriever name / pair already exists, the
    counters are SUMMED, "first seen"/"added" timestamps keep the
    earliest value and "last ..." timestamps of per-row records keep the
    latest. (Pair ``last_seen`` values for cocitation and
    query_doc_affinity are taken from the dump as-is.) Useful for
    aggregating traffic across prod nodes.

    Refuses to restore from dumps whose ``version`` doesn't match the
    current ``_ANALYTICS_DUMP_VERSION`` — schema compatibility gate.

    Returns:
        ``{"restored": True, "replace": bool, "version": int,
        "totals": {...}}``. Row totals count the rows present in the
        dump; pair totals count the pairs actually applied.

    Raises:
        HTTPException: 400 on version mismatch.
    """
    from ..middleware import (
        get_doc_stats, set_doc_stats, DocumentStatsStore,
        get_retriever_attribution, set_retriever_attribution,
        RetrieverAttributionStore,
        get_cocitation, set_cocitation, CoCitationStore,
        get_query_stats, set_query_stats, QueryStatsStore,
        get_doc_freshness, set_doc_freshness, DocFreshnessStore,
        get_query_doc_affinity, set_query_doc_affinity,
        QueryDocAffinityStore,
    )

    dump = req.dump or {}
    ver = dump.get("version")
    if ver != _ANALYTICS_DUMP_VERSION:
        raise HTTPException(
            status_code=400,
            detail={"error": "version mismatch",
                    "expected": _ANALYTICS_DUMP_VERSION,
                    "got": ver},
        )

    # Record classes / helpers are imported once up front instead of
    # once per restored row.
    from ..middleware.cocitation import _pair_key as _pk
    from ..middleware.doc_freshness import DocFreshness as _DF
    from ..middleware.doc_stats import DocumentStats as _DS
    from ..middleware.query_stats import QueryStats as _QS
    from ..middleware.retriever_attribution import RetrieverStats as _RS

    # Merge mode: collisions with existing rows are combined rather
    # than overwritten.
    merging = not req.replace

    def _earliest(a, b):
        """Smaller of two optional timestamps (None means unknown)."""
        if a is None:
            return b
        if b is None:
            return a
        return min(a, b)

    def _latest(a, b):
        """Larger of two optional timestamps (None means unknown)."""
        if a is None:
            return b
        if b is None:
            return a
        return max(a, b)

    totals: Dict[str, int] = {}

    # doc_stats
    if req.replace:
        set_doc_stats(DocumentStatsStore())
    doc_store = get_doc_stats()
    doc_rows = (dump.get("doc_stats") or {}).get("rows") or []
    for row in doc_rows:
        did = row.get("doc_id")
        if not did:
            continue
        n_retrieved = int(row.get("n_retrieved", 0))
        n_cited = int(row.get("n_cited", 0))
        first_seen_at = row.get("first_seen_at")
        last_retrieved_at = row.get("last_retrieved_at")
        last_cited_at = row.get("last_cited_at")
        prev = doc_store._data.get(did) if merging else None   # noqa: SLF001
        if prev is not None:
            n_retrieved += int(prev.n_retrieved)
            n_cited += int(prev.n_cited)
            first_seen_at = _earliest(prev.first_seen_at, first_seen_at)
            last_retrieved_at = _latest(prev.last_retrieved_at,
                                        last_retrieved_at)
            last_cited_at = _latest(prev.last_cited_at, last_cited_at)
        # Directly seed inner state (avoids driving up counters via
        # record() N times when N can be huge on real dumps)
        doc_store._data[did] = _DS(   # noqa: SLF001
            doc_id=did,
            n_retrieved=n_retrieved,
            n_cited=n_cited,
            first_seen_at=first_seen_at,
            last_retrieved_at=last_retrieved_at,
            last_cited_at=last_cited_at,
        )
    totals["doc_stats"] = len(doc_rows)

    # retriever_attribution
    if req.replace:
        set_retriever_attribution(RetrieverAttributionStore())
    ra = get_retriever_attribution()
    ra_rows = (dump.get("retriever_attribution") or {}).get("rows") or []
    for row in ra_rows:
        nm = row.get("name")
        if not nm:
            continue
        n_contributed = int(row.get("n_contributed", 0))
        n_doc_contributions = int(row.get("n_doc_contributions", 0))
        n_cited_contributions = int(row.get("n_cited_contributions", 0))
        first_seen_at = row.get("first_seen_at")
        last_seen_at = row.get("last_seen_at")
        prev = ra._data.get(nm) if merging else None   # noqa: SLF001
        if prev is not None:
            n_contributed += int(prev.n_contributed)
            n_doc_contributions += int(prev.n_doc_contributions)
            n_cited_contributions += int(prev.n_cited_contributions)
            first_seen_at = _earliest(prev.first_seen_at, first_seen_at)
            last_seen_at = _latest(prev.last_seen_at, last_seen_at)
        ra._data[nm] = _RS(   # noqa: SLF001
            name=nm,
            n_contributed=n_contributed,
            n_doc_contributions=n_doc_contributions,
            n_cited_contributions=n_cited_contributions,
            first_seen_at=first_seen_at,
            last_seen_at=last_seen_at,
        )
    totals["retriever_attribution"] = len(ra_rows)

    # cocitation — pair counts are seeded via direct state access;
    # replaying through record() would create a new n_events per
    # iteration and skew that counter.
    if req.replace:
        set_cocitation(CoCitationStore())
    cc = get_cocitation()
    n_cc = 0
    for pair in (dump.get("cocitation") or {}).get("pairs") or []:
        count = int(pair.get("count", 0))
        a = pair.get("a")
        b = pair.get("b")
        if not a or not b or count <= 0:
            continue
        k = _pk(a, b)
        cc._pairs[k] = int(cc._pairs.get(k, 0)) + count   # noqa: SLF001
        cc._partners[a].add(b)                             # noqa: SLF001
        cc._partners[b].add(a)                             # noqa: SLF001
        ls = pair.get("last_seen")
        if ls is not None:
            cc._last_seen[k] = float(ls)                   # noqa: SLF001
        n_cc += 1
    n_events_cc = int((dump.get("cocitation") or {}).get("n_events", 0))
    if n_events_cc:
        cc._n_events = cc._n_events + n_events_cc         # noqa: SLF001
    totals["cocitation_pairs"] = n_cc

    # query_stats
    if req.replace:
        set_query_stats(QueryStatsStore())
    qs = get_query_stats()
    qs_rows = (dump.get("query_stats") or {}).get("rows") or []
    for row in qs_rows:
        fp = row.get("fingerprint")
        if not fp:
            continue
        existing = qs._data.get(fp)        # noqa: SLF001
        if existing and not req.replace:
            # Merge: add counts; keep earliest first_seen_at; latest
            # last_seen_at; sum sources/latency.
            existing.count += int(row.get("count", 0))
            existing.sum_sources += int(row.get("sum_sources", 0))
            existing.sum_latency_ms += float(
                row.get("sum_latency_ms", 0.0))
            if (row.get("first_seen_at") is not None and
                    (existing.first_seen_at is None or
                     row["first_seen_at"] < existing.first_seen_at)):
                existing.first_seen_at = row["first_seen_at"]
            if (row.get("last_seen_at") is not None and
                    (existing.last_seen_at is None or
                     row["last_seen_at"] > existing.last_seen_at)):
                existing.last_seen_at = row["last_seen_at"]
        else:
            qs._data[fp] = _QS(             # noqa: SLF001
                fingerprint=fp,
                sample=row.get("sample", ""),
                count=int(row.get("count", 0)),
                first_seen_at=row.get("first_seen_at"),
                last_seen_at=row.get("last_seen_at"),
                sum_sources=int(row.get("sum_sources", 0)),
                sum_latency_ms=float(row.get("sum_latency_ms", 0.0)),
            )
    totals["query_stats"] = len(qs_rows)

    # doc_freshness
    if req.replace:
        set_doc_freshness(DocFreshnessStore())
    fs = get_doc_freshness()
    fs_rows = (dump.get("doc_freshness") or {}).get("rows") or []
    for row in fs_rows:
        did = row.get("doc_id")
        if not did:
            continue
        raw_added = row.get("added_at")
        added_at = float(raw_added or 0.0)
        last_modified_at = row.get("last_modified_at")
        n_modifications = int(row.get("n_modifications", 0))
        prev = fs._data.get(did) if merging else None   # noqa: SLF001
        if prev is not None:
            # Keep the earliest known add time; a missing/zero value on
            # either side never wins over a real timestamp.
            if not raw_added:
                added_at = prev.added_at
            elif prev.added_at:
                added_at = min(float(prev.added_at), added_at)
            last_modified_at = _latest(prev.last_modified_at,
                                       last_modified_at)
            n_modifications += int(prev.n_modifications)
        fs._data[did] = _DF(   # noqa: SLF001
            doc_id=did,
            added_at=added_at,
            last_modified_at=last_modified_at,
            n_modifications=n_modifications,
        )
    totals["doc_freshness"] = len(fs_rows)

    # query_doc_affinity
    if req.replace:
        set_query_doc_affinity(QueryDocAffinityStore())
    qda = get_query_doc_affinity()
    n_qda = 0
    for pair in (dump.get("query_doc_affinity") or {}).get("pairs") or []:
        fp = pair.get("fingerprint")
        did = pair.get("doc_id")
        count = int(pair.get("count", 0))
        if not fp or not did or count <= 0:
            continue
        k = (fp, did)
        qda._pairs[k] = int(qda._pairs.get(k, 0)) + count  # noqa: SLF001
        qda._by_query[fp].add(did)                          # noqa: SLF001
        qda._by_doc[did].add(fp)                            # noqa: SLF001
        ls = pair.get("last_seen")
        if ls is not None:
            qda._last_seen[k] = float(ls)                    # noqa: SLF001
        n_qda += 1
    n_events_qda = int(
        (dump.get("query_doc_affinity") or {}).get("n_events", 0))
    if n_events_qda:
        qda._n_events = qda._n_events + n_events_qda       # noqa: SLF001
    totals["query_doc_affinity_pairs"] = n_qda

    get_obs().audit(
        "analytics.restore",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        version=ver, replace=bool(req.replace),
        totals=totals,
    )
    return {
        "restored":  True,
        "replace":   bool(req.replace),
        "version":   ver,
        "totals":    totals,
    }


def admin_query_doc_affinity_summary():
    """Return the query × doc affinity store's summary rollup (v1.96):
    n_events, n_pairs, n_queries, n_docs, total_count."""
    from ..middleware import get_query_doc_affinity

    affinity_store = get_query_doc_affinity()
    return affinity_store.summary()


def admin_query_top_docs(fingerprint: str, n: int = 10):
    """Report the docs a query most often cites (v1.96).

    ``fingerprint`` is the v1.89 canonical fingerprint. For a text
    query, run it through ``_fingerprint(canonical)`` first — or use
    the v1.89 lookup endpoints.

    The response also carries the query's sample text when the v1.89
    query_stats store still knows the fingerprint, else ``None``.
    """
    from ..middleware import get_query_doc_affinity, get_query_stats

    top_docs = get_query_doc_affinity().top_docs_for_query(
        fingerprint, n=int(n))
    known_query = get_query_stats().get(fingerprint)
    sample_text = None if not known_query else known_query.sample
    return {
        "fingerprint": fingerprint,
        "sample":      sample_text,
        "top_docs":    top_docs,
    }


def admin_doc_top_queries(doc_id: str, n: int = 10):
    """Report which queries lead to ``doc_id`` being cited (v1.96).

    Each returned row (fingerprint + count) gets a ``sample`` key with
    the query text when v1.89's query_stats store still knows the
    fingerprint, else ``None``.
    """
    from ..middleware import get_query_doc_affinity, get_query_stats

    query_rows = get_query_doc_affinity().top_queries_for_doc(
        doc_id, n=int(n))
    query_store = get_query_stats()
    # Attach the sample text in place on each row.
    for entry in query_rows:
        known_query = query_store.get(entry["fingerprint"])
        entry["sample"] = None if not known_query else known_query.sample
    return {"doc_id": doc_id, "top_queries": query_rows}


def admin_query_doc_affinity_reset(request: Request):
    """Clear the query × doc affinity matrix and audit the wipe (v1.96).

    Returns ``{"reset": True, "before": <summary taken before the wipe>}``.
    """
    from ..middleware import get_query_doc_affinity

    affinity = get_query_doc_affinity()
    snapshot = affinity.summary()   # captured before clearing, for the audit trail
    affinity.clear()

    audit_fields = {
        "actor_key":        request.headers.get("x-api-key"),
        "request_id":       getattr(request.state, "request_id", None),
        "prev_n_pairs":     snapshot["n_pairs"],
        "prev_total_count": snapshot["total_count"],
    }
    get_obs().audit("query_doc_affinity.reset", **audit_fields)
    return {"reset": True, "before": snapshot}


def admin_doc_update_priorities(
    n: int = 20,
    min_cited: int = 1,
    older_than_days: float = 0.0,
    alpha: float = 1.0,
):
    """Rank docs by "needs update" priority (v1.95) — cross-cut of
    v1.82 doc_stats × v1.94 doc_freshness.

    Priority score = ``n_cited * (age_days ** alpha)``. High traffic
    plus stale content → high score. Docs get flagged from both
    sides: low cite = not worth the review; recently-modified =
    doesn't need review yet. ``alpha`` tunes how heavily age
    dominates (α>1 = age matters more; α<1 = traffic matters more).

    Age is measured from ``last_modified_at`` when set, otherwise from
    ``added_at``, and is clamped at 0. If the power cannot be computed
    (overflow, or a zero age with a negative ``alpha``) the plain
    ``age_days`` is used in its place.

    Filters:
      * ``min_cited`` — minimum citation count (default 1 — skip
                         cold docs entirely, they're content-audit's
                         problem, not update's).
      * ``older_than_days`` — minimum age in days (default 0 — let
                               the caller decide what ''stale'' means).
      * ``n`` — cap on rows returned.
      * ``alpha`` — exponent on age_days in the score. 1.0 is linear
                     (balanced); raise to prioritize stale content
                     harder.

    Returns:
        A dict echoing the parameters plus ``candidates`` (rows sorted
        by ``priority_score`` descending, at most ``n``) and
        ``n_candidates`` (the number of rows returned).
    """
    from ..middleware import get_doc_stats, get_doc_freshness
    import time as _t

    docs = get_doc_stats()
    fresh = get_doc_freshness()
    now = _t.time()

    # Convert the knobs once instead of once per doc.
    cite_floor = int(min_cited)
    age_floor = float(older_than_days)
    exponent = float(alpha)

    # Drive the join from doc_stats: a doc is only ranked when BOTH
    # stores know it, so docs without a freshness record are skipped.
    rows: List[Dict[str, Any]] = []
    for doc_row in docs.top_cited(n=10 ** 9):
        did = doc_row["doc_id"]
        if doc_row["n_cited"] < cite_floor:
            continue
        f = fresh.get(did)
        if f is None:
            continue
        ref_ts = f.last_modified_at or f.added_at
        age_days = max(0.0, (now - ref_ts) / 86400.0)
        if age_days < age_floor:
            continue
        try:
            aged = age_days ** exponent
        except (OverflowError, ValueError, ZeroDivisionError):
            # ZeroDivisionError: 0.0 raised to a negative power (a doc
            # touched "just now" combined with alpha < 0). Fall back to
            # the linear age so one row can't fail the whole report.
            aged = age_days
        score = float(doc_row["n_cited"]) * aged
        rows.append({
            "doc_id":         did,
            "n_cited":        doc_row["n_cited"],
            "n_retrieved":    doc_row["n_retrieved"],
            "cite_rate":      doc_row.get("cite_rate", 0.0),
            "added_at":       f.added_at,
            "last_modified_at": f.last_modified_at,
            "age_days":       round(age_days, 2),
            "n_modifications": f.n_modifications,
            "priority_score": round(score, 2),
        })

    rows.sort(key=lambda r: r["priority_score"], reverse=True)
    rows = rows[:max(0, int(n))]
    return {
        "n_candidates":    len(rows),
        "n":               int(n),
        "min_cited":       cite_floor,
        "older_than_days": age_floor,
        "alpha":           exponent,
        "candidates":      rows,
    }


def get_document_freshness(doc_id: str):
    """Return one doc's freshness record as a dict (v1.94): added_at,
    last_modified_at, n_modifications, age_s, age_days.

    Raises:
        HTTPException: 404 when the store has no record for ``doc_id``.
    """
    from ..middleware import get_doc_freshness

    record = get_doc_freshness().get(doc_id)
    if record is not None:
        return record.to_dict()
    raise HTTPException(
        status_code=404,
        detail={"error": "no freshness record for doc",
                "doc_id": doc_id},
    )


def get_document_related(doc_id: str, n: int = 10):
    """List the docs most commonly co-cited with ``doc_id`` in actual
    traffic (v1.84) — an empirical, purely behavioural 'related'."""
    from ..middleware import get_cocitation

    cocitation_store = get_cocitation()
    return {
        "doc_id":  doc_id,
        "related": cocitation_store.related(doc_id, n=int(n)),
    }


def get_document_stats(doc_id: str):
    """Return one doc's citation + retrieval counters as a dict (v1.82).

    Raises:
        HTTPException: 404 when the store has no stats for ``doc_id``.
    """
    from ..middleware import get_doc_stats

    record = get_doc_stats().get(doc_id)
    if record is not None:
        return record.to_dict()
    raise HTTPException(
        status_code=404,
        detail={"error": "no stats for doc", "doc_id": doc_id},
    )


def get_document_chunks(doc_id: str, chunker: Optional[str] = None):
    """Return the chunks the retrievers actually index for this doc.

    The chunks come from ``_pipeline.get_chunks``; pass
    ``?chunker=sentence`` to preview an alternative chunking without
    changing the index. When no chunker is given, the reported name is
    the pipeline's ``_chunker_last`` attribute, or ``"fixed"`` if that
    attribute is absent.

    Raises:
        HTTPException: 404 when the pipeline does not know ``doc_id``.
    """
    if _pipeline.get_document(doc_id) is None:
        raise HTTPException(status_code=404, detail="document not found")

    pieces = _pipeline.get_chunks(doc_id, chunker=chunker)
    if chunker:
        chunker_name = chunker
    else:
        chunker_name = getattr(_pipeline, "_chunker_last", "fixed")
    return {
        "doc_id":   doc_id,
        "n_chunks": len(pieces),
        "chunker":  chunker_name,
        "chunks":   pieces,
    }


def get_document(doc_id: str):
    """Fetch one document as ``{"id", "text", "metadata"}``.

    Lookup order: the module-level ``_pipeline`` first, then the live
    unified pipeline from ``get_pipeline()`` (a different singleton
    when /v1/data/load_jsonl was used vs the autoload path). Any error
    in that fallback is treated as "not found".

    Raises:
        HTTPException: 404 when neither pipeline has the document.
    """
    doc = _pipeline.get_document(doc_id)
    if doc is None:
        try:
            from ..pipeline import get_pipeline as _live_pipeline
            doc = _live_pipeline().get_document(doc_id)
        except Exception:
            doc = None
    if doc is None:
        raise HTTPException(status_code=404, detail="document not found")

    # Lazy-text resolution: when TAU_RAG_LAZY_TEXT=1 and metadata flagged
    # the doc as lazy, the in-memory `text` is empty by design — pull it
    # on demand via the live pipeline's get_text(). A failed fetch
    # leaves the text empty instead of failing the request.
    body_text = doc.text or ""
    flagged_lazy = (not body_text
                    and (doc.metadata or {}).get("_lazy_text"))
    if flagged_lazy:
        try:
            from ..pipeline import get_pipeline as _live_pipeline
            body_text = _live_pipeline().get_text(doc.id) or ""
        except Exception:
            pass
    return {"id": doc.id, "text": body_text, "metadata": doc.metadata or {}}


def replace_document(doc_id: str, body: DocumentBody):
    """Replace an existing document with the supplied body.

    Raises:
        HTTPException: 422 when the URL id and ``body.id`` differ;
            413 when the doc-size validator raises ``OverflowError``;
            404 when the pipeline reports nothing was replaced.
    """
    if doc_id != body.id:
        mismatch = {"path_id_mismatch": {"url": doc_id, "body": body.id}}
        raise HTTPException(status_code=422, detail=mismatch)

    # Reuse the doc-size validator on a one-element batch.
    try:
        validate_doc_list([body])
    except OverflowError as e:
        raise HTTPException(status_code=413, detail=str(e))

    replacement = Document(id=body.id, text=body.text, metadata=body.metadata)
    if not _pipeline.replace_document(replacement):
        raise HTTPException(status_code=404, detail="document not found")
    return {"replaced": True, "id": doc_id}


def delete_document(doc_id: str):
    """Delete one document from the pipeline.

    Raises:
        HTTPException: 404 when the pipeline reports nothing was deleted.
    """
    if _pipeline.delete_document(doc_id):
        return {"deleted": True, "id": doc_id}
    raise HTTPException(status_code=404, detail="document not found")


def clear_documents(request: Request):
    """Remove every document from the pipeline and audit the wipe.

    Returns ``{"cleared": True, "removed": <value returned by the
    pipeline's clear_documents()>}``.
    """
    removed = _pipeline.clear_documents()
    audit_fields = {
        "actor_key":  request.headers.get("x-api-key"),
        "request_id": getattr(request.state, "request_id", None),
        "removed":    removed,
    }
    get_obs().audit("documents.clear", **audit_fields)
    return {"cleared": True, "removed": removed}


# ---- bulk ingest (JSONL + CSV streaming) ------------------------------
import csv as _csv   # noqa: E402
import io as _io    # noqa: E402
import json          # noqa: E402


def _parse_jsonl(text: str):
    """Yield (row_num, doc_dict_or_error) tuples for a JSONL payload.
    Blank lines and `#` comment lines are skipped silently."""
    for i, line in enumerate(text.splitlines(), start=1):
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        try:
            obj = json.loads(line)
            if not isinstance(obj, dict):
                raise ValueError("row is not a JSON object")
            yield i, obj
        except Exception as e:
            yield i, {"__error__": str(e)}


def _parse_csv(text: str):
    """Yield ``(row_num, doc_dict_or_error)`` tuples for a CSV payload.

    The header must contain ``id`` and ``text``; every other column is
    folded into the row's ``metadata`` dict, skipping cells that are empty
    or missing. Row numbers start at 2 because row 1 is the header.

    Raises:
        HTTPException: 400 when the header is absent or lacks ``id`` /
            ``text``. Because this is a generator, the error surfaces on
            the first iteration rather than at call time.
    """
    records = _csv.DictReader(_io.StringIO(text))
    header = records.fieldnames
    if header is None or not ("id" in header and "text" in header):
        raise HTTPException(
            status_code=400,
            detail={"csv_missing_columns": "required: 'id' and 'text'"},
        )
    extra_cols = [name for name in header if name != "id" and name != "text"]
    row_num = 1  # the header occupies row 1
    for record in records:
        row_num += 1
        try:
            metadata = {}
            for name in extra_cols:
                if record.get(name) in (None, ""):
                    continue
                metadata[name] = record[name]
            parsed = {"id": record["id"], "text": record["text"],
                      "metadata": metadata}
            yield row_num, parsed
        except Exception as e:
            yield row_num, {"__error__": str(e)}


async def bulk_ingest_documents(request: Request):
    """Bulk ingest — JSONL (one ``{"id","text","metadata"}`` per line) or
    CSV (columns: id, text, [any other] → metadata). Partial success
    semantics: each row parsed+validated independently, successes indexed
    into the pipeline, failures reported with row numbers.

    Content-Type:
        * ``application/x-ndjson`` or ``application/jsonl`` → JSONL
        * ``text/csv`` → CSV
        * anything else → JSONL (default)

    The body is decoded as UTF-8; a leading byte-order mark is stripped and
    undecodable bytes are replaced rather than rejected.

    Returns a dict with:
        * ``accepted``     — ids of the documents handed to the pipeline
        * ``errors``       — ``[{"row", "error"}, ...]`` for rejected rows
        * ``added_chunks`` — whatever ``_pipeline.add_documents`` returned
                              (``0`` when nothing was accepted)
        * ``rows_total``   — number of rows the parser yielded (blank and
                              comment lines are not counted)
    """
    ct = (request.headers.get("content-type") or "").split(";", 1)[0].strip().lower()
    # "utf-8-sig" is plain UTF-8 that additionally drops a leading BOM.
    # Without this, a BOM-prefixed CSV (typical spreadsheet export) has a
    # first header of "\ufeffid" and is rejected as missing the 'id'
    # column, and a BOM-prefixed JSONL file loses its first row to a JSON
    # decode error.
    raw = (await request.body()).decode("utf-8-sig", errors="replace")
    if ct in ("text/csv",):
        iterator = _parse_csv(raw)
    else:
        iterator = _parse_jsonl(raw)

    # Enforce per-document and per-batch size limits (v1.35).
    from .errors import Limits
    accepted: List[Document] = []
    errors: List[Dict[str, Any]] = []
    row_n = 0
    for row_num, obj in iterator:
        row_n += 1
        if "__error__" in obj:
            # The parser already failed on this row; just relay its message.
            errors.append({"row": row_num, "error": obj["__error__"]})
            continue
        text = obj.get("text")
        if not isinstance(text, str) or not text:
            errors.append({"row": row_num,
                           "error": "missing or empty 'text'"})
            continue
        if len(text) > Limits.max_doc_text_len:
            errors.append({"row": row_num,
                           "error": f"text exceeds {Limits.max_doc_text_len} chars"})
            continue
        if len(accepted) >= Limits.max_docs_per_batch:
            # Rows beyond the cap are reported, not silently dropped.
            errors.append({"row": row_num,
                           "error": f"batch cap reached (max {Limits.max_docs_per_batch})"})
            continue
        accepted.append(Document(
            # Rows without an id get a synthetic one derived from the row number.
            id=obj.get("id") or f"row-{row_num}",
            text=text,
            metadata=obj.get("metadata") or {},
        ))

    chunks = _pipeline.add_documents(accepted) if accepted else 0

    get_obs().audit(
        "documents.bulk_ingest",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        content_type=ct, rows_total=row_n,
        accepted=len(accepted), errors=len(errors),
    )
    return {
        "accepted":    [d.id for d in accepted],
        "errors":      errors,
        "added_chunks": chunks,
        "rows_total":  row_n,
    }


def search(req: SearchRequest):
    """Run retrieval only and report each retriever's hits separately.

    Returns ``{"per_retriever": {name: [hit, ...]}}`` where every hit
    carries ``doc``, ``chunk``, ``score``, ``rank`` and the first 300
    characters of the chunk text.

    Raises:
        HTTPException: 413 / 422 when the query-text or ``k`` validators
            raise ``OverflowError`` / ``ValueError``; 400 when
            ``req.strategy`` is not a valid ``Strategy``.
    """
    try:
        validate_query_text(req.query)
        validate_k(req.k)
    except OverflowError as e:
        raise HTTPException(status_code=413, detail=str(e))
    except ValueError as e:
        raise HTTPException(status_code=422, detail=str(e))

    try:
        strategy = Strategy(req.strategy)
    except ValueError:
        raise HTTPException(status_code=400,
                            detail={"bad_strategy": req.strategy})

    query = Query(
        text=req.query,
        lang=req.lang,
        filters=req.filters,
        strategy=strategy,
        k=req.k,
        rerank_k=req.rerank_k,
    )
    found = _pipeline.retrievers.search_per_retriever(query, req.k)

    per_retriever = {}
    for retriever_name, hits in found.items():
        per_retriever[retriever_name] = [
            {
                "doc": hit.chunk.doc_id,
                "chunk": hit.chunk.chunk_id,
                "score": hit.score,
                "rank": hit.rank,
                "text": hit.chunk.text[:300],  # preview only
            }
            for hit in hits
        ]
    return {"per_retriever": per_retriever}


# ---- batch query (v1.54) -------------------------------------------------
def batch_query(req: BatchQueryRequest, request: Request):
    """Run many queries in a single HTTP call — useful for eval runners,
    benchmarks, and bulk re-indexing workflows.

    Cap: ``Limits.max_docs_per_batch`` items per call (reusing the doc-limit
    env knob). Each item is validated independently; per-item errors are
    returned alongside successful responses, each tagged with the HTTP
    status the equivalent single-query call would have produced:

        * 413 — a validator raised ``OverflowError``
        * 422 — a validator raised ``ValueError``
        * 400 — ``strategy`` is not a valid ``Strategy``
        * 500 — anything else (message truncated to 240 chars)

    Returns a dict with ``n``, ``results``, ``errors``, ``total_ms`` and
    ``avg_ms``. An empty ``queries`` list short-circuits to an empty result
    without touching the pipeline or the audit log.

    Raises:
        HTTPException: 413 when more than ``Limits.max_docs_per_batch``
            queries are submitted.
    """
    from .errors import Limits
    if not req.queries:
        return {"n": 0, "results": [], "errors": [], "total_ms": 0}
    if len(req.queries) > Limits.max_docs_per_batch:
        raise HTTPException(
            status_code=413,
            detail=f"too many queries — max {Limits.max_docs_per_batch} per batch",
        )

    import time as _t

    def _checked_strategy(item):
        # Validate one item the same way the single-query endpoints do and
        # return its Strategy. Failures become HTTPException so the loop
        # below records the proper client-error status instead of a 500.
        try:
            validate_query_text(item.query)
            validate_k(item.k)
        except OverflowError as e:
            raise HTTPException(status_code=413, detail=str(e))
        except ValueError as e:
            raise HTTPException(status_code=422, detail=str(e))
        try:
            return Strategy(item.strategy)
        except ValueError:
            raise HTTPException(status_code=400,
                                detail={"bad_strategy": item.strategy})

    # perf_counter is monotonic; wall-clock time can jump mid-measurement.
    t0 = _t.perf_counter()
    results: List[Dict[str, Any]] = []
    errors: List[Dict[str, Any]] = []

    for i, item in enumerate(req.queries, start=1):
        try:
            strategy = _checked_strategy(item)
            q = Query(text=item.query, lang=item.lang, filters=item.filters,
                      strategy=strategy, k=item.k, rerank_k=item.rerank_k)
            resp = _pipeline.run(q)
            try:
                omega = float(resp.signals.omega) if resp.signals else None
            except Exception:
                # A missing or non-numeric omega must not fail the item.
                omega = None
            verif = getattr(resp, "verification", None)
            results.append({
                "index":   i,
                "query":   item.query,
                "answer":  resp.answer or "",
                "sources": list(resp.sources or []),
                "omega":   omega,
                "passed":  bool(getattr(verif, "passed", False)) if verif else None,
            })
        except HTTPException as e:
            errors.append({"index": i, "error": str(e.detail),
                           "status": e.status_code})
        except Exception as e:
            # Best-effort: one failing item never aborts the batch.
            errors.append({"index": i,
                           "error": f"{type(e).__name__}: {e}"[:240],
                           "status": 500})

    total_ms = (_t.perf_counter() - t0) * 1000.0
    get_obs().audit(
        "batch.query",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        n=len(req.queries), errors=len(errors),
        total_ms=round(total_ms, 2),
    )
    return {
        "n":        len(req.queries),
        "results":  results,
        "errors":   errors,
        "total_ms": round(total_ms, 2),
        "avg_ms":   round(total_ms / max(1, len(req.queries)), 2),
    }


def generate_stream(req: SearchRequest, request: Request):
    """Server-Sent Events variant of the generate endpoint.

    Events, in order:
        event: retrieved  data: {"doc_ids": [...], "count": N}
        event: answer     data: {"chunk": "word "}      (one per word)
        event: done       data: {"answer","sources","omega","passed",
                                 "verification"}
        event: error      data: {"code","message"}      (on failure)

    The pipeline is run to completion inside the generator and its result
    is then replayed in stages; each event is serialized as
    ``event: <name>``, ``data: <json>`` and a blank line. ``request`` is
    accepted but not used by this handler.

    Raises:
        HTTPException: 413 / 422 when the validators raise
            ``OverflowError`` / ``ValueError``; 400 for an unknown
            strategy. Failures after streaming has started are reported
            as an ``error`` event instead.
    """
    try:
        validate_query_text(req.query)
        validate_k(req.k)
    except OverflowError as e:
        raise HTTPException(status_code=413, detail=str(e))
    except ValueError as e:
        raise HTTPException(status_code=422, detail=str(e))

    try:
        strategy = Strategy(req.strategy)
    except ValueError:
        raise HTTPException(status_code=400,
                            detail={"bad_strategy": req.strategy})

    from fastapi.responses import StreamingResponse
    import json as _json

    def _sse(event: str, data: Any) -> str:
        payload = _json.dumps(data, ensure_ascii=False)
        return f"event: {event}\ndata: {payload}\n\n"

    def _event_gen():
        try:
            query = Query(text=req.query, lang=req.lang, filters=req.filters,
                          strategy=strategy, k=req.k, rerank_k=req.rerank_k)
            resp = _pipeline.run(query)

            # Stage 1: distinct document ids, in first-seen order.
            doc_ids = []
            already_listed = set()
            for candidate in getattr(resp, "retrieved", []) or []:
                chunk = getattr(candidate, "chunk", None)
                doc_id = getattr(chunk, "doc_id", None)
                if not doc_id or doc_id in already_listed:
                    continue
                doc_ids.append(doc_id)
                already_listed.add(doc_id)
            yield _sse("retrieved",
                       {"doc_ids": doc_ids, "count": len(doc_ids)})

            # Stage 2: the answer, one space-delimited word per event.
            answer = resp.answer or ""
            for word in answer.split(" "):
                if word:
                    yield _sse("answer", {"chunk": word + " "})

            # Stage 3: closing envelope with the complete answer.
            try:
                omega = float(resp.signals.omega) if resp.signals else None
            except Exception:
                omega = None
            verif = getattr(resp, "verification", None)
            if verif:
                passed = bool(getattr(verif, "passed", False))
            else:
                passed = None
            if hasattr(verif, "to_dict"):
                verification = verif.to_dict()
            else:
                verification = getattr(verif, "__dict__", None)
            yield _sse("done", {
                "answer":  answer,
                "sources": list(resp.sources or []),
                "omega":   omega,
                "passed":  passed,
                "verification": verification,
            })
        except Exception as e:
            message = f"{type(e).__name__}: {e}"[:240]
            yield _sse("error", {
                "code":    "internal_error",
                "message": message,
            })

    stream_headers = {
        "Cache-Control":     "no-cache",
        "X-Accel-Buffering": "no",    # nginx — flush immediately
    }
    return StreamingResponse(
        _event_gen(),
        media_type="text/event-stream",
        headers=stream_headers,
    )


def generate(req: SearchRequest):
    """Run the full pipeline for one query, with a response cache in front.

    The returned dict is the pipeline response's ``to_dict()`` plus a
    ``_cache`` marker set to ``"hit"`` or ``"miss"``. The marker is added
    to a copy, so the cached entry itself never contains it.

    Raises:
        HTTPException: 413 / 422 when the validators raise
            ``OverflowError`` / ``ValueError``; 400 for an unknown strategy.
    """
    try:
        validate_query_text(req.query)
        validate_k(req.k)
    except OverflowError as e:
        raise HTTPException(status_code=413, detail=str(e))
    except ValueError as e:
        raise HTTPException(status_code=422, detail=str(e))

    try:
        strategy = Strategy(req.strategy)
    except ValueError:
        raise HTTPException(status_code=400,
                            detail={"bad_strategy": req.strategy})

    # Serve from the cache when this exact request was answered before.
    cache = get_cache()
    fingerprint = f"{req.query}|{req.strategy}|{req.k}|{req.rerank_k}"
    cache_key = cache.make_key(fingerprint, req.lang, req.filters)
    hit = cache.get(cache_key)
    if hit is not None:
        response = dict(hit)
        response["_cache"] = "hit"
        return response

    query = Query(
        text=req.query,
        lang=req.lang,
        filters=req.filters,
        strategy=strategy,
        k=req.k,
        rerank_k=req.rerank_k,
    )
    fresh = _pipeline.run(query).to_dict()
    cache.put(cache_key, fresh)
    response = dict(fresh)
    response["_cache"] = "miss"
    return response


# ---- saved query presets (v1.66) ----------------------------------------
def list_query_presets():
    """Return every saved query preset together with a count."""
    from ..presets import get_preset_store
    store = get_preset_store()
    saved = store.list_all()
    return {"count": len(saved), "presets": saved}


def get_query_preset(name: str):
    """Return the saved preset ``name`` as a dict.

    Raises:
        HTTPException: 404 when no preset with that name exists.
    """
    from ..presets import get_preset_store
    preset = get_preset_store().get(name)
    if preset is not None:
        return preset.to_dict()
    raise HTTPException(status_code=404,
                        detail={"preset_not_found": name})


def save_query_preset(name: str, body: QueryPresetBody, request: Request):
    """Create or overwrite the preset ``name`` from ``body`` and audit it.

    Only the query text and ``k`` are validated here; the strategy is
    stored as given. Returns the stored preset as a dict.

    Raises:
        HTTPException: 413 / 422 when the validators raise
            ``OverflowError`` / ``ValueError``.
    """
    from ..presets import QueryPreset, get_preset_store
    try:
        validate_query_text(body.query)
        validate_k(body.k)
    except OverflowError as e:
        raise HTTPException(status_code=413, detail=str(e))
    except ValueError as e:
        raise HTTPException(status_code=422, detail=str(e))

    preset = QueryPreset(
        name=name,
        query=body.query,
        k=body.k,
        rerank_k=body.rerank_k,
        strategy=body.strategy,
        lang=body.lang,
        notes=body.notes,
    )
    store = get_preset_store()
    store.put(preset)

    recorder = get_obs()
    recorder.audit(
        "query_preset.put",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        name=name,
    )
    return preset.to_dict()


def delete_query_preset(name: str, request: Request):
    """Remove the preset ``name`` and audit the removal.

    Raises:
        HTTPException: 404 when the store reports that nothing was removed
            (no audit entry is written in that case).
    """
    from ..presets import get_preset_store
    removed = get_preset_store().remove(name)
    if not removed:
        raise HTTPException(status_code=404,
                            detail={"preset_not_found": name})
    recorder = get_obs()
    recorder.audit(
        "query_preset.remove",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        name=name,
    )
    return {"removed": True, "name": name}


def run_query_preset(name: str):
    """Execute the saved preset ``name`` by delegating to ``generate``.

    The stored query, ``k``, ``rerank_k``, strategy and language are copied
    into a ``SearchRequest`` with empty filters; the result is whatever
    ``generate`` returns for that request.

    Raises:
        HTTPException: 404 when no preset with that name exists.
    """
    from ..presets import get_preset_store
    preset = get_preset_store().get(name)
    if preset is None:
        raise HTTPException(status_code=404,
                            detail={"preset_not_found": name})
    stored_request = SearchRequest(
        query=preset.query,
        k=preset.k,
        rerank_k=preset.rerank_k,
        strategy=preset.strategy,
        lang=preset.lang,
        filters={},
    )
    return generate(stored_request)


def generate_timings(req: SearchRequest):
    """Run the query and return only timing data and summary signals —
    no answer text and no candidate list. Intended for profiling where
    the time goes rather than inspecting what was returned.

    Response shape:
        {
          "query":      "...",
          "timings_ms": {...},      # copy of the response's timing_ms
          "omega":      0.67,       # None when unavailable
          "n_sources":  2,
          "passed":     true        # None when there is no verification
        }

    This handler always runs the pipeline; it does not consult the
    response cache used by ``generate``.

    Raises:
        HTTPException: 413 / 422 when the validators raise
            ``OverflowError`` / ``ValueError``; 400 for an unknown strategy.
    """
    try:
        validate_query_text(req.query)
        validate_k(req.k)
    except OverflowError as e:
        raise HTTPException(status_code=413, detail=str(e))
    except ValueError as e:
        raise HTTPException(status_code=422, detail=str(e))

    try:
        strategy = Strategy(req.strategy)
    except ValueError:
        raise HTTPException(status_code=400,
                            detail={"bad_strategy": req.strategy})

    query = Query(
        text=req.query,
        lang=req.lang,
        filters=req.filters,
        strategy=strategy,
        k=req.k,
        rerank_k=req.rerank_k,
    )
    resp = _pipeline.run(query)

    try:
        omega = float(resp.signals.omega) if resp.signals else None
    except Exception:
        omega = None

    if resp.verification:
        passed = bool(resp.verification.passed)
    else:
        passed = None

    return {
        "query":      req.query,
        "timings_ms": dict(resp.timing_ms or {}),
        "omega":      omega,
        "n_sources":  len(resp.sources or []),
        "passed":     passed,
    }


@app.get("/v1/admin/stats")
def admin_stats():
    """Collect the stats of the response cache, rate limiter, observability
    log and webhook dispatcher, plus the size of the pipeline's own cache."""
    from ..middleware import get_webhook_dispatcher
    snapshot = {}
    snapshot["cache"] = get_cache().stats()
    snapshot["rate_limiter"] = get_limiter().stats()
    snapshot["observability"] = get_obs().stats()
    snapshot["cached_queries"] = len(_pipeline.cache)
    snapshot["webhook"] = get_webhook_dispatcher().stats()
    return snapshot


def admin_webhook_stats():
    """Return the audit webhook dispatcher's ``stats()`` (v1.71 / v1.79)."""
    from ..middleware import get_webhook_dispatcher
    dispatcher = get_webhook_dispatcher()
    return dispatcher.stats()


# ---- alert rules (v1.80) -------------------------------------------------
# Request body for creating or updating an alert rule. The fields are
# copied into an ``AlertRule`` by ``admin_alerts_put``.
class AlertRuleRequest(BaseModel):
    # Required in the body, but ``admin_alerts_put`` uses the name from the
    # URL path instead of this value.
    name:        str
    # Metric to watch — presumably a dotted path such as ``obs.p95_ms``;
    # TODO confirm against AlertRule.
    metric:      str
    # Comparison operator; accepted values are defined by AlertRule, not here.
    op:          str
    # Value the metric is compared against.
    threshold:   float
    # Evaluation window, in seconds judging by the ``_s`` suffix — confirm.
    window_s:    float = 300.0
    # Cooldown between firings, presumably in seconds — confirm.
    cooldown_s:  float = 600.0
    # Whether the rule is active.
    enabled:     bool = True
    # Free-text description of the rule.
    description: str = ""


def admin_alerts_list():
    """Return all configured alert rules as dicts under ``rules`` (v1.80)."""
    from ..middleware import get_alert_store
    store = get_alert_store()
    serialized = []
    for rule in store.list_all():
        serialized.append(rule.to_dict())
    return {"rules": serialized}


@app.put("/v1/admin/alerts/{name}")
def admin_alerts_put(name: str, req: AlertRuleRequest, request: Request):
    """Create or update the alert rule ``name`` and audit the change.

    The name comes from the URL path; the ``name`` field in the body is
    ignored. Returns the stored rule as a dict.

    Raises:
        HTTPException: 400 when building or storing the rule raises
            ``ValueError``.
    """
    from ..middleware import get_alert_store, AlertRule
    try:
        candidate = AlertRule(
            name=name,
            metric=req.metric,
            op=req.op,
            threshold=float(req.threshold),
            window_s=float(req.window_s),
            cooldown_s=float(req.cooldown_s),
            enabled=bool(req.enabled),
            description=req.description,
        )
        stored = get_alert_store().put(candidate)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    recorder = get_obs()
    recorder.audit(
        "alert.rule.put",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        name=name,
        metric=req.metric,
        op=req.op,
        threshold=float(req.threshold),
    )
    return stored.to_dict()


@app.delete("/v1/admin/alerts/{name}")
def admin_alerts_delete(name: str, request: Request):
    """Delete the alert rule ``name`` and audit the deletion.

    Raises:
        HTTPException: 404 when the store reports that nothing was deleted
            (no audit entry is written in that case).
    """
    from ..middleware import get_alert_store
    if not get_alert_store().delete(name):
        not_found = {"error": "alert rule not found", "name": name}
        raise HTTPException(status_code=404, detail=not_found)
    recorder = get_obs()
    recorder.audit(
        "alert.rule.delete",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        name=name,
    )
    return {"deleted": True, "name": name}


def admin_alerts_scheduler_status():
    """Report the background alert scheduler's state (v1.81).

    Without a scheduler the result is ``{"enabled": False,
    "is_running": False}``; otherwise ``enabled`` is True and the
    scheduler's ``status()`` fields are merged in.
    """
    from ..middleware import get_alert_scheduler
    scheduler = get_alert_scheduler()
    if scheduler is not None:
        return {"enabled": True, **scheduler.status()}
    return {"enabled": False, "is_running": False}


def admin_alerts_evaluate(request: Request):
    """Evaluate the alert rules once against the metrics history.

    ``evaluate_all`` is given a callback that writes an ``alert.fired``
    audit entry for each verdict it is invoked with, attributed to the
    calling API key and request id. The response carries the verdict list
    plus counts of fired, suppressed and evaluated rules.
    """
    from ..middleware import (
        get_alert_store, get_metrics_history, evaluate_all,
    )
    caller_key = request.headers.get("x-api-key")
    request_id = getattr(request.state, "request_id", None)

    def _audit_fired(verdict):
        get_obs().audit(
            "alert.fired",
            actor_key=caller_key,
            request_id=request_id,
            rule=verdict["rule"],
            reason=verdict["reason"],
            latest_value=verdict["latest_value"],
            n_samples=verdict["n_samples"],
        )

    verdicts = evaluate_all(
        get_alert_store(), get_metrics_history(), on_fire=_audit_fired,
    )
    fired = [v for v in verdicts if v["fired"]]
    suppressed = [v for v in verdicts if v["suppressed"]]
    return {
        "verdicts":     verdicts,
        "n_fired":      len(fired),
        "n_suppressed": len(suppressed),
        "n_evaluated":  len(verdicts),
    }


def admin_webhook_breaker_reset(request: Request):
    """Reset the audit-webhook circuit breaker (v1.79) and audit the reset.

    The breaker's stats are captured before and after ``reset()`` and both
    are returned; the audit entry records the previous state and failure
    count.
    """
    from ..middleware import get_webhook_dispatcher
    breaker = get_webhook_dispatcher().breaker
    before = breaker.stats()
    breaker.reset()
    after = breaker.stats()
    recorder = get_obs()
    recorder.audit(
        "webhook.breaker.reset",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        prev_state=before["state"],
        prev_fail_count=before["fail_count"],
    )
    return {"reset": True, "before": before, "after": after}


def admin_metrics_history(
    since: Optional[float] = None,
    until: Optional[float] = None,
    metric: Optional[str] = None,
    limit: int = 1000,
):
    """Return sampled time-series metrics (v1.78).

    Query params:
      * ``since`` / ``until`` — Unix timestamps bounding the window.
      * ``metric`` — dotted path (e.g. ``obs.p95_ms``, ``cache.hit_rate``,
                     ``limiter.denied``) to project each sample to
                     ``{ts, value}``. Omit for full samples.
      * ``limit``  — cap on rows returned (default 1000); the newest rows
                     are kept. Zero or a negative value disables the cap.

    The sampler is off by default; enable via
    ``TAU_RAG_METRICS_HISTORY_INTERVAL_SEC=10`` at server start, or
    call ``MetricsHistorySampler(h, interval_s=10.0).start()`` directly.

    Returns a dict with ``samples``, ``count``, ``capacity``, ``metric``
    and ``sampler`` (the sampler's status, or a stopped placeholder when
    no sampler exists).
    """
    from ..middleware import get_metrics_history, get_metrics_sampler
    h = get_metrics_history()
    rows = h.history(since=since, until=until, metric=metric)
    # Keep the newest ``cap`` rows. The cap must be strictly positive: with
    # a negative value, rows[-cap:] would turn into rows[abs(cap):] and
    # silently drop the *oldest* rows instead of capping anything.
    cap = int(limit) if limit else 0
    if cap > 0 and len(rows) > cap:
        rows = rows[-cap:]
    sampler = get_metrics_sampler()
    return {
        "samples":   rows,
        "count":     len(rows),
        "capacity":  h.capacity(),
        "metric":    metric,
        "sampler":   (sampler.status() if sampler else
                      {"is_running": False, "interval_s": None}),
    }


def admin_metrics_history_sample_now(request: Request):
    """Capture one metrics sample immediately, audit it, and return it.

    The audit entry ``metrics.sample`` records the sample's ``ts``.
    """
    from ..middleware import get_metrics_history
    history = get_metrics_history()
    sample = history.sample()
    recorder = get_obs()
    recorder.audit(
        "metrics.sample",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        ts=sample["ts"],
    )
    return sample


def admin_cache_clear(request: Request):
    """Empty both the response cache and the pipeline's own cache, then
    write a ``cache.clear`` audit entry."""
    response_cache = get_cache()
    response_cache.clear()
    _pipeline.cache.clear()
    recorder = get_obs()
    recorder.audit(
        "cache.clear",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
    )
    return {"cleared": True}


def admin_logs(n: int = 50, event_type: Optional[str] = None):
    """Return the last ``n`` entries of the in-memory observability log.

    ``event_type`` is passed through to the log's ``tail``; e.g. ``audit``
    for admin actions or ``request`` for HTTP traffic.
    """
    entries = get_obs().tail(n=n, event_type=event_type)
    return {"logs": entries}


def admin_audit_export(
    since: Optional[float] = None,
    until: Optional[float] = None,
    event_type: Optional[str] = None,
    format: str = "jsonl",
    limit: int = 10_000,
):
    """Export the observability log as a downloadable JSONL or JSON file.

    Query params:
      * ``since``       — Unix timestamp, inclusive lower bound on ``ts``
      * ``until``       — Unix timestamp, exclusive upper bound on ``ts``
      * ``event_type``  — passed to the log's ``tail`` (e.g. ``audit`` or
                           ``request``); omit for all entries
      * ``format``      — ``json`` for an indented JSON array; any other
                           value produces JSONL
      * ``limit``       — number of newest entries fetched (at least 1,
                           default 10,000)

    Note the order: the newest ``limit`` entries are fetched first and the
    time bounds are applied to that subset afterwards. Entries without a
    ``ts`` are treated as timestamp 0.

    The response carries ``Content-Disposition`` (attachment) and an
    ``X-Entry-Count`` header with the number of exported rows.
    """
    from fastapi.responses import PlainTextResponse
    import json as _json

    def _stamp(entry):
        return float(entry.get("ts") or 0)

    fetch_count = max(1, int(limit))
    entries = get_obs().tail(n=fetch_count, event_type=event_type)
    if since is not None:
        entries = [e for e in entries if _stamp(e) >= float(since)]
    if until is not None:
        entries = [e for e in entries if _stamp(e) < float(until)]

    if format == "json":
        suffix = "json"
        media = "application/json"
        body = _json.dumps(entries, ensure_ascii=False, indent=2)
    else:
        # JSONL: one object per line, each line newline-terminated.
        suffix = "jsonl"
        media = "application/x-ndjson"
        body = "".join(
            _json.dumps(e, ensure_ascii=False) + "\n" for e in entries
        )

    filename = f"tau-rag-audit.{suffix}"
    download_headers = {
        "Content-Disposition": f'attachment; filename="{filename}"',
        "X-Entry-Count":       str(len(entries)),
    }
    return PlainTextResponse(body, media_type=media, headers=download_headers)


def admin_logs_stream(
    event_type: Optional[str] = None,
    heartbeat_s: float = 15.0,
    replay_last: int = 0,
    max_events: int = 0,
    max_heartbeats: int = 0,
):
    """Live SSE tail of the observability log (v1.75).

    Query params:
      * ``event_type``     — ``audit`` | ``request`` (omit = both)
      * ``heartbeat_s``    — emit a ``:heartbeat`` comment every N seconds
                              so proxies/load balancers don't idle-kill the
                              connection (default 15s, floor 0.05s).
      * ``replay_last``    — on connect, emit the last N buffered entries
                              before live-tailing (0 = live only).
      * ``max_events``     — stop after this many ``log`` events (0 =
                              unbounded). Useful for bounded tails and
                              deterministic testing.
      * ``max_heartbeats`` — stop after this many heartbeat ticks (0 =
                              unbounded). Useful for "give me whatever's
                              there within N seconds, then close".

    Event names:
      * ``log``     — new entry (data is the same dict as ``/v1/admin/logs``)
      * (heartbeat is a ``:`` SSE comment line, per the SSE spec)

    The subscriber queue is created when the response body starts being
    iterated and is released when the stream ends for any reason.
    """
    from fastapi.responses import StreamingResponse
    import json as _json
    import queue as _q

    obs = get_obs()

    def _sse(event: str, data: Any) -> str:
        return f"event: {event}\ndata: {_json.dumps(data, ensure_ascii=False)}\n\n"

    def _gen():
        # Subscribe here rather than in the handler: closing a generator
        # that was never started does not run its ``finally`` block, so a
        # subscription taken outside the generator would leak whenever the
        # response is dropped before the first chunk is pulled. Subscribing
        # before the replay keeps the original ordering (no gap between the
        # replayed tail and the live feed).
        sub = obs.subscribe(maxsize=256)
        emitted_logs = 0
        heartbeats = 0
        try:
            # Replay tail first (filtered by event_type if set)
            if replay_last and replay_last > 0:
                for row in obs.tail(n=int(replay_last), event_type=event_type):
                    yield _sse("log", row)
                    emitted_logs += 1
                    if max_events and emitted_logs >= max_events:
                        return
            # Live loop — block up to heartbeat_s for next entry; if
            # nothing arrived emit a comment keep-alive.
            hb = max(0.05, float(heartbeat_s))
            while True:
                try:
                    row = sub.get(timeout=hb)
                except _q.Empty:
                    heartbeats += 1
                    yield ": heartbeat\n\n"
                    if max_heartbeats and heartbeats >= max_heartbeats:
                        return
                    continue
                if event_type and row.get("event_type") != event_type:
                    continue
                yield _sse("log", row)
                emitted_logs += 1
                if max_events and emitted_logs >= max_events:
                    return
        finally:
            # Runs on normal completion, on errors, and when the consumer
            # closes the generator (client disconnect).
            obs.unsubscribe(sub)

    return StreamingResponse(
        _gen(),
        media_type="text/event-stream",
        headers={
            "Cache-Control":     "no-cache",
            "X-Accel-Buffering": "no",
        },
    )


def admin_ui(refresh: int = 0):
    """Render the admin dashboard as an HTML response.

    Gathers cache, limiter and observability stats, the 20 most recent
    request and audit log entries, the API key list, the document list and
    the readiness detail, and hands them to ``render_admin_ui``.
    ``refresh`` is forwarded as ``refresh_sec`` (0 when falsy).
    """
    from fastapi.responses import HTMLResponse
    from .admin_ui import render_admin_ui
    from .metrics import check_readiness

    _, readiness_detail = check_readiness(_pipeline)
    page = render_admin_ui(
        cache_stats=get_cache().stats(),
        limiter_stats=get_limiter().stats(),
        obs_stats=get_obs().stats(),
        recent_requests=get_obs().tail(n=20, event_type="request"),
        recent_audits=get_obs().tail(n=20, event_type="audit"),
        keys=get_auth().list_keys(),
        documents=_pipeline.list_documents(),
        readiness=readiness_detail,
        refresh_sec=int(refresh or 0),
    )
    return HTMLResponse(page)


# ---- snapshot / restore --------------------------------------------------
# Request body for saving a snapshot (consumed by ``admin_snapshot_save``).
class SnapshotSaveRequest(BaseModel):
    # Destination path; when omitted or empty, the handler falls back to
    # ``_default_snapshot_path()``.
    path: Optional[str] = None
    # Passed to the pipeline's ``save_snapshot`` as ``rotate``.
    rotate: int = 0          # v1.67: keep last N rotated generations


# Request body for loading a snapshot. The consuming handler is not part of
# this section of the file, so the notes below are assumptions to confirm.
class SnapshotLoadRequest(BaseModel):
    # Source path; presumably defaults to the configured snapshot path when
    # omitted — TODO confirm against the load handler.
    path: Optional[str] = None
    # Presumably selects replacing the current documents rather than merging
    # into them — TODO confirm against the load handler.
    replace: bool = False
    generation: int = 0      # v1.67: load a specific rotated generation


def _default_snapshot_path() -> str:
    """Return the snapshot path from ``TAU_RAG_SNAPSHOT_PATH``, falling back
    to ``runtime/snapshot.jsonl`` when the variable is unset or empty."""
    configured = _os.environ.get("TAU_RAG_SNAPSHOT_PATH")
    if configured:
        return configured
    return "runtime/snapshot.jsonl"


# ---- eval harness endpoint ----------------------------------------------
# Request body for the inline eval endpoint (consumed by ``admin_eval``).
class EvalRequest(BaseModel):
    # Gold cases as plain dicts; each one is converted with
    # ``GoldCase.from_dict`` by the handler.
    cases: List[Dict[str, Any]]
    # Passed to ``run_eval`` as ``k``.
    k: int = 5
    # Metric name -> minimum value, passed to the report's ``fail_below``;
    # an omitted value is treated as an empty mapping.
    thresholds: Optional[Dict[str, float]] = None


def admin_eval(req: EvalRequest, request: Request):
    """Evaluate the pipeline against an inline gold set and audit the run.

    Body: ``{"cases": [{id, query, expected_doc_ids, expected_claims?, lang?}, ...],
              "k": 5, "thresholds": {"recall@5": 0.7, ...}}``

    The response contains the report's case count, aggregate metrics,
    latency, omega, per-case results, the list of threshold failures and
    ``passed`` (True when that list is empty).
    """
    from ..eval import GoldCase, run_eval
    gold_cases = []
    for raw_case in req.cases:
        gold_cases.append(GoldCase.from_dict(raw_case))
    report = run_eval(_pipeline, gold_cases, k=req.k)
    failures = report.fail_below(req.thresholds or {})

    recorder = get_obs()
    recorder.audit(
        "eval.run",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        n_cases=report.n_cases,
        aggregate=report.aggregate,
        failures=failures,
    )

    per_case = [case.to_dict() for case in report.per_case]
    return {
        "n_cases": report.n_cases,
        "aggregate": report.aggregate,
        "latency_ms": report.latency_ms,
        "omega": report.omega,
        "per_case": per_case,
        "failures": failures,
        "passed": len(failures) == 0,
    }


def admin_snapshot_save(req: SnapshotSaveRequest, request: Request):
    """Save a pipeline snapshot and audit it.

    The target is ``req.path`` or, when that is empty, the default snapshot
    path. The summary returned by the pipeline's ``save_snapshot`` is both
    spread into the ``snapshot.save`` audit entry and returned to the caller.
    """
    target = req.path if req.path else _default_snapshot_path()
    generations = int(req.rotate or 0)
    summary = _pipeline.save_snapshot(target, rotate=generations)
    recorder = get_obs()
    recorder.audit(
        "snapshot.save",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        **summary,
    )
    return summary


def admin_snapshot_history(path: Optional[str] = None, max_gens: int = 10):
    """List the rotated snapshot generations found for a base path.

    ``path`` falls back to the default snapshot path when omitted or
    empty; ``max_gens`` is forwarded to ``list_snapshot_history``.
    """
    from ..snapshot import list_snapshot_history
    base = path if path else _default_snapshot_path()
    generations = list_snapshot_history(base, max_gens=max_gens)
    return {"base_path": base, "generations": generations}


# Request body for comparing two snapshots (consumed by
# ``admin_snapshot_diff``, which requires both paths to exist on disk).
class SnapshotDiffRequest(BaseModel):
    a: str                          # path to snapshot A (the "before")
    b: str                          # path to snapshot B (the "after")
    # Forwarded to ``diff_snapshots`` as ``include_details``.
    include_details: bool = False   # expand modified[] to {id, lens, hashes}


def admin_snapshot_diff(req: SnapshotDiffRequest, request: Request):
    """Diff two snapshots (v1.77): added / removed / modified doc IDs,
    per-snapshot metadata, and a ``same_fingerprint`` boolean.

    Typical uses:
      * CI: "what's new in this PR?" (diff main vs branch)
      * QA: "did we accidentally delete docs?" (diff yesterday vs today)
      * compliance: "what changed between quarters?"

    Responds 404 (naming the offending side) when either path is absent.
    """
    from ..snapshot import diff_snapshots

    import os as _os

    # Validate both inputs up front so the 404 says which side is missing.
    sides = {"a": req.a, "b": req.b}
    for which, snap_path in sides.items():
        if _os.path.exists(snap_path):
            continue
        raise HTTPException(
            status_code=404,
            detail={"error": "snapshot not found",
                    "which": which, "path": snap_path},
        )

    diff = diff_snapshots(req.a, req.b,
                          include_details=bool(req.include_details))

    # Only the sizes of each change set go to the audit log, not the IDs.
    counts = {
        "n_added": len(diff["added"]),
        "n_removed": len(diff["removed"]),
        "n_modified": len(diff["modified"]),
    }
    get_obs().audit(
        "snapshot.diff",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        a=req.a, b=req.b,
        **counts,
    )
    return diff


# ---- runtime config (v1.51) ---------------------------------------------
class ConfigUpdateRequest(BaseModel):
    # Request body for admin_update_config: dotted config key -> new value.
    # Whitelist of tunable fields. Add to `_TUNABLE` below before exposing.
    updates: Dict[str, Any]


def _is_unit_fraction(v: Any) -> bool:
    """Return True when ``v`` is a real number in the closed range [0, 1].

    ``bool`` is rejected explicitly: it is a subclass of ``int``, so a JSON
    ``true`` would otherwise be accepted and stored as a threshold. The
    comparison is done without ``float(v)`` so an arbitrarily large int is
    simply out of range instead of raising ``OverflowError``. NaN fails the
    comparison and is therefore rejected.
    """
    if isinstance(v, bool) or not isinstance(v, (int, float)):
        return False
    return 0.0 <= v <= 1.0


# dotted path-in-Config -> (validator_fn, human_description)
_TUNABLE: Dict[str, Any] = {
    "verify.min_omega": (
        _is_unit_fraction,
        "Minimum Ω signal for response.passed; 0.55 default",
    ),
    "verify.min_citation_coverage": (
        _is_unit_fraction,
        "Fraction of answer claims that must be cited; 0.8 default",
    ),
}


def _config_to_dict(cfg) -> Dict[str, Any]:
    """Return ``cfg`` as a plain dict.

    Dataclass instances go through ``dataclasses.asdict`` (which recurses
    into nested dataclasses); anything else is handed to ``dict()``.
    """
    import dataclasses

    if not dataclasses.is_dataclass(cfg):
        return dict(cfg)
    return dataclasses.asdict(cfg)


def _apply_config_update(cfg, key_path: str, value: Any) -> None:
    """Set ``cfg.a.b.c = value`` for a dotted ``key_path``.

    Every segment, including the last, must already exist on the object it
    is resolved against.

    Raises:
        AttributeError: if any segment of ``key_path`` is missing. Without
            the leaf check ``setattr`` would silently create a brand-new
            attribute, so a mistyped key would be reported as applied
            while changing nothing the pipeline reads.
    """
    *parents, leaf = key_path.split(".")
    target = cfg
    for name in parents:
        target = getattr(target, name)
    if not hasattr(target, leaf):
        raise AttributeError(f"unknown config field: {key_path!r}")
    setattr(target, leaf, value)


def admin_get_config():
    """Report the live configuration together with the tunable keys.

    ``config`` is the pipeline's current config as a plain dict;
    ``tunable`` maps each whitelisted key to its human description.
    """
    live = _config_to_dict(_pipeline.config)
    tunable = {}
    for key, entry in _TUNABLE.items():
        _validator, description = entry
        tunable[key] = {"description": description}
    return {"config": live, "tunable": tunable}


def admin_update_config(req: ConfigUpdateRequest, request: Request):
    """Apply whitelisted config changes at runtime.

    Each entry of ``req.updates`` is looked up in ``_TUNABLE``, validated,
    then written onto the live pipeline config. Keys that are unknown,
    fail validation, or raise while being applied are reported under
    ``rejected``. When anything was applied both query caches are cleared
    so later requests see the new thresholds. The call is audit-logged
    whether or not anything changed.
    """
    accepted: Dict[str, Any] = {}
    refused: List[Dict[str, Any]] = []

    for name, value in req.updates.items():
        spec = _TUNABLE.get(name)
        if spec is None:
            refused.append({"key": name, "reason": "not in whitelist"})
            continue

        is_valid, _ = spec
        if is_valid(value):
            try:
                _apply_config_update(_pipeline.config, name, value)
            except Exception as exc:
                refused.append({"key": name,
                                "reason": f"{type(exc).__name__}: {exc}"})
            else:
                accepted[name] = value
        else:
            refused.append({"key": name, "reason": "validation failed",
                            "value": value})

    # Cached answers were computed under the old thresholds.
    changed = bool(accepted)
    if changed:
        _pipeline.cache.clear()
        get_cache().clear()

    get_obs().audit(
        "config.update",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        applied=accepted, rejected=refused,
    )
    return {"applied": accepted, "rejected": refused,
            "cache_cleared": changed}


# ---- Hebrew synonyms CRUD (v1.52) --------------------------------------
class SynonymBody(BaseModel):
    # Request body for admin_add_synonym; both fields are passed straight
    # to add_synonym(canonical, variants).
    canonical: str
    variants: List[str]


def _maybe_autosave_synonyms() -> None:
    """Write the synonym dictionary to ``$TAU_RAG_SYNONYMS_PATH`` if set.

    Best-effort: when the variable is unset or empty nothing happens, and
    any failure while saving is printed rather than raised.
    """
    target = _os.environ.get("TAU_RAG_SYNONYMS_PATH")
    if target:
        try:
            from ..core.hebrew_synonyms import save_synonyms_jsonl
            save_synonyms_jsonl(target)
        except Exception as _e:
            print(f"[tau-rag] synonym autosave failed: {_e}")


def admin_list_synonyms(q: Optional[str] = None):
    """Return synonym entries, optionally narrowed by ``?q=``.

    With ``q`` given, an entry is kept when the stripped query is a
    substring of its canonical form or of any of its variants.
    """
    from ..core.hebrew_synonyms import list_synonyms

    entries = list_synonyms()
    if q:
        needle = q.strip()
        matched = {}
        for canonical, variants in entries.items():
            if needle in canonical or any(needle in var for var in variants):
                matched[canonical] = variants
        entries = matched
    return {"count": len(entries), "synonyms": entries}


class SynonymBulkRequest(BaseModel):
    # Request body for admin_bulk_synonyms. When `replace` is true the
    # existing synonyms are cleared before the rows are added.
    entries: List[Dict[str, Any]]           # [{canonical, variants}, ...]
    replace: bool = False


def admin_bulk_synonyms(req: SynonymBulkRequest, request: Request):
    """Bulk add/replace synonyms. Each row: {canonical, variants: [...]}.

    With ``req.replace`` the current dictionary is cleared first. Rows are
    processed independently: a failing row is recorded (1-based ``row``
    plus the error text) and does not stop the rest. Caches are cleared,
    the dictionary is autosaved if configured, and the call is audited.
    """
    from ..core.hebrew_synonyms import (
        add_synonym, clear_synonyms as _clear,
    )

    if req.replace:
        _clear()

    n_added = 0
    failures: List[Dict[str, Any]] = []
    for row_no, entry in enumerate(req.entries, start=1):
        try:
            add_synonym(entry["canonical"], list(entry.get("variants") or []))
        except Exception as exc:
            failures.append({"row": row_no, "error": str(exc)})
        else:
            n_added += 1

    # Cached answers may have been built with the previous expansions.
    _pipeline.cache.clear()
    get_cache().clear()
    _maybe_autosave_synonyms()

    get_obs().audit(
        "synonyms.bulk",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        added=n_added, errors=len(failures), replaced=req.replace,
    )
    return {"added": n_added, "errors": failures, "replaced": req.replace}


def admin_export_synonyms():
    """Download the current synonyms as JSONL.

    One ``{canonical, variants}`` object per line, each line newline
    terminated; an empty dictionary yields an empty body. Served as
    ``application/x-ndjson`` with an attachment filename.
    """
    from fastapi.responses import PlainTextResponse
    from ..core.hebrew_synonyms import list_synonyms
    import json as _json

    rows = []
    for canonical, variants in list_synonyms().items():
        record = {"canonical": canonical, "variants": variants}
        rows.append(_json.dumps(record, ensure_ascii=False))
    payload = "".join(row + "\n" for row in rows)

    download_headers = {
        "Content-Disposition": 'attachment; filename="synonyms.jsonl"',
    }
    return PlainTextResponse(payload, media_type="application/x-ndjson",
                             headers=download_headers)


def admin_add_synonym(body: SynonymBody, request: Request):
    """Create a synonym entry, or extend the variants of an existing one.

    A ``ValueError`` from ``add_synonym`` becomes a 422. On success the
    query caches are purged, the dictionary is autosaved if configured,
    the change is audited, and ``add_synonym``'s result is returned.
    """
    from ..core.hebrew_synonyms import add_synonym

    try:
        entry = add_synonym(body.canonical, body.variants)
    except ValueError as e:
        raise HTTPException(status_code=422, detail=str(e))

    # Old answers may have used stale expansion, so drop them.
    _pipeline.cache.clear()
    get_cache().clear()
    _maybe_autosave_synonyms()

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit(
        "synonyms.add",
        actor_key=actor,
        request_id=rid,
        canonical=body.canonical, n_variants=len(entry["variants"]),
    )
    return entry


def admin_delete_synonym(canonical: str, request: Request):
    """Remove the synonym entry for ``canonical``.

    Responds 404 when ``remove_synonym`` reports nothing was removed.
    Otherwise the caches are cleared, the dictionary is autosaved if
    configured, and the removal is audited.
    """
    from ..core.hebrew_synonyms import remove_synonym

    if not remove_synonym(canonical):
        raise HTTPException(status_code=404,
                            detail={"canonical": canonical})

    _pipeline.cache.clear()
    get_cache().clear()
    _maybe_autosave_synonyms()

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("synonyms.remove", actor_key=actor, request_id=rid,
                    canonical=canonical)
    return {"removed": True, "canonical": canonical}


def admin_snapshot_status():
    """Describe the periodic auto-snapshotter, if one is configured.

    With no auto-snapshotter the response is ``enabled: False`` plus a
    hint naming the environment variables that turn it on; otherwise it
    is ``enabled: True`` merged with the snapshotter's own status.
    """
    snapshotter = get_autosnapshotter()
    if snapshotter is not None:
        return {"enabled": True, **snapshotter.status()}

    hint = ("set TAU_RAG_SNAPSHOT_PATH and "
            "TAU_RAG_SNAPSHOT_INTERVAL=<seconds> to enable")
    return {"enabled": False, "hint": hint}


def admin_snapshot_load(req: SnapshotLoadRequest, request: Request):
    """Load a snapshot (current or a rotated generation) into the pipeline.

    ``req.generation`` selects the file: 0 = current, N>0 = rotated
    backup. Responds 404 when the resolved file does not exist. The
    summary from ``load_snapshot`` is returned with ``generation`` and
    ``path_loaded`` added; the audit record omits those two keys and
    ``warnings``.
    """
    base_path = req.path or _default_snapshot_path()

    from ..snapshot import _gen_path
    from pathlib import Path as _P

    generation = int(req.generation or 0)
    resolved = str(_gen_path(_P(base_path), generation))
    if not _os.path.exists(resolved):
        raise HTTPException(
            status_code=404,
            detail={"snapshot_not_found": resolved,
                    "generation": generation},
        )

    summary = _pipeline.load_snapshot(resolved, replace=req.replace)
    summary["generation"] = generation
    summary["path_loaded"] = resolved

    # ``path`` is passed explicitly below, and warnings stay out of the log.
    hidden = ("warnings", "generation", "path_loaded")
    audit_fields = {k: v for k, v in summary.items() if k not in hidden}
    get_obs().audit(
        "snapshot.load",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        path=resolved,
        **audit_fields,
    )
    return summary


# ---- API key management (admin-only endpoints) ---------------------------
class APIKeyCreateRequest(BaseModel):
    # Request body for admin_create_key; both fields are forwarded to
    # get_auth().create(label=..., scopes=...).
    label: str
    scopes: List[str] = ["read", "write"]


def admin_create_key(req: APIKeyCreateRequest, request: Request):
    """Create an API key with the requested label and scopes.

    The raw key is included in this response, with a warning that it
    cannot be retrieved later. The creation is audit-logged with the
    label and scopes.
    """
    new_key = get_auth().create(label=req.label, scopes=req.scopes)

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("key.create", actor_key=actor, request_id=rid,
                    label=req.label, scopes=req.scopes)

    response = {"api_key": new_key, "label": req.label, "scopes": req.scopes}
    response["warning"] = "save this key now — it cannot be retrieved later"
    return response


def admin_list_keys():
    """Return the auth store's key listing under ``"keys"``."""
    listing = get_auth().list_keys()
    return {"keys": listing}


def admin_revoke_key(hash_prefix: str, request: Request):
    """Revoke the API key identified by ``hash_prefix``.

    Responds 404 when the auth store reports no revocation happened;
    otherwise the revocation is audit-logged.
    """
    if not get_auth().revoke(hash_prefix):
        raise HTTPException(status_code=404, detail="key not found")

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("key.revoke", actor_key=actor, request_id=rid,
                    target_prefix=hash_prefix)
    return {"revoked": True, "hash_prefix": hash_prefix}


class APIKeyRotateRequest(BaseModel):
    # Request body for admin_rotate_key; forwarded as
    # get_auth().rotate(..., grace_seconds=...).
    grace_seconds: float = 300.0


def admin_rotate_key(
    hash_prefix: str,
    req: APIKeyRotateRequest,
    request: Request,
):
    """Rotate an API key with a grace period (v1.76).

    A new key is issued with the same label+scopes. The old key keeps
    working until ``grace_seconds`` has elapsed and then stops, so
    clients should switch their config inside that window.

    Responds 404 when the auth store returns no rotation result.
    """
    rotation = get_auth().rotate(hash_prefix,
                                 grace_seconds=req.grace_seconds)
    if rotation is None:
        not_found = {"error": "key not found or already revoked",
                     "hash_prefix": hash_prefix}
        raise HTTPException(status_code=404, detail=not_found)

    get_obs().audit(
        "key.rotate",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        old_prefix=rotation["old_prefix"],
        new_prefix=rotation["new_prefix"],
        grace_seconds=rotation["grace_seconds"],
    )

    response = dict(rotation)
    response["warning"] = "save the new key now — it cannot be retrieved later"
    return response


# ---- v2.7 Maintenance / drain mode -------------------------------------
class MaintenanceOnRequest(BaseModel):
    # Request body for admin_maintenance_on; both fields are forwarded to
    # get_maintenance().enable(reason=..., retry_after=...).
    reason: str = ""
    retry_after: int = 30


def admin_maintenance_on(req: MaintenanceOnRequest, request: Request):
    """Switch maintenance / drain mode on (v2.7).

    While it is on, non-admin requests receive 503 + ``Retry-After``.
    Admin callers (this endpoint included) always flow through.
    """
    from ..middleware.maintenance import get_maintenance

    maint = get_maintenance()
    maint.enable(reason=req.reason, retry_after=req.retry_after)

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("maintenance.on", actor_key=actor, request_id=rid,
                    reason=req.reason, retry_after=req.retry_after)
    return {"ok": True, **maint.snapshot()}


def admin_maintenance_off(request: Request):
    """Clear maintenance / drain mode (v2.7).

    The state is captured before disabling so the audit record can carry
    how long the mode was active (``duration_sec``, rounded to 2 places).
    """
    from ..middleware.maintenance import get_maintenance

    maint = get_maintenance()
    before = maint.snapshot()
    maint.disable()

    elapsed = round(before["duration_sec"], 2)
    get_obs().audit(
        "maintenance.off",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        duration_sec=elapsed,
    )
    return {"ok": True, **maint.snapshot()}


def admin_maintenance_status():
    """Return the current maintenance / drain state snapshot (v2.7)."""
    from ..middleware.maintenance import get_maintenance

    maint = get_maintenance()
    return maint.snapshot()


# ---- v2.8 PII redaction -------------------------------------------------
class PIIToggleRequest(BaseModel):
    # Request body for admin_pii_toggle; passed to set_enabled() on the
    # PII redactor.
    enabled: bool = True


def admin_pii_toggle(req: PIIToggleRequest, request: Request):
    """Turn PII redaction on or off at runtime (v2.8).

    The startup default comes from the ``TAU_RAG_PII_REDACT`` env var;
    this endpoint lets ops flip it without a restart (handy once you
    notice bodies were reaching the log unredacted).
    """
    from ..middleware.pii_redaction import get_pii_redactor

    redactor = get_pii_redactor()
    redactor.set_enabled(req.enabled)

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("pii_redaction.toggle", actor_key=actor, request_id=rid,
                    enabled=req.enabled)
    return {"ok": True, **redactor.stats()}


def admin_pii_stats():
    """Return the PII redactor's counters: how many IDs/phones/emails/CCs
    were scrubbed since startup (or the last reset)."""
    from ..middleware.pii_redaction import get_pii_redactor

    redactor = get_pii_redactor()
    return redactor.stats()


def admin_pii_reset(request: Request):
    """Zero the per-kind redaction counters; the enabled state is left
    alone. Audit-logged."""
    from ..middleware.pii_redaction import get_pii_redactor

    redactor = get_pii_redactor()
    redactor.reset()

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("pii_redaction.reset", actor_key=actor, request_id=rid)
    return {"ok": True, **redactor.stats()}


# ---- v2.9 Slow-query detection -----------------------------------------
class SlowThresholdRequest(BaseModel):
    # Request body for admin_slow_threshold; passed to set_threshold() on
    # the slow-query tracker (milliseconds).
    ms: float


def admin_slow_queries(n: int = 20):
    """Slowest requests, per-path aggregates and summary stats.

    ``n`` caps the length of the ``top`` list; it is clamped to the
    range 1..100 (default 20).
    """
    from ..middleware.slow_queries import get_slow_tracker

    limit = int(n)
    if limit < 1:
        limit = 1
    elif limit > 100:
        limit = 100

    tracker = get_slow_tracker()
    report = {"stats": tracker.stats()}
    report["top"] = tracker.top_n(limit)
    report["by_path"] = tracker.by_path()
    return report


def admin_slow_threshold(req: SlowThresholdRequest, request: Request):
    """Change the slow-query threshold (ms) at runtime. 0 disables.

    The audit record carries the previous threshold and the value the
    tracker reports after the change.
    """
    from ..middleware.slow_queries import get_slow_tracker

    tracker = get_slow_tracker()
    previous = tracker.threshold_ms
    tracker.set_threshold(req.ms)

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("slow_queries.threshold", actor_key=actor,
                    request_id=rid, old_ms=previous,
                    new_ms=tracker.threshold_ms)
    return {"ok": True, **tracker.stats()}


def admin_slow_reset(request: Request):
    """Clear the slow-query ring buffer and per-path aggregates.
    Audit-logged."""
    from ..middleware.slow_queries import get_slow_tracker

    tracker = get_slow_tracker()
    tracker.reset()

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("slow_queries.reset", actor_key=actor, request_id=rid)
    return {"ok": True, **tracker.stats()}


# ---- v2.12 Daily quota -------------------------------------------------
class QuotaSetRequest(BaseModel):
    # Request body for admin_quota_set; forwarded as
    # set_quota(key_prefix, limit) on the quota tracker.
    key_prefix: str
    limit: int


def admin_quotas_get():
    """Dump the whole quota state: limits, usage, day cursor, reset timer."""
    from ..middleware.quota import get_quota_tracker

    tracker = get_quota_tracker()
    return tracker.stats()


def admin_quota_set(req: QuotaSetRequest, request: Request):
    """Assign a daily quota to a key, identified by hash prefix.

    ``limit=0`` means unlimited (equivalent to deleting the quota).
    Usage counters are NOT reset, so an operator can raise the cap
    mid-day without wiping the meter.
    """
    from ..middleware.quota import get_quota_tracker

    prefix, limit = req.key_prefix, req.limit
    get_quota_tracker().set_quota(prefix, limit)

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("quota.set", actor_key=actor, request_id=rid,
                    key_prefix=prefix, limit=limit)
    return {"ok": True, "key_prefix": prefix, "limit": limit}


def admin_quota_clear(key_prefix: str, request: Request):
    """Drop quota enforcement for a key entirely (back to unlimited,
    usage counter zeroed).

    Responds 404 when the tracker reports nothing was removed.
    """
    from ..middleware.quota import get_quota_tracker

    tracker = get_quota_tracker()
    if not tracker.clear_quota(key_prefix):
        not_found = {"error": "no quota set for this key_prefix",
                     "key_prefix": key_prefix}
        raise HTTPException(status_code=404, detail=not_found)

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("quota.clear", actor_key=actor, request_id=rid,
                    key_prefix=key_prefix)
    return {"ok": True, "key_prefix": key_prefix, "removed": True}


def admin_quota_reset_all(request: Request):
    """Wipe every piece of quota state — limits AND usage. Meant for
    testing / incident recovery. Audit-logged."""
    from ..middleware.quota import get_quota_tracker

    tracker = get_quota_tracker()
    tracker.reset()

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("quota.reset_all", actor_key=actor, request_id=rid)
    return {"ok": True, **tracker.stats()}


# ---- v2.13 Idempotency -------------------------------------------------
def admin_idempotency_stats():
    """Return the idempotency store's stats."""
    from ..middleware.idempotency import get_idempotency_store

    store = get_idempotency_store()
    return store.stats()


def admin_idempotency_reset(request: Request):
    """Reset the idempotency store, audit-log it, and return fresh stats.

    The store is fetched again for the response rather than reusing the
    handle that was reset.
    """
    from ..middleware.idempotency import get_idempotency_store

    get_idempotency_store().reset()

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("idempotency.reset", actor_key=actor, request_id=rid)

    stats = get_idempotency_store().stats()
    return {"ok": True, **stats}


class IdemTTLRequest(BaseModel):
    # Request body for admin_idempotency_ttl; passed to set_ttl() on the
    # idempotency store.
    ttl_sec: float


def admin_idempotency_ttl(req: IdemTTLRequest, request: Request):
    """Set the idempotency store's TTL from ``req.ttl_sec``; audit-logged.
    Returns the store's stats."""
    from ..middleware.idempotency import get_idempotency_store

    store = get_idempotency_store()
    store.set_ttl(req.ttl_sec)

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("idempotency.ttl", actor_key=actor, request_id=rid,
                    ttl_sec=req.ttl_sec)
    return {"ok": True, **store.stats()}


# ---- v2.14 Request timeout ---------------------------------------------
class TimeoutRequest(BaseModel):
    # Request body for admin_request_timeout_set; passed to
    # set_timeout_ms() on the timeout guard.
    timeout_ms: float


def admin_request_timeout_stats():
    """Return the request-timeout guard's stats."""
    from ..middleware.request_timeout import get_timeout_guard

    guard = get_timeout_guard()
    return guard.stats()


def admin_request_timeout_set(req: TimeoutRequest, request: Request):
    """Set the wall-clock request timeout in ms. 0 disables.

    The audit record carries the previous value and the value the guard
    reports after the change.
    """
    from ..middleware.request_timeout import get_timeout_guard

    guard = get_timeout_guard()
    previous = guard.timeout_ms
    guard.set_timeout_ms(req.timeout_ms)

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("request_timeout.set", actor_key=actor, request_id=rid,
                    old_ms=previous, new_ms=guard.timeout_ms)
    return {"ok": True, **guard.stats()}


def admin_request_timeout_reset(request: Request):
    """Reset the request-timeout guard and return its stats.

    The reset is audit-logged as ``request_timeout.reset``, matching the
    other state-changing admin handlers in this module (for example
    ``request_timeout.set``); previously ``request`` was accepted but the
    mutation left no audit trail.
    """
    from ..middleware.request_timeout import get_timeout_guard
    g = get_timeout_guard()
    g.reset()
    get_obs().audit(
        "request_timeout.reset",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
    )
    return {"ok": True, **g.stats()}


# ---- v2.16 Body limit --------------------------------------------------
class BodyLimitRequest(BaseModel):
    # Request body for admin_body_limit_set; passed to set_max_bytes() on
    # the body-limit guard.
    max_bytes: int


def admin_body_limit_get():
    """Return the body-limit guard's stats."""
    from ..middleware.body_limit import get_body_limit_guard

    guard = get_body_limit_guard()
    return guard.stats()


def admin_body_limit_set(req: BodyLimitRequest, request: Request):
    """Change the maximum request body size (bytes) at runtime. 0 disables.

    The audit record carries the previous limit and the limit the guard
    reports after the change.
    """
    from ..middleware.body_limit import get_body_limit_guard

    guard = get_body_limit_guard()
    previous = guard.max_bytes
    guard.set_max_bytes(req.max_bytes)

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("body_limit.set", actor_key=actor, request_id=rid,
                    old_bytes=previous, new_bytes=guard.max_bytes)
    return {"ok": True, **guard.stats()}


# ---- v2.17 Deprecation registry ----------------------------------------
class DeprecationRegisterRequest(BaseModel):
    # Request body for admin_deprecations_register; every field is
    # forwarded by keyword to the deprecation registry's register().
    path_prefix: str
    sunset_at: Optional[float] = None         # unix ts
    successor_url: Optional[str] = None
    docs_url: Optional[str] = None
    note: Optional[str] = None


def admin_deprecations_get():
    """Return every deprecation registry entry under ``"entries"``."""
    from ..middleware.deprecation import get_deprecation_registry

    registry = get_deprecation_registry()
    return {"entries": registry.all_entries()}


def admin_deprecations_register(req: DeprecationRegisterRequest,
                                request: Request):
    """Register a deprecation entry for a path prefix.

    A ``ValueError`` from the registry becomes a 400. On success the
    registration is audit-logged and the entry is returned as a dict
    (via ``dataclasses.asdict``) alongside ``ok: True``.
    """
    from dataclasses import asdict as _asdict
    from ..middleware.deprecation import get_deprecation_registry

    registry = get_deprecation_registry()
    fields = {
        "path_prefix": req.path_prefix,
        "sunset_at": req.sunset_at,
        "successor_url": req.successor_url,
        "docs_url": req.docs_url,
        "note": req.note,
    }
    try:
        entry = registry.register(**fields)
    except ValueError as e:
        raise HTTPException(status_code=400,
                            detail={"error": str(e)})

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("deprecation.register", actor_key=actor, request_id=rid,
                    path_prefix=req.path_prefix, sunset_at=req.sunset_at)
    return {"ok": True, **_asdict(entry)}


def admin_deprecations_clear(path_prefix: str, request: Request):
    """Unregister the deprecation entry for ``path_prefix``.

    Responds 404 when the registry reports nothing was removed; the
    removal is audit-logged otherwise.
    """
    from ..middleware.deprecation import get_deprecation_registry

    # Path params strip the leading /; add it back
    prefix = path_prefix if path_prefix.startswith("/") else "/" + path_prefix

    if not get_deprecation_registry().unregister(prefix):
        raise HTTPException(status_code=404,
                            detail={"error": "not registered",
                                    "path_prefix": prefix})

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("deprecation.unregister", actor_key=actor,
                    request_id=rid, path_prefix=prefix)
    return {"ok": True, "removed": True, "path_prefix": prefix}


# ---- v2.18 Feature flags -----------------------------------------------
class FlagSetRequest(BaseModel):
    # Request body for admin_flags_set; forwarded as set(name, value) on
    # the feature-flag store.
    name: str
    value: bool


def admin_flags_get():
    """Return all feature flags under ``"flags"``."""
    from ..middleware.feature_flags import get_feature_flags

    store = get_feature_flags()
    return {"flags": store.all_flags()}


def admin_flags_set(req: FlagSetRequest, request: Request):
    """Set a feature flag to ``req.value``.

    Responds 404 when the flag store returns no entry for ``req.name``.
    On success the change is audit-logged and the entry is returned as a
    dict (via ``dataclasses.asdict``) alongside ``ok: True``.
    """
    from dataclasses import asdict as _asdict
    from ..middleware.feature_flags import get_feature_flags

    updated = get_feature_flags().set(req.name, req.value)
    if updated is None:
        not_found = {"error": "flag not registered", "name": req.name}
        raise HTTPException(status_code=404, detail=not_found)

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("flag.set", actor_key=actor, request_id=rid,
                    name=req.name, value=req.value)
    return {"ok": True, **_asdict(updated)}


def admin_flags_reset(name: str, request: Request):
    """Restore a feature flag to its default.

    Responds 404 when the flag store returns no entry for ``name``. On
    success the reset is audit-logged and the entry is returned as a
    dict (via ``dataclasses.asdict``) alongside ``ok: True``.
    """
    from dataclasses import asdict as _asdict
    from ..middleware.feature_flags import get_feature_flags

    restored = get_feature_flags().reset_to_default(name)
    if restored is None:
        raise HTTPException(status_code=404,
                            detail={"error": "flag not registered",
                                    "name": name})

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("flag.reset", actor_key=actor, request_id=rid, name=name)
    return {"ok": True, **_asdict(restored)}


# ---- v2.19 SIGTERM shutdown manager ------------------------------------
class ShutdownConfigRequest(BaseModel):
    # Request body for admin_shutdown_config. A field left as None is not
    # acted on; install=True installs and install=False uninstalls.
    grace_sec: Optional[float] = None
    install: Optional[bool] = None


def admin_shutdown_stats():
    """Return the shutdown manager's stats."""
    from ..middleware.shutdown import get_shutdown_manager

    manager = get_shutdown_manager()
    return manager.stats()


def admin_shutdown_config(req: ShutdownConfigRequest, request: Request):
    """Configure graceful-shutdown behavior.

    ``install=True`` attaches SIGTERM/SIGINT handlers and ``install=False``
    removes them; ``grace_sec`` sets the drain-before-exit window. Fields
    left as ``None`` are not acted on. Audit-logged with the raw request
    values.
    """
    from ..middleware.shutdown import get_shutdown_manager

    manager = get_shutdown_manager()
    grace, toggle = req.grace_sec, req.install

    if grace is not None:
        manager.set_grace_sec(grace)

    if toggle is True:
        manager.install()
    elif toggle is False:
        manager.uninstall()

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("shutdown.config", actor_key=actor, request_id=rid,
                    grace_sec=grace, install=toggle)
    return {"ok": True, **manager.stats()}


# ---- v2.20 Per-endpoint timeouts (extends v2.14 admin) ----------------
class EndpointTimeoutRequest(BaseModel):
    # Request body for admin_endpoint_timeout_set; forwarded as
    # set_endpoint_timeout(path_prefix, timeout_ms) on the timeout guard.
    path_prefix: str
    timeout_ms: float


def admin_endpoint_timeout_set(req: EndpointTimeoutRequest,
                               request: Request):
    """Install a per-endpoint timeout override (v2.20).

    ``timeout_ms=0`` removes the override, so the endpoint falls back to
    the global timeout. Audit-logged.
    """
    from ..middleware.request_timeout import get_timeout_guard

    guard = get_timeout_guard()
    prefix, timeout_ms = req.path_prefix, req.timeout_ms
    guard.set_endpoint_timeout(prefix, timeout_ms)

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("request_timeout.endpoint", actor_key=actor,
                    request_id=rid, path_prefix=prefix,
                    timeout_ms=timeout_ms)
    return {"ok": True, **guard.stats()}


def admin_endpoint_timeout_clear(path_prefix: str, request: Request):
    """Remove the per-endpoint timeout override for ``path_prefix``.

    A leading ``/`` is added when missing. Responds 404 when the guard
    reports that no override was removed.

    The removal is audit-logged as ``request_timeout.endpoint_clear``,
    matching ``admin_endpoint_timeout_set`` and the other ``*_clear``
    handlers; previously ``request`` was accepted but the mutation left no
    audit trail.
    """
    from ..middleware.request_timeout import get_timeout_guard
    g = get_timeout_guard()
    if not path_prefix.startswith("/"):
        path_prefix = "/" + path_prefix
    removed = g.clear_endpoint_timeout(path_prefix)
    if not removed:
        raise HTTPException(status_code=404,
                            detail={"error": "not set",
                                    "path_prefix": path_prefix})
    get_obs().audit(
        "request_timeout.endpoint_clear",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        path_prefix=path_prefix,
    )
    return {"ok": True, "removed": True, **g.stats()}


# ---- v2.21 Cost tracking ---------------------------------------------
class CostRateRequest(BaseModel):
    # Request body for admin_costs_set_rate; forwarded as
    # set_rate(endpoint, per_call_usd=..., per_1k_tokens_usd=...).
    endpoint: str            # path prefix, or "*" for default
    per_call_usd: float = 0.0
    per_1k_tokens_usd: float = 0.0


def admin_costs_get(n: int = 20):
    """Return cost-tracker stats plus the top spenders.

    ``n`` bounds the ``top_spenders`` list and is clamped to 1..100.
    """
    from ..middleware.cost import get_cost_tracker

    limit = int(n)
    if limit < 1:
        limit = 1
    elif limit > 100:
        limit = 100

    tracker = get_cost_tracker()
    report = {"stats": tracker.stats()}
    report["top_spenders"] = tracker.top_spenders(limit)
    return report


def admin_costs_set_rate(req: CostRateRequest, request: Request):
    """Set the per-call and per-1k-token rates for an endpoint.

    All three request fields are forwarded to the cost tracker's
    ``set_rate`` and recorded in the audit log. Returns the tracker stats.
    """
    from ..middleware.cost import get_cost_tracker

    tracker = get_cost_tracker()
    rates = {"per_call_usd": req.per_call_usd,
             "per_1k_tokens_usd": req.per_1k_tokens_usd}
    tracker.set_rate(req.endpoint, **rates)

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("cost.rate", actor_key=actor, request_id=rid,
                    endpoint=req.endpoint, **rates)
    return {"ok": True, **tracker.stats()}


def admin_costs_key_usage(key_prefix: str):
    """Return the cost tracker's usage record for ``key_prefix``."""
    from ..middleware.cost import get_cost_tracker

    tracker = get_cost_tracker()
    return tracker.usage_for_key(key_prefix)


def admin_costs_reset(request: Request):
    """Reset the cost tracker and return its stats.

    The reset is audit-logged as ``cost.reset``, matching the other
    state-changing admin handlers in this module (for example
    ``cost.rate``); previously ``request`` was accepted but the mutation
    left no audit trail.
    """
    from ..middleware.cost import get_cost_tracker
    t = get_cost_tracker()
    t.reset()
    get_obs().audit(
        "cost.reset",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
    )
    return {"ok": True, **t.stats()}


# ---- v2.22 Response compression ----------------------------------------
class CompressionConfigRequest(BaseModel):
    # Request body for admin_compression_set. A field left as None is
    # skipped; otherwise it is applied via set_enabled() / set_min_size().
    enabled: Optional[bool] = None
    min_size_bytes: Optional[int] = None


def admin_compression_get():
    """Return the compression tracker's stats."""
    from ..middleware.compression import get_compression_tracker

    tracker = get_compression_tracker()
    return tracker.stats()


def admin_compression_set(req: CompressionConfigRequest, request: Request):
    """Adjust response-compression settings at runtime.

    Only fields that are not ``None`` are applied. The audit record
    carries both request values as given, including ``None``.
    """
    from ..middleware.compression import get_compression_tracker

    tracker = get_compression_tracker()
    enabled, min_size = req.enabled, req.min_size_bytes

    if enabled is not None:
        tracker.set_enabled(enabled)
    if min_size is not None:
        tracker.set_min_size(min_size)

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("compression.config", actor_key=actor, request_id=rid,
                    enabled=enabled, min_size_bytes=min_size)
    return {"ok": True, **tracker.stats()}


# ---- v2.23 IP allowlist ------------------------------------------------
class IPAllowlistAddRequest(BaseModel):
    # Request body for admin_ip_allowlist_add; forwarded as
    # add(key_prefix, ip_or_cidr) on the IP allowlist store.
    key_prefix: str
    ip_or_cidr: str


def admin_ip_allowlist_get():
    """Return all IP allowlist entries merged with the store's stats."""
    from ..middleware.ip_allowlist import get_ip_allowlist

    store = get_ip_allowlist()
    listing = {"entries": store.all_entries()}
    listing.update(store.stats())
    return listing


def admin_ip_allowlist_add(req: IPAllowlistAddRequest, request: Request):
    """Add an IP or CIDR to a key's allowlist.

    A ``ValueError`` from the store becomes a 400. On success the
    addition is audit-logged and the key's current entries are returned.
    """
    from ..middleware.ip_allowlist import get_ip_allowlist

    store = get_ip_allowlist()
    prefix, address = req.key_prefix, req.ip_or_cidr
    try:
        store.add(prefix, address)
    except ValueError as e:
        raise HTTPException(status_code=400,
                            detail={"error": str(e)})

    actor = request.headers.get("x-api-key")
    rid = getattr(request.state, "request_id", None)
    get_obs().audit("ip_allowlist.add", actor_key=actor, request_id=rid,
                    key_prefix=prefix, ip_or_cidr=address)
    return {"ok": True, "key_prefix": prefix,
            "entries": store.list_for_key(prefix)}


def admin_ip_allowlist_clear_key(key_prefix: str, request: Request):
    """Remove every IP allowlist entry for ``key_prefix``.

    Responds 404 when the store reports that nothing was removed.

    The removal is audit-logged as ``ip_allowlist.clear``, matching
    ``admin_ip_allowlist_add``; previously ``request`` was accepted but
    the mutation left no audit trail.
    """
    from ..middleware.ip_allowlist import get_ip_allowlist
    s = get_ip_allowlist()
    removed = s.clear_key(key_prefix)
    if not removed:
        raise HTTPException(status_code=404,
                            detail={"error": "no entries",
                                    "key_prefix": key_prefix})
    get_obs().audit(
        "ip_allowlist.clear",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        key_prefix=key_prefix,
    )
    return {"ok": True, "key_prefix": key_prefix, "removed": True}


# ---- v2.24 Query complexity --------------------------------------------
class QueryComplexityConfigRequest(BaseModel):
    # Request body for admin_query_complexity_set; all four fields are
    # forwarded by keyword to configure() on the query-complexity store.
    # NOTE(review): presumably None means "leave unchanged" — confirm in
    # configure().
    max_tokens: Optional[int] = None
    max_chars: Optional[int] = None
    min_unique_ratio: Optional[float] = None
    enabled: Optional[bool] = None


def admin_query_complexity_get():
    """Return the query-complexity store's stats."""
    from ..middleware.query_complexity import get_query_complexity

    store = get_query_complexity()
    return store.stats()


def admin_query_complexity_set(req: QueryComplexityConfigRequest,
                                request: Request):
    """Reconfigure the query-complexity guard and audit the change.

    All four request fields are passed to ``configure()``; the audit
    event carries only the fields the client explicitly set. Returns
    ``{"ok": True}`` merged with the guard's ``stats()``.
    """
    from ..middleware.query_complexity import get_query_complexity

    guard = get_query_complexity()
    guard.configure(
        max_tokens=req.max_tokens,
        max_chars=req.max_chars,
        min_unique_ratio=req.min_unique_ratio,
        enabled=req.enabled,
    )

    obs = get_obs()
    explicitly_set = req.model_dump(exclude_unset=True)
    obs.audit(
        "query_complexity.config",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        **explicitly_set,
    )

    body = {"ok": True}
    body.update(guard.stats())
    return body


def admin_query_complexity_reset(request: Request):
    """Call ``reset()`` on the query-complexity guard.

    ``request`` is accepted but not read. Returns ``{"ok": True}`` merged
    with the guard's ``stats()`` taken after the reset.
    """
    from ..middleware.query_complexity import get_query_complexity

    guard = get_query_complexity()
    guard.reset()
    body = {"ok": True}
    body.update(guard.stats())
    return body


# ---- v2.25 Per-key labels ----------------------------------------------
class KeyLabelRequest(BaseModel):
    # Request body for admin_key_labels_set; the three fields are passed
    # positionally, in this order, to the label store's set_label().
    key_prefix: str
    name: str
    value: str


def admin_key_labels_get():
    """Return all key-label entries merged with the store's stats."""
    from ..middleware.key_labels import get_key_labels

    labels = get_key_labels()
    payload = {"entries": labels.all_entries()}
    payload.update(labels.stats())
    return payload


def admin_key_labels_for_key(key_prefix: str):
    """Return the labels stored for one ``key_prefix``."""
    from ..middleware.key_labels import get_key_labels

    labels = get_key_labels().get_labels(key_prefix)
    return {"key_prefix": key_prefix, "labels": labels}


def admin_key_labels_set(req: KeyLabelRequest, request: Request):
    """Set one label on a key, audit it, and return the key's labels.

    The audit event ``"key_labels.set"`` records the key prefix, label
    name and label value together with the caller's API key header and
    request id.
    """
    from ..middleware.key_labels import get_key_labels

    store = get_key_labels()
    store.set_label(req.key_prefix, req.name, req.value)

    obs = get_obs()
    obs.audit(
        "key_labels.set",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        key_prefix=req.key_prefix,
        name=req.name,
        value=req.value,
    )

    current = store.get_labels(req.key_prefix)
    return {"ok": True, "key_prefix": req.key_prefix, "labels": current}


def admin_key_labels_remove(key_prefix: str, name: str,
                             request: Request):
    """Remove a single label from a key.

    ``request`` is accepted but not read. Responds 404 when
    ``remove_label()`` reports that nothing was removed.
    """
    from ..middleware.key_labels import get_key_labels

    store = get_key_labels()
    if store.remove_label(key_prefix, name):
        return {"ok": True, "key_prefix": key_prefix, "removed": name}
    raise HTTPException(
        status_code=404,
        detail={
            "error": "label not set",
            "key_prefix": key_prefix,
            "name": name,
        },
    )


def admin_key_labels_clear(key_prefix: str, request: Request):
    """Drop every label attached to ``key_prefix``.

    ``request`` is accepted but not read. Responds 404 when
    ``clear_key()`` reports that nothing was removed.
    """
    from ..middleware.key_labels import get_key_labels

    store = get_key_labels()
    if store.clear_key(key_prefix):
        return {"ok": True, "key_prefix": key_prefix, "removed": True}
    raise HTTPException(
        status_code=404,
        detail={"error": "no labels", "key_prefix": key_prefix},
    )


# ---- v2.26 i18n --------------------------------------------------------
class I18nRegisterRequest(BaseModel):
    # Request body for admin_i18n_register; the fields are passed
    # positionally to the i18n registry's register(code, lang, message).
    # The handler audits code and lang but not the message text.
    code: str
    lang: str
    message: str


def admin_i18n_get():
    """Return all i18n registry entries merged with the registry stats."""
    from ..middleware.i18n import get_i18n

    registry = get_i18n()
    payload = {"entries": registry.all_entries()}
    payload.update(registry.stats())
    return payload


def admin_i18n_register(req: I18nRegisterRequest, request: Request):
    """Register a message for ``(code, lang)`` and audit the change.

    The audit event ``"i18n.register"`` carries the code and language
    only. Returns ``{"ok": True}`` merged with the registry's ``stats()``.
    """
    from ..middleware.i18n import get_i18n

    registry = get_i18n()
    registry.register(req.code, req.lang, req.message)

    obs = get_obs()
    obs.audit(
        "i18n.register",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        code=req.code,
        lang=req.lang,
    )

    body = {"ok": True}
    body.update(registry.stats())
    return body


def admin_i18n_unregister(code: str, lang: str, request: Request):
    """Remove the message registered for ``(code, lang)``.

    ``request`` is accepted but not read. Responds 404 when
    ``unregister()`` returns a falsy value.
    """
    from ..middleware.i18n import get_i18n

    registry = get_i18n()
    was_removed = registry.unregister(code, lang)
    if was_removed:
        return {"ok": True, "code": code, "lang": lang, "removed": True}
    raise HTTPException(
        status_code=404,
        detail={"error": "not registered", "code": code, "lang": lang},
    )


# ---- v2.27 Request bundle ----------------------------------------------
def admin_request_bundle(request_id: str):
    """Return the post-incident forensic bundle for one ``request_id``.

    A single call that stands in for N separate lookups: request_log,
    captured_body, spans, audit_events and timing. The assembly itself is
    delegated to ``build_bundle``.
    """
    from ..middleware.request_bundle import build_bundle

    bundle = build_bundle(request_id)
    return bundle


# ---- v2.28 Per-label aggregation ---------------------------------------
def admin_aggregate_by_label(label_name: str, n: int = 2000):
    """Aggregate recent traffic by the value of one label.

    Intended for labels such as tenant, tier or region; per the
    aggregation helper's contract the buckets report request counts,
    latency p50/p95/p99, cost and status counts. ``n`` is coerced to
    ``int`` and clamped to the range 100..10000 before being passed on
    as ``n_rows``.
    """
    from ..middleware.label_aggregation import aggregate_by_label

    requested = int(n)
    if requested < 100:
        n_rows = 100
    elif requested > 10000:
        n_rows = 10000
    else:
        n_rows = requested
    return aggregate_by_label(label_name, n_rows=n_rows)


# ---- v2.29 Unified admin dashboard HTML --------------------------------
def admin_dashboard(refresh: int = 0):
    """Render the consolidated admin dashboard as an HTML response.

    Collects one stats snapshot from each middleware component, passes
    them to ``render_dashboard`` and wraps the markup in a 200
    ``HTMLResponse``. ``refresh`` is coerced to ``int`` and floored at 0
    before being passed on as ``refresh_sec``.
    """
    from fastapi.responses import HTMLResponse
    from .ops_dashboard_ui import render_dashboard
    from ..middleware import (
        get_obs, get_limiter, get_quota_tracker,
        get_body_limit_guard, get_compression_tracker,
        get_slow_tracker, get_cost_tracker,
        get_maintenance, get_ip_allowlist, get_pii_redactor,
        get_readiness_registry,
    )
    from ..observability.tracing import get_tracer

    observability = get_obs()
    # Dict literals evaluate in source order, so the snapshots are taken
    # in the same sequence as the keyword arguments are listed here.
    panels = {
        "version": app.version,
        "obs_stats": observability.stats(),
        "limiter_stats": get_limiter().stats(),
        "quota_stats": get_quota_tracker().stats(),
        "body_stats": get_body_limit_guard().stats(),
        "compression_stats": get_compression_tracker().stats(),
        "slow_stats": get_slow_tracker().stats(),
        "spans_count": len(get_tracer().spans),
        "cost_stats": get_cost_tracker().stats(),
        "top_spenders": get_cost_tracker().top_spenders(5),
        "maint_stats": get_maintenance().snapshot(),
        "ip_allow_stats": get_ip_allowlist().stats(),
        "pii_stats": get_pii_redactor().stats(),
        "readiness": get_readiness_registry().evaluate(),
        "refresh_sec": max(0, int(refresh)),
    }
    page = render_dashboard(**panels)
    return HTMLResponse(content=page, status_code=200)


# ---- v2.30 SLO tracking ------------------------------------------------
class SLORegisterRequest(BaseModel):
    # Request body for admin_slo_register; every field is forwarded as a
    # keyword argument to the SLO tracker's register(), which may reject
    # the combination with ValueError (reported as HTTP 400).
    name: str
    kind: str                          # "latency" | "availability"
    target_pct: float
    window: int = 1000                 # presumably a count of recent requests -- confirm in the tracker
    latency_threshold_ms: float = 0.0  # presumably only meaningful for kind="latency" -- confirm


def admin_slo_get():
    """Return the SLO tracker's ``snapshot()`` under the ``"slos"`` key."""
    from ..middleware.slo import get_slo_tracker

    snapshot = get_slo_tracker().snapshot()
    return {"slos": snapshot}


def admin_slo_register(req: SLORegisterRequest, request: Request):
    """Register an SLO with the tracker and audit the registration.

    Args:
        req: SLO definition; all fields are forwarded to ``register()``.
        request: Incoming request; supplies the ``x-api-key`` header and
            ``state.request_id`` recorded in the audit event.

    Returns:
        ``{"ok": True, "name": ..., "kind": ..., "target_pct": ...}``
        read back from the object ``register()`` returned.

    Raises:
        HTTPException: 400 when ``register()`` raises ``ValueError``; the
            error text is echoed in ``detail``.
    """
    from ..middleware.slo import get_slo_tracker
    t = get_slo_tracker()
    try:
        slo = t.register(
            name=req.name, kind=req.kind, target_pct=req.target_pct,
            window=req.window,
            latency_threshold_ms=req.latency_threshold_ms,
        )
    except ValueError as e:
        # Chain explicitly so the validation error is recorded as the
        # cause rather than as an unrelated error raised "during
        # handling" of another exception.
        raise HTTPException(status_code=400,
                            detail={"error": str(e)}) from e
    # NOTE(review): model_dump() always contains the required fields
    # `name` and `kind`, which are forwarded to audit() as keyword
    # arguments. If audit()'s first positional parameter is itself named
    # `kind` (or `name`), this call raises TypeError after the SLO has
    # already been registered -- confirm against the audit() signature.
    get_obs().audit(
        "slo.register",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        **req.model_dump(exclude_unset=True),
    )
    return {"ok": True, "name": slo.name, "kind": slo.kind,
            "target_pct": slo.target_pct}


def admin_slo_unregister(name: str, request: Request):
    """Unregister the SLO called ``name``.

    ``request`` is accepted but not read. Responds 404 when
    ``unregister()`` returns a falsy value.
    """
    from ..middleware.slo import get_slo_tracker

    tracker = get_slo_tracker()
    was_removed = tracker.unregister(name)
    if was_removed:
        return {"ok": True, "name": name, "removed": True}
    raise HTTPException(
        status_code=404,
        detail={"error": "not registered", "name": name},
    )


def admin_slo_reset(request: Request):
    """Call ``reset()`` on the SLO tracker. ``request`` is not read."""
    from ..middleware.slo import get_slo_tracker

    tracker = get_slo_tracker()
    tracker.reset()
    return {"ok": True}


# ---- v2.31 Async jobs --------------------------------------------------
class AsyncGenerateRequest(BaseModel):
    # Request body for POST /v1/generate/async.
    query: str   # wrapped as Query(text=query) when the job runs
    k: int = 5   # passed to _pipeline.run(..., k=k) and stored in the job's meta


@app.post("/v1/generate/async", status_code=202)
def generate_async(req: AsyncGenerateRequest, request: Request):
    """Queue a generation as a background job and answer 202 Accepted.

    The job runs the pipeline for ``req.query`` with ``k=req.k`` and
    stores the result's ``to_dict()``. The response carries the
    ``job_id``; poll GET /v1/jobs/{id} for status. ``request`` is not
    read.
    """
    from ..middleware.async_jobs import get_job_queue
    from ..core.types import Query

    def _work():
        # Runs later inside the job queue, not during this request.
        outcome = _pipeline.run(Query(text=req.query), k=req.k)
        return outcome.to_dict()

    queue = get_job_queue()
    job_id = queue.enqueue(_work, kind="generate", meta={"k": req.k})
    return {"job_id": job_id, "status": "queued"}


@app.get("/v1/jobs/{job_id}")
def get_job(job_id: str):
    """Return the ``to_dict()`` view of one job, or 404 if unknown."""
    from ..middleware.async_jobs import get_job_queue

    job = get_job_queue().get(job_id)
    if job is not None:
        return job.to_dict()
    raise HTTPException(
        status_code=404,
        detail={"error": "job not found", "job_id": job_id},
    )


@app.post("/v1/jobs/{job_id}/cancel")
def cancel_job(job_id: str):
    """Ask the job queue to cancel ``job_id``.

    Responds 404 with ``"job not cancellable"`` whenever the queue's
    ``cancel()`` returns a falsy value.
    """
    from ..middleware.async_jobs import get_job_queue

    cancelled = get_job_queue().cancel(job_id)
    if not cancelled:
        raise HTTPException(
            status_code=404,
            detail={"error": "job not cancellable", "job_id": job_id},
        )
    return {"ok": True, "job_id": job_id, "cancelled": True}


def admin_jobs_list(status: Optional[str] = None, limit: int = 50):
    """Return job-queue stats plus a listing of jobs.

    ``status`` and ``limit`` are forwarded to ``list_jobs`` as
    ``status_filter`` and ``limit``.
    """
    from ..middleware.async_jobs import get_job_queue

    queue = get_job_queue()
    stats = queue.stats()
    jobs = queue.list_jobs(status_filter=status, limit=limit)
    return {"stats": stats, "jobs": jobs}


# ---- v2.32 Cost alerts -------------------------------------------------
class CostAlertRequest(BaseModel):
    # Request body for admin_cost_alerts_set_key; both fields are passed
    # positionally to the cost-alert store's set_thresholds().
    key_prefix: str
    thresholds_usd: List[float]


class CostAlertGlobalRequest(BaseModel):
    # Request body for admin_cost_alerts_set_global; the list is passed
    # to the cost-alert store's set_global_thresholds().
    thresholds_usd: List[float]


def admin_cost_alerts_get():
    """Return the cost-alert store's ``stats()``."""
    from ..middleware.cost_alerts import get_cost_alerts

    alerts = get_cost_alerts()
    return alerts.stats()


def admin_cost_alerts_set_key(req: CostAlertRequest, request: Request):
    """Set the cost thresholds for one key and audit the change.

    Returns ``{"ok": True}`` merged with the cost-alert ``stats()``.
    """
    from ..middleware.cost_alerts import get_cost_alerts

    get_cost_alerts().set_thresholds(req.key_prefix, req.thresholds_usd)

    obs = get_obs()
    obs.audit(
        "cost_alerts.set_key",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        key_prefix=req.key_prefix,
        thresholds_usd=req.thresholds_usd,
    )

    body = {"ok": True}
    body.update(get_cost_alerts().stats())
    return body


def admin_cost_alerts_set_global(req: CostAlertGlobalRequest,
                                  request: Request):
    """Set the global cost thresholds and audit the change.

    Returns ``{"ok": True}`` merged with the cost-alert ``stats()``.
    """
    from ..middleware.cost_alerts import get_cost_alerts

    get_cost_alerts().set_global_thresholds(req.thresholds_usd)

    obs = get_obs()
    obs.audit(
        "cost_alerts.set_global",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        thresholds_usd=req.thresholds_usd,
    )

    body = {"ok": True}
    body.update(get_cost_alerts().stats())
    return body


def admin_cost_alerts_reset_fired(request: Request):
    """Call ``reset_fired()`` on the cost-alert store.

    ``request`` is accepted but not read. Returns ``{"ok": True}`` merged
    with the store's ``stats()`` taken afterwards.
    """
    from ..middleware.cost_alerts import get_cost_alerts

    get_cost_alerts().reset_fired()
    body = {"ok": True}
    body.update(get_cost_alerts().stats())
    return body


# ---- v2.33 Prometheus histograms ---------------------------------------
@app.get("/metrics/histograms", response_class=Response, include_in_schema=False)
def prometheus_histograms():
    """Serve the per-path request-duration histograms as Prometheus text.

    The body is whatever the histogram store's ``render()`` returns,
    sent with media type ``text/plain; version=0.0.4``.
    """
    from fastapi.responses import PlainTextResponse
    from ..middleware.prom_histograms import get_prom_histograms

    exposition = get_prom_histograms().render()
    return PlainTextResponse(
        content=exposition,
        status_code=200,
        media_type="text/plain; version=0.0.4",
    )


def admin_histograms_stats():
    """Return the Prometheus histogram store's ``stats()``."""
    from ..middleware.prom_histograms import get_prom_histograms

    histograms = get_prom_histograms()
    return histograms.stats()


# ---- v2.34 ETag / conditional GET --------------------------------------
@app.get("/v1/etag_stats")
def etag_stats_cachable(request: Request):
    """Cacheable version-manifest-ish endpoint demonstrating ETag.

    The payload is ``{"version": app.version, "n_documents": ...}`` and
    its ETag is computed from that payload. When the client's
    ``If-None-Match`` matches, a body-less 304 is returned; otherwise the
    payload is returned with status 200. Both outcomes are recorded on
    the ETag tracker under ``"/v1/etag_stats"``.

    The 304 carries the same ``ETag`` and ``Cache-Control`` headers as
    the 200, because a 304 must repeat the validator and caching headers
    the 200 would have sent so caches can refresh the stored response.
    """
    from fastapi.responses import JSONResponse as _JR
    from fastapi.responses import Response as _Resp
    from ..middleware.etag import (
        compute_etag as _cet,
        matches_if_none_match as _mch,
        get_etag_tracker,
    )
    # Best-effort document count: any failure degrades to 0 instead of
    # failing the request.
    try:
        n_docs = len(getattr(_pipeline, "_docs", []))
    except Exception:
        n_docs = 0
    payload = {"version": app.version, "n_documents": n_docs}
    etag = _cet(payload)
    cache_control = "private, must-revalidate"
    inm = request.headers.get("if-none-match")
    if _mch(etag, inm):
        get_etag_tracker().record("/v1/etag_stats", was_304=True)
        # `or ""` guards against request_id being present but None,
        # which is not a valid header value.
        request_id = getattr(request.state, "request_id", "") or ""
        return _Resp(status_code=304,
                      headers={"ETag": etag,
                               "Cache-Control": cache_control,
                               "X-Request-ID": request_id})
    get_etag_tracker().record("/v1/etag_stats", was_304=False)
    return _JR(content=payload, status_code=200,
                headers={"ETag": etag,
                         "Cache-Control": cache_control})


# v3.x — Generator health diagnostic. Lets the user check if a local
# LLM (Ollama / TAU Native) is wired up and reachable before running
# a real query. Public, read-only. Returns provider-specific status
# blocks — e.g. the TAU native generator reports checkpoint path,
# epoch, loss, and parameter count if it can load.
@app.get("/v1/generator/health")
def generator_health():
    """Report which generator the pipeline uses and, if available, its health.

    The result always names the configured provider and model, the
    generator's class and its ``name`` attribute (``"unknown"`` when
    absent). If the generator exposes a callable ``health``, its return
    value is added under ``"health"``; a failing probe is reported as
    ``{"ok": False, "error": ...}`` there. Any other failure turns the
    whole response into ``{"ok": False, "error": ...}``.
    """
    try:
        from ..pipeline import get_pipeline

        pipeline = get_pipeline()
        generator = pipeline.generator
        report = {
            "provider": pipeline.config.generation.provider,
            "model": pipeline.config.generation.model,
            "class": type(generator).__name__,
            "name": getattr(generator, "name", "unknown"),
        }
        probe = getattr(generator, "health", None)
        if callable(probe):
            try:
                report["health"] = probe()
            except Exception as probe_err:
                detail = f"{type(probe_err).__name__}: {probe_err}"
                report["health"] = {"ok": False, "error": detail}
        return report
    except Exception as err:
        return {"ok": False, "error": f"{type(err).__name__}: {err}"}


# v3.x — Training data summary. Returns counts of (query, context, answer)
# triples collected via TAU_RAG_COLLECT_TRAINING=1, plus feedback rollup.
# Safe public endpoint — no raw queries leaked.
@app.get("/v1/training_data/summary")
def training_data_summary():
    """Return the training collector's ``summary()``.

    Any exception, including a failed import of the collector, is
    reported as ``{"ok": False, "error": "<ExcType>: <message>"}``
    instead of propagating.
    """
    try:
        from ..middleware.training_collector import get_training_collector

        collector = get_training_collector()
        return collector.summary()
    except Exception as err:
        message = f"{type(err).__name__}: {err}"
        return {"ok": False, "error": message}


# v2.x — PUBLIC suggestions endpoint for the UI.
# Intentionally NOT under /v1/admin/*, so the global admin-scope
# middleware lets it through. Returns a redacted view of the top
# queries — just the text and count — with no sensitive fields
# (no latencies, no user hashes, no timestamps). Safe for the static
# welcome screen to populate its "suggested questions" chips.
@app.get("/v1/suggestions")
def public_suggestions(limit: int = 8):
    """Top N recent/popular queries — safe read-only slice for the UI.

    Not admin-gated. Returns: ``{"top_queries": [{"text": str, "count": int}, ...]}``.
    ``limit`` is clamped to 1..20 before being passed to the store's
    ``top()``. Falls back to ``{"top_queries": []}`` if the query-stats
    store is empty or unavailable. Individual rows are skipped when they
    are malformed, have no text, have a count below 1, or have text
    shorter than 4 or longer than 120 characters.
    """
    try:
        from ..middleware import get_query_stats
        store = get_query_stats()
        # Materialize inside the try so a non-iterable result degrades to
        # "no suggestions" as well.
        raw = list(store.top(n=max(1, min(int(limit), 20))) or [])
    except Exception:
        raw = []
    out = []
    for row in raw:
        # top() returns dicts keyed by fingerprint with at least 'sample'
        # and 'count' (see middleware/query_stats.py). Defensive-access
        # so schema drift doesn't 500 the UI: a row that is not dict-like
        # or carries unexpected value types is skipped, not raised.
        try:
            text = (row.get("sample") or row.get("text")
                    or row.get("query") or "")
            count = int(row.get("count") or row.get("n") or 0)
        except (AttributeError, TypeError, ValueError, OverflowError):
            continue
        if not isinstance(text, str):
            continue
        text = text.strip()
        if not text or count < 1:
            continue
        if len(text) < 4 or len(text) > 120:
            continue       # filter junk-length queries
        out.append({"text": text, "count": count})
    return {"top_queries": out}


def admin_etag_stats():
    """Return the ETag tracker's ``stats()``."""
    from ..middleware.etag import get_etag_tracker

    tracker = get_etag_tracker()
    return tracker.stats()


# ---- v2.35 HMAC signing store -----------------------------------------
class HMACRegisterRequest(BaseModel):
    # Request body for admin_hmac_register; both fields are passed
    # positionally to the signing store's set_secret(). The handler
    # audits key_prefix only -- the secret is never written to the audit
    # event or echoed in the response.
    key_prefix: str
    secret: str


def admin_hmac_get():
    """Return the HMAC store's ``all_entries()`` merged with its stats.

    The entries are placed under the ``"keys"`` key.
    """
    from ..middleware.hmac_signing import get_hmac_signing

    signing = get_hmac_signing()
    payload = {"keys": signing.all_entries()}
    payload.update(signing.stats())
    return payload


def admin_hmac_register(req: HMACRegisterRequest, request: Request):
    """Store an HMAC secret for a key (identified by hash prefix).

    Once a secret is registered, the key MUST sign future requests. The
    ``"hmac.register"`` audit event records the key prefix only, never
    the secret.
    """
    from ..middleware.hmac_signing import get_hmac_signing

    signing = get_hmac_signing()
    signing.set_secret(req.key_prefix, req.secret)

    obs = get_obs()
    obs.audit(
        "hmac.register",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        key_prefix=req.key_prefix,
    )
    return {"ok": True, "key_prefix": req.key_prefix}


def admin_hmac_remove(key_prefix: str, request: Request):
    """Remove the HMAC secret registered for ``key_prefix`` and audit it.

    Args:
        key_prefix: Key whose secret is cleared via ``clear_secret()``.
        request: Incoming request; supplies the ``x-api-key`` header and
            ``state.request_id`` recorded in the audit event.

    Returns:
        ``{"ok": True, "key_prefix": ..., "removed": True}``.

    Raises:
        HTTPException: 404 when ``clear_secret()`` returns a falsy value.
    """
    from ..middleware.hmac_signing import get_hmac_signing
    s = get_hmac_signing()
    if not s.clear_secret(key_prefix):
        raise HTTPException(status_code=404,
                            detail={"error": "no secret",
                                    "key_prefix": key_prefix})
    # Removing a signing secret changes a security control, so leave the
    # same kind of audit trail that "hmac.register" leaves.
    get_obs().audit(
        "hmac.remove",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        key_prefix=key_prefix,
    )
    return {"ok": True, "key_prefix": key_prefix, "removed": True}


# ---- v2.36 Batch query endpoint ----------------------------------------
class BatchQueryItem(BaseModel):
    # One entry of a BatchSearchRequest.
    query: str   # wrapped as Query(text=query) by search_batch
    k: int = 5   # passed to _pipeline.run(..., k=k)


class BatchSearchRequest(BaseModel):
    # Request body for POST /v1/search/batch. The handler rejects an
    # empty list with 400 and more than 100 items with 413.
    queries: List[BatchQueryItem]


@app.post("/v1/search/batch")
def search_batch(req: BatchSearchRequest, request: Request):
    """Run several searches in a single request.

    Auth/rate/quota checks are shared by the whole batch. Every query is
    executed independently: a failing query yields an ``ok: False``
    entry carrying a truncated error string and does not abort the
    others. An empty batch is rejected with 400; more than 100 queries
    with 413. ``request`` is not read.
    """
    from ..core.types import Query

    batch = req.queries
    if not batch:
        raise HTTPException(
            status_code=400,
            detail={"error": "empty batch"},
        )

    # Reasonable cap — same spirit as body-limit
    batch_size = len(batch)
    if batch_size > 100:
        raise HTTPException(
            status_code=413,
            detail={
                "error": "too many queries in batch",
                "limit": 100,
                "got": batch_size,
            },
        )

    results: List[Dict[str, Any]] = []
    for position, item in enumerate(batch):
        try:
            outcome = _pipeline.run(Query(text=item.query), k=item.k)
            entry = {
                "index": position,
                "query": item.query,
                "ok": True,
                "result": outcome.to_dict(),
            }
        except Exception as err:
            entry = {
                "index": position,
                "query": item.query,
                "ok": False,
                "error": f"{type(err).__name__}: {err}"[:200],
            }
        results.append(entry)

    succeeded = sum(1 for entry in results if entry["ok"])
    return {
        "n_queries": batch_size,
        "n_succeeded": succeeded,
        "n_failed": len(results) - succeeded,
        "results": results,
    }


# ---- v2.37 Webhook retry + DLQ -----------------------------------------
def admin_webhook_retry_get():
    """Return webhook-retry stats plus its pending and DLQ listings."""
    from ..middleware.webhook_retry import get_webhook_retry

    retry_queue = get_webhook_retry()
    stats = retry_queue.stats()
    pending = retry_queue.pending()
    dead_letters = retry_queue.dlq()
    return {"stats": stats, "pending": pending, "dlq": dead_letters}


def admin_webhook_replay(event_id: str, request: Request):
    """Replay one dead-lettered webhook event and audit the replay.

    Responds 404 (without auditing) when ``replay_dlq()`` returns a
    falsy value for ``event_id``.
    """
    from ..middleware.webhook_retry import get_webhook_retry

    replayed = get_webhook_retry().replay_dlq(event_id)
    if not replayed:
        raise HTTPException(
            status_code=404,
            detail={"error": "not in dlq", "event_id": event_id},
        )

    obs = get_obs()
    obs.audit(
        "webhook_retry.replay",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        event_id=event_id,
    )
    return {"ok": True, "event_id": event_id, "replayed": True}


def admin_webhook_purge_dlq(request: Request):
    """Purge the webhook dead-letter queue and audit the purge.

    The value returned by ``purge_dlq()`` is recorded in the audit event
    and echoed back under ``"purged"``.
    """
    from ..middleware.webhook_retry import get_webhook_retry

    purged = get_webhook_retry().purge_dlq()

    obs = get_obs()
    obs.audit(
        "webhook_retry.purge",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        purged=purged,
    )
    return {"ok": True, "purged": purged}


# ---- v2.38 Hebrew normalization ---------------------------------------
class HebrewNormConfigRequest(BaseModel):
    # Request body for admin_hebrew_norm_set; the flag is passed to the
    # normalizer's set_enabled().
    enabled: bool


def admin_hebrew_norm_get():
    """Return the Hebrew normalizer's ``stats()``."""
    from ..middleware.hebrew_normalize import get_hebrew_normalizer

    normalizer = get_hebrew_normalizer()
    return normalizer.stats()


def admin_hebrew_norm_set(req: HebrewNormConfigRequest, request: Request):
    """Enable or disable Hebrew normalization and audit the toggle.

    Returns ``{"ok": True}`` merged with the normalizer's ``stats()``.
    """
    from ..middleware.hebrew_normalize import get_hebrew_normalizer

    normalizer = get_hebrew_normalizer()
    normalizer.set_enabled(req.enabled)

    obs = get_obs()
    obs.audit(
        "hebrew_normalize.toggle",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        enabled=req.enabled,
    )

    body = {"ok": True}
    body.update(normalizer.stats())
    return body


class HebrewNormPreviewRequest(BaseModel):
    # Request body for admin_hebrew_norm_preview; the text is passed to
    # normalize() and echoed back under "input".
    text: str


def admin_hebrew_norm_preview(req: HebrewNormPreviewRequest):
    """Preview normalization result — useful for clients to see what
    their query canonicalizes to.

    Returns:
        ``{"input": <original text>, "normalized": <normalize(text)>,
        "changed": <True when the two differ>}``.
    """
    from ..middleware.hebrew_normalize import normalize
    # Normalize once and reuse the result for both response fields.
    normalized = normalize(req.text)
    return {
        "input": req.text,
        "normalized": normalized,
        "changed": normalized != req.text,
    }


# ---- v2.39 Audit log search --------------------------------------------
class AuditSearchRequest(BaseModel):
    # Request body for admin_audit_search; every field is forwarded
    # unchanged, under the same name, to search_audit(). All filters are
    # optional and default to None.
    kind: Optional[str] = None
    kind_prefix: Optional[str] = None
    actor_prefix: Optional[str] = None
    since: Optional[float] = None    # presumably a timestamp lower bound -- confirm units in search_audit
    until: Optional[float] = None    # presumably a timestamp upper bound -- confirm units in search_audit
    substring: Optional[str] = None
    limit: int = 100                 # not clamped by the handler
    offset: int = 0


def admin_audit_search(req: AuditSearchRequest):
    """Search the audit log with the filters carried by ``req``.

    Each request field is forwarded to ``search_audit`` as the keyword
    argument of the same name; the helper's result is returned as-is.
    """
    from ..middleware.audit_search import search_audit

    filters = {
        "kind": req.kind,
        "kind_prefix": req.kind_prefix,
        "actor_prefix": req.actor_prefix,
        "since": req.since,
        "until": req.until,
        "substring": req.substring,
        "limit": req.limit,
        "offset": req.offset,
    }
    return search_audit(**filters)


def admin_audit_summary(since: Optional[float] = None):
    """Return ``audit_summary(since=since)`` unchanged."""
    from ..middleware.audit_search import audit_summary

    summary = audit_summary(since=since)
    return summary


# ---- v2.40 Tombstones / soft-delete ------------------------------------
class SoftDeleteRequest(BaseModel):
    # Request body for admin_tombstones_soft_delete.
    doc_id: str                    # passed as the first argument to soft_delete()
    reason: Optional[str] = None   # passed as reason= and recorded in the audit event


def admin_tombstones_get():
    """Return tombstone-store stats plus the full tombstone listing."""
    from ..middleware.tombstones import get_tombstones

    store = get_tombstones()
    stats = store.stats()
    listing = store.all_tombstones()
    return {"stats": stats, "tombstones": listing}


def admin_tombstones_soft_delete(req: SoftDeleteRequest,
                                 request: Request):
    """Soft-delete a document by creating a tombstone, then audit it.

    The tombstone is attributed to ``_hash_prefix`` of the caller's
    ``x-api-key`` header. Returns ``{"ok": True}`` merged with the
    tombstone's ``to_dict()``.
    """
    from ..middleware.tombstones import get_tombstones

    store = get_tombstones()
    api_key = request.headers.get("x-api-key")
    tombstone = store.soft_delete(
        req.doc_id,
        by_prefix=_hash_prefix(api_key),
        reason=req.reason,
    )

    obs = get_obs()
    obs.audit(
        "tombstone.soft_delete",
        actor_key=api_key,
        request_id=getattr(request.state, "request_id", None),
        doc_id=req.doc_id,
        reason=req.reason,
    )

    body = {"ok": True}
    body.update(tombstone.to_dict())
    return body


def admin_tombstones_restore(doc_id: str, request: Request):
    """Restore a tombstoned document and audit the restore.

    Responds 404 (without auditing) when ``restore()`` returns a falsy
    value for ``doc_id``.
    """
    from ..middleware.tombstones import get_tombstones

    restored = get_tombstones().restore(doc_id)
    if not restored:
        raise HTTPException(
            status_code=404,
            detail={"error": "not tombstoned", "doc_id": doc_id},
        )

    obs = get_obs()
    obs.audit(
        "tombstone.restore",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        doc_id=doc_id,
    )
    return {"ok": True, "doc_id": doc_id, "restored": True}


def admin_tombstones_purge(doc_id: str, request: Request):
    """Force-remove one tombstone before its retention expires.

    Responds 404 (without auditing) when ``purge()`` returns a falsy
    value for ``doc_id``; otherwise records ``"tombstone.purge"``.
    """
    from ..middleware.tombstones import get_tombstones

    purged = get_tombstones().purge(doc_id)
    if not purged:
        raise HTTPException(
            status_code=404,
            detail={"error": "not tombstoned", "doc_id": doc_id},
        )

    obs = get_obs()
    obs.audit(
        "tombstone.purge",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        doc_id=doc_id,
    )
    return {"ok": True, "doc_id": doc_id, "purged": True}


def admin_tombstones_purge_expired(request: Request):
    """Purge expired tombstones and audit how many were removed.

    The collection returned by ``purge_expired()`` is echoed back under
    ``"doc_ids"`` and its length under ``"n_purged"``.
    """
    from ..middleware.tombstones import get_tombstones

    purged_ids = get_tombstones().purge_expired()

    obs = get_obs()
    obs.audit(
        "tombstone.purge_expired",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        n_purged=len(purged_ids),
    )
    return {"ok": True, "n_purged": len(purged_ids), "doc_ids": purged_ids}


# ---- v2.41 Query coalescing -------------------------------------------
class CoalescingConfigRequest(BaseModel):
    # Request body for admin_coalescing_set; the flag is passed to the
    # coalescing tracker's set_enabled().
    enabled: bool


def admin_coalescing_get(n: int = 10):
    """Return coalescing stats plus the tracker's ``top_classes(n=n)``."""
    from ..middleware.query_coalescing import get_coalescing_tracker

    tracker = get_coalescing_tracker()
    stats = tracker.stats()
    top = tracker.top_classes(n=n)
    return {"stats": stats, "top_classes": top}


def admin_coalescing_set(req: CoalescingConfigRequest,
                          request: Request):
    """Enable or disable query coalescing and audit the toggle.

    Returns ``{"ok": True}`` merged with the tracker's ``stats()``.
    """
    from ..middleware.query_coalescing import get_coalescing_tracker

    tracker = get_coalescing_tracker()
    tracker.set_enabled(req.enabled)

    obs = get_obs()
    obs.audit(
        "coalescing.toggle",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        enabled=req.enabled,
    )

    body = {"ok": True}
    body.update(tracker.stats())
    return body


def admin_coalescing_reset(request: Request):
    """Call ``reset()`` on the coalescing tracker. ``request`` is not read."""
    from ..middleware.query_coalescing import get_coalescing_tracker

    tracker = get_coalescing_tracker()
    tracker.reset()
    return {"ok": True}


# ---- v2.42 Tenant flag overrides --------------------------------------
class TenantFlagSetRequest(BaseModel):
    # Request body for admin_tenant_flags_set; the fields are passed
    # positionally to set_override(flag_name, tenant_value, value).
    flag_name: str
    tenant_value: str
    value: bool


def admin_tenant_flags_get():
    """Return tenant-flag store stats plus all override entries."""
    from ..middleware.tenant_flags import get_tenant_flags

    flags = get_tenant_flags()
    stats = flags.stats()
    entries = flags.all_entries()
    return {"stats": stats, "entries": entries}


def admin_tenant_flags_set(req: TenantFlagSetRequest,
                            request: Request):
    """Set a per-tenant flag override and audit the change.

    Returns ``{"ok": True}`` merged with the store's ``stats()``.
    """
    from ..middleware.tenant_flags import get_tenant_flags

    flags = get_tenant_flags()
    flags.set_override(req.flag_name, req.tenant_value, req.value)

    obs = get_obs()
    obs.audit(
        "tenant_flag.set",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        flag_name=req.flag_name,
        tenant_value=req.tenant_value,
        value=req.value,
    )

    body = {"ok": True}
    body.update(flags.stats())
    return body


def admin_tenant_flags_remove(flag_name: str, tenant_value: str,
                               request: Request):
    """Remove one per-tenant flag override.

    ``request`` is accepted but not read. Responds 404 when
    ``remove_override()`` returns a falsy value.
    """
    from ..middleware.tenant_flags import get_tenant_flags

    flags = get_tenant_flags()
    if flags.remove_override(flag_name, tenant_value):
        return {
            "ok": True,
            "removed": True,
            "flag_name": flag_name,
            "tenant_value": tenant_value,
        }
    raise HTTPException(
        status_code=404,
        detail={
            "error": "no override",
            "flag_name": flag_name,
            "tenant_value": tenant_value,
        },
    )


# ---- v2.43 Response redaction ------------------------------------------
def admin_response_redact_get():
    """Return the response redactor's stats and its ``describe()`` output.

    The ``describe()`` result is placed under the ``"rules"`` key.
    """
    from ..middleware.response_redact import get_response_redactor

    redactor = get_response_redactor()
    stats = redactor.stats()
    rules = redactor.describe()
    return {"stats": stats, "rules": rules}


def admin_response_redact_clear(request: Request):
    """Call ``clear()`` on the response redactor and audit the action."""
    from ..middleware.response_redact import get_response_redactor

    redactor = get_response_redactor()
    redactor.clear()

    obs = get_obs()
    obs.audit(
        "response_redact.clear",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
    )
    return {"ok": True}


# ---- v2.44 Config snapshot --------------------------------------------
def admin_config_snapshot(request: Request):
    """Export the current middleware state as JSON, with secrets redacted.

    The snapshot comes from ``build_snapshot()`` and the export is
    recorded as a ``"config_snapshot.export"`` audit event.
    """
    from ..middleware.config_snapshot import build_snapshot

    snapshot = build_snapshot()

    obs = get_obs()
    obs.audit(
        "config_snapshot.export",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
    )
    return snapshot


class ConfigRestoreRequest(BaseModel):
    # Request body for admin_config_restore; the mapping is passed as-is
    # to restore_snapshot().
    snapshot: Dict[str, Any]


def admin_config_restore(req: ConfigRestoreRequest, request: Request):
    """Restore middleware state from a previously exported snapshot.

    Returns the report produced by ``restore_snapshot`` describing what
    was applied versus skipped; secrets are never restored. The report's
    ``applied`` and ``errors`` lists (empty when absent) are copied into
    the ``"config_snapshot.restore"`` audit event.
    """
    from ..middleware.config_snapshot import restore_snapshot

    report = restore_snapshot(req.snapshot)

    obs = get_obs()
    obs.audit(
        "config_snapshot.restore",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        applied=report.get("applied", []),
        errors=report.get("errors", []),
    )
    return report


# ---- v2.45 Shadow pipeline --------------------------------------------
class ShadowConfigRequest(BaseModel):
    # Request body for admin_shadow_config. A field left as None is not
    # applied: the handler only calls the matching setter when the value
    # is not None.
    enabled: Optional[bool] = None        # -> tracker.set_enabled()
    sample_rate: Optional[float] = None   # -> tracker.set_sample_rate(); not range-checked by the handler


def admin_shadow_get(n: int = 20):
    """Return shadow-pipeline stats plus ``recent_comparisons(n=n)``."""
    from ..middleware.shadow_pipeline import get_shadow_tracker

    tracker = get_shadow_tracker()
    stats = tracker.stats()
    recent = tracker.recent_comparisons(n=n)
    return {"stats": stats, "recent_comparisons": recent}


def admin_shadow_config(req: ShadowConfigRequest, request: Request):
    """Apply the provided shadow-pipeline settings and audit the request.

    Only fields that are not ``None`` are applied. The audit event
    records both fields as received, including ``None`` values. Returns
    ``{"ok": True}`` merged with the tracker's ``stats()``.
    """
    from ..middleware.shadow_pipeline import get_shadow_tracker

    tracker = get_shadow_tracker()
    enabled, sample_rate = req.enabled, req.sample_rate
    if enabled is not None:
        tracker.set_enabled(enabled)
    if sample_rate is not None:
        tracker.set_sample_rate(sample_rate)

    obs = get_obs()
    obs.audit(
        "shadow.config",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        enabled=enabled,
        sample_rate=sample_rate,
    )

    body = {"ok": True}
    body.update(tracker.stats())
    return body


def admin_shadow_reset(request: Request):
    """Call ``reset()`` on the shadow tracker. ``request`` is not read."""
    from ..middleware.shadow_pipeline import get_shadow_tracker

    tracker = get_shadow_tracker()
    tracker.reset()
    return {"ok": True}


# ---- v2.46 Scheduled tasks ---------------------------------------------
def admin_scheduler_get():
    """Return scheduler stats plus its ``list_tasks()`` output."""
    from ..middleware.scheduled_tasks import get_scheduler

    scheduler = get_scheduler()
    stats = scheduler.stats()
    tasks = scheduler.list_tasks()
    return {"stats": stats, "tasks": tasks}


class SchedulerControlRequest(BaseModel):
    # Request body for admin_scheduler_control; any action other than the
    # two listed below is rejected by the handler with HTTP 400.
    action: str   # "start" | "stop"


def admin_scheduler_control(req: SchedulerControlRequest,
                            request: Request):
    """Start or stop the scheduler according to ``req.action``.

    ``"start"`` returns the result of ``start()`` under ``"started"``;
    ``"stop"`` returns the result of ``stop()`` under ``"stopped"``. Any
    other action is rejected with 400. ``request`` is not read.
    """
    from ..middleware.scheduled_tasks import get_scheduler

    scheduler = get_scheduler()
    action = req.action
    if action == "start":
        return {"ok": True, "started": scheduler.start()}
    if action == "stop":
        return {"ok": True, "stopped": scheduler.stop()}
    raise HTTPException(
        status_code=400,
        detail={"error": "action must be start|stop"},
    )


def admin_scheduler_run_now(name: str, request: Request):
    """Ask the scheduler to run task ``name`` immediately.

    Raises:
        HTTPException: 404 when the scheduler's ``run_now`` reports failure.
    """
    from ..middleware.scheduled_tasks import get_scheduler
    triggered = get_scheduler().run_now(name)
    if triggered:
        return {"ok": True, "ran": name}
    raise HTTPException(
        status_code=404,
        detail={"error": "task not found", "name": name},
    )


def admin_scheduler_toggle(name: str, enabled: bool = True,
                            request: Request = None):
    """Enable or disable the scheduled task ``name``.

    Raises:
        HTTPException: 404 when the scheduler's ``set_enabled`` reports
            failure.
    """
    from ..middleware.scheduled_tasks import get_scheduler
    updated = get_scheduler().set_enabled(name, enabled)
    if updated:
        return {"ok": True, "name": name, "enabled": enabled}
    raise HTTPException(
        status_code=404,
        detail={"error": "task not found", "name": name},
    )


# ---- v2.47 Pipeline stage breakers --------------------------------------
def admin_stage_breakers_get():
    """Return stage-breaker stats alongside the full breaker listing."""
    from ..middleware.stage_breakers import get_stage_breakers
    registry = get_stage_breakers()
    stats = registry.stats()
    breakers = registry.all_breakers()
    return {"stats": stats, "breakers": breakers}


def admin_stage_breaker_reset(name: str, request: Request):
    """Reset the stage breaker ``name`` and record the action in the audit log.

    Raises:
        HTTPException: 404 when the registry's ``reset`` reports failure.
    """
    from ..middleware.stage_breakers import get_stage_breakers
    registry = get_stage_breakers()
    was_reset = registry.reset(name)
    if not was_reset:
        raise HTTPException(
            status_code=404,
            detail={"error": "breaker not registered", "name": name},
        )

    api_key = request.headers.get("x-api-key")
    req_id = getattr(request.state, "request_id", None)
    get_obs().audit("stage_breaker.reset", actor_key=api_key,
                    request_id=req_id, name=name)
    return {"ok": True, "name": name, "state": "closed"}


# ---- v2.48 Traceparent ---------------------------------------------------
class TraceparentConfigRequest(BaseModel):
    """Request body toggling the traceparent manager."""
    # Forwarded to the traceparent manager's set_enabled().
    enabled: bool


def admin_traceparent_get():
    """Return the traceparent manager's stats."""
    from ..middleware.traceparent import get_traceparent_manager
    manager = get_traceparent_manager()
    return manager.stats()


def admin_traceparent_set(req: TraceparentConfigRequest,
                           request: Request):
    """Toggle the traceparent manager and audit the change.

    Returns ``ok`` merged with the manager's current stats.
    """
    from ..middleware.traceparent import get_traceparent_manager
    manager = get_traceparent_manager()
    manager.set_enabled(req.enabled)

    api_key = request.headers.get("x-api-key")
    req_id = getattr(request.state, "request_id", None)
    get_obs().audit("traceparent.toggle", actor_key=api_key,
                    request_id=req_id, enabled=req.enabled)

    response = {"ok": True}
    response.update(manager.stats())
    return response


# ---- v2.49 JWT --------------------------------------------------------
class JWTConfigRequest(BaseModel):
    """Request body for updating JWT store settings.

    Every field is optional; the config handler skips fields that are None.
    """
    # An empty string is passed on to the store as None by the handler.
    secret: Optional[str] = None
    # An empty string is passed on to the store as None by the handler.
    expected_issuer: Optional[str] = None
    # Forwarded to the store's set_leeway(); presumably seconds — confirm.
    leeway_sec: Optional[float] = None


def admin_jwt_get():
    """Return the JWT store's stats."""
    from ..middleware.jwt_auth import get_jwt_store
    store = get_jwt_store()
    return store.stats()


def admin_jwt_config(req: JWTConfigRequest, request: Request):
    """Apply the supplied JWT settings and audit the change.

    Fields left as None are skipped; an empty ``secret`` or
    ``expected_issuer`` is passed to the store as None. The audit record
    carries only whether a secret was supplied, never the secret itself.
    """
    from ..middleware.jwt_auth import get_jwt_store
    store = get_jwt_store()
    secret = req.secret
    issuer = req.expected_issuer
    leeway = req.leeway_sec
    if secret is not None:
        store.set_secret(secret or None)
    if issuer is not None:
        store.set_expected_issuer(issuer or None)
    if leeway is not None:
        store.set_leeway(leeway)

    api_key = request.headers.get("x-api-key")
    req_id = getattr(request.state, "request_id", None)
    get_obs().audit("jwt.config", actor_key=api_key, request_id=req_id,
                    expected_issuer=issuer, leeway_sec=leeway,
                    secret_set=(secret is not None))

    response = {"ok": True}
    response.update(store.stats())
    return response


# ---- v2.50 Bulk document import ----------------------------------------
class BulkImportItem(BaseModel):
    """One document in a bulk import request."""
    # Used as the Document id by the bulk import worker.
    id: str
    # Used as the Document text by the bulk import worker.
    text: str


class BulkImportRequest(BaseModel):
    """Request body for the asynchronous bulk import endpoint."""
    # The endpoint rejects an empty list (400) and more than 5000 items (413).
    documents: List[BulkImportItem]


@app.post("/v1/documents/bulk_async", status_code=202)
def bulk_import_docs(req: BulkImportRequest, request: Request):
    """Start a bulk import job. Returns job_id immediately. (v2.50)

    The documents are snapshotted into plain dicts and handed to the job
    queue; the worker adds them to the pipeline in chunks and records a
    per-document outcome on the bulk importer.

    Returns:
        Dict with the bulk ``job_id``, the queue's ``queue_id``, the
        initial ``status`` ("queued") and the ``total`` document count.

    Raises:
        HTTPException: 400 for an empty document list, 413 when more than
            5000 documents are submitted.
    """
    import uuid
    from ..middleware.bulk_import import get_bulk_importer
    from ..middleware.async_jobs import get_job_queue
    from ..core.types import Document
    if not req.documents:
        raise HTTPException(status_code=400,
                            detail={"error": "empty document list"})
    if len(req.documents) > 5000:
        raise HTTPException(status_code=413,
                            detail={"error": "batch too large",
                                    "limit": 5000,
                                    "got": len(req.documents)})
    importer = get_bulk_importer()

    def _import_worker(docs_snapshot, job_id):
        importer.start_job(job_id, total=len(docs_snapshot))
        # Add in chunks so the pipeline stays responsive
        chunk_size = 50
        for i in range(0, len(docs_snapshot), chunk_size):
            chunk = docs_snapshot[i:i + chunk_size]
            try:
                _pipeline.add_documents([
                    Document(id=d["id"], text=d["text"])
                    for d in chunk
                ])
                for d in chunk:
                    importer.record_item(job_id, d["id"], ok=True)
            except Exception as e:
                # A failed chunk is recorded per document and the import
                # continues with the next chunk (deliberate best-effort).
                error_msg = f"{type(e).__name__}: {e}"[:200]
                for d in chunk:
                    importer.record_item(job_id, d["id"],
                                         ok=False, error=error_msg)
        # NOTE(review): the worker never checks for cancellation, so a job
        # cancelled via the cancel endpoint still runs to completion here
        # — confirm whether that is intended.
        importer.mark_finished(job_id)

    docs_data = [{"id": d.id, "text": d.text} for d in req.documents]
    # Pre-allocate the bulk job id; it is passed to the queue via meta.
    job_id = "bulk_" + uuid.uuid4().hex[:16]
    # Seed the progress record BEFORE enqueueing. Seeding afterwards races
    # with the worker: if the worker has already started (or finished), a
    # late start_job() could overwrite the progress it recorded.
    importer.start_job(job_id, total=len(docs_data))
    queue_id = get_job_queue().enqueue(
        lambda: _import_worker(docs_data, job_id),
        kind="bulk_import",
        meta={"bulk_job_id": job_id, "n_docs": len(docs_data)},
    )
    return {"job_id": job_id, "queue_id": queue_id,
            "status": "queued", "total": len(docs_data)}


@app.get("/v1/documents/bulk_async/{job_id}")
def bulk_import_status(job_id: str):
    """Return the progress record of bulk import ``job_id`` as a dict.

    Raises:
        HTTPException: 404 when the importer has no record for the job.
    """
    from ..middleware.bulk_import import get_bulk_importer
    progress = get_bulk_importer().get(job_id)
    if progress is not None:
        return progress.to_dict()
    raise HTTPException(
        status_code=404,
        detail={"error": "bulk job not found", "job_id": job_id},
    )


@app.post("/v1/documents/bulk_async/{job_id}/cancel")
def bulk_import_cancel(job_id: str, request: Request):
    """Mark bulk import ``job_id`` as cancelled.

    Raises:
        HTTPException: 404 when the importer's ``mark_cancelled`` reports
            failure.
    """
    from ..middleware.bulk_import import get_bulk_importer
    cancelled = get_bulk_importer().mark_cancelled(job_id)
    if cancelled:
        return {"ok": True, "job_id": job_id, "cancelled": True}
    raise HTTPException(
        status_code=404,
        detail={"error": "job not cancellable", "job_id": job_id},
    )


@app.get("/v1/admin/bulk_import")
def admin_bulk_import_list():
    """Return bulk importer stats together with all known jobs."""
    from ..middleware.bulk_import import get_bulk_importer
    importer = get_bulk_importer()
    stats = importer.stats()
    jobs = importer.all_jobs()
    return {"stats": stats, "jobs": jobs}


# ---- v2.51 Document ACL ------------------------------------------------
class DocACLSetRequest(BaseModel):
    """Request body for setting the ACL labels of one document."""
    # Document whose labels are set.
    doc_id: str
    # Labels passed to the ACL store's set_labels().
    labels: List[str]


class DocACLLabelRequest(BaseModel):
    """Request body naming a single ACL label for one document."""
    # Document the label applies to.
    doc_id: str
    # Label passed to the ACL store's add_label().
    label: str


@app.get("/v1/admin/doc_acl")
def admin_doc_acl_get():
    """Return document-ACL stats together with every ACL entry."""
    from ..middleware.doc_acl import get_doc_acl
    acl = get_doc_acl()
    stats = acl.stats()
    entries = acl.all_acls()
    return {"stats": stats, "entries": entries}


@app.post("/v1/admin/doc_acl/set")
def admin_doc_acl_set(req: DocACLSetRequest, request: Request):
    """Set a document's ACL labels and audit the change.

    Returns the document id with its required labels, sorted.
    """
    from ..middleware.doc_acl import get_doc_acl
    acl = get_doc_acl()
    acl.set_labels(req.doc_id, req.labels)

    api_key = request.headers.get("x-api-key")
    req_id = getattr(request.state, "request_id", None)
    get_obs().audit("doc_acl.set", actor_key=api_key, request_id=req_id,
                    doc_id=req.doc_id, labels=req.labels)

    current = sorted(acl.required_labels(req.doc_id))
    return {"ok": True, "doc_id": req.doc_id, "labels": current}


@app.post("/v1/admin/doc_acl/add")
def admin_doc_acl_add(req: DocACLLabelRequest, request: Request):
    """Add one ACL label to a document and audit the change.

    Returns the document id with its required labels, sorted.
    """
    from ..middleware.doc_acl import get_doc_acl
    s = get_doc_acl()
    s.add_label(req.doc_id, req.label)
    # ACL mutations are access-control changes: record them in the audit
    # log, consistent with the "doc_acl.set" endpoint.
    get_obs().audit(
        "doc_acl.add",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        doc_id=req.doc_id, label=req.label,
    )
    return {"ok": True, "doc_id": req.doc_id,
            "labels": sorted(s.required_labels(req.doc_id))}


@app.delete("/v1/admin/doc_acl/{doc_id}")
def admin_doc_acl_clear(doc_id: str, request: Request):
    """Clear a document's ACL and audit the change.

    Raises:
        HTTPException: 404 when the ACL store's ``clear`` reports failure.
    """
    from ..middleware.doc_acl import get_doc_acl
    s = get_doc_acl()
    if not s.clear(doc_id):
        raise HTTPException(status_code=404,
                            detail={"error": "no ACL",
                                    "doc_id": doc_id})
    # Clearing an ACL is an access-control change: record it in the audit
    # log, consistent with the "doc_acl.set" endpoint. Only successful
    # clears are audited.
    get_obs().audit(
        "doc_acl.clear",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        doc_id=doc_id,
    )
    return {"ok": True, "doc_id": doc_id, "cleared": True}


# ---- v2.52 Baggage ------------------------------------------------------
class BaggageConfigRequest(BaseModel):
    """Request body toggling the baggage manager."""
    # Forwarded to the baggage manager's set_enabled().
    enabled: bool


@app.get("/v1/admin/baggage")
def admin_baggage_get():
    """Return the baggage manager's stats."""
    from ..middleware.baggage import get_baggage_manager
    manager = get_baggage_manager()
    return manager.stats()


@app.post("/v1/admin/baggage")
def admin_baggage_set(req: BaggageConfigRequest, request: Request):
    """Toggle the baggage manager and audit the change.

    Returns ``ok`` merged with the manager's current stats.
    """
    from ..middleware.baggage import get_baggage_manager
    manager = get_baggage_manager()
    manager.set_enabled(req.enabled)

    api_key = request.headers.get("x-api-key")
    req_id = getattr(request.state, "request_id", None)
    get_obs().audit("baggage.toggle", actor_key=api_key,
                    request_id=req_id, enabled=req.enabled)

    response = {"ok": True}
    response.update(manager.stats())
    return response


# ---- v2.53 Canary -------------------------------------------------------
class CanaryVariantRequest(BaseModel):
    """One variant of a canary route, as a name/weight pair."""
    # Variant identifier forwarded to the canary router.
    name: str
    # Variant weight forwarded to the canary router; validation, if any,
    # happens in the router (the handler maps its ValueError to HTTP 400).
    weight: float


class CanarySetRoutesRequest(BaseModel):
    """Request body defining the variants of one canary route."""
    # Route whose variants are being set.
    route_key: str
    # Variants passed to the canary router's set_variants().
    variants: List[CanaryVariantRequest]


@app.get("/v1/admin/canary")
def admin_canary_get():
    """Return canary router stats and the split of every known route."""
    from ..middleware.canary import get_canary_router
    router = get_canary_router()
    splits = {}
    for route_key in router.all_routes():
        splits[route_key] = router.get_split(route_key)
    return {"stats": router.stats(), "routes": splits}


@app.post("/v1/admin/canary/routes")
def admin_canary_set_route(req: CanarySetRoutesRequest,
                            request: Request):
    """Set the variants of a canary route and audit the change.

    Returns ``ok`` merged with the route's resulting split.

    Raises:
        HTTPException: 400 when the router rejects the variants with a
            ValueError.
    """
    from ..middleware.canary import get_canary_router

    def _variant_dicts():
        # Build a fresh list on every call so the router and the audit log
        # never share the same objects.
        return [{"name": variant.name, "weight": variant.weight}
                for variant in req.variants]

    router = get_canary_router()
    try:
        router.set_variants(req.route_key, _variant_dicts())
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})

    api_key = request.headers.get("x-api-key")
    req_id = getattr(request.state, "request_id", None)
    get_obs().audit("canary.set_variants", actor_key=api_key,
                    request_id=req_id, route_key=req.route_key,
                    variants=_variant_dicts())

    response = {"ok": True}
    response.update(router.get_split(req.route_key))
    return response


@app.delete("/v1/admin/canary/routes/{route_key}")
def admin_canary_remove(route_key: str, request: Request):
    """Remove the canary route ``route_key``.

    Raises:
        HTTPException: 404 when the router's ``remove_route`` reports
            failure.
    """
    from ..middleware.canary import get_canary_router
    removed = get_canary_router().remove_route(route_key)
    if removed:
        return {"ok": True, "route_key": route_key, "removed": True}
    raise HTTPException(
        status_code=404,
        detail={"error": "route not found", "route_key": route_key},
    )


class CanaryStickyRequest(BaseModel):
    """Request body selecting the canary router's sticky strategy."""
    # Forwarded to the router's set_sticky(); a ValueError from the router
    # is mapped to HTTP 400 by the handler.
    strategy: str   # "none" | "hash"


@app.post("/v1/admin/canary/sticky")
def admin_canary_sticky(req: CanaryStickyRequest, request: Request):
    """Set the canary router's sticky strategy.

    Returns ``ok`` merged with the router's current stats.

    Raises:
        HTTPException: 400 when the router rejects the strategy with a
            ValueError.
    """
    from ..middleware.canary import get_canary_router
    router = get_canary_router()
    try:
        router.set_sticky(req.strategy)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    response = {"ok": True}
    response.update(router.stats())
    return response


# ---- v2.54 Autocomplete ------------------------------------------------
@app.get("/v1/autocomplete")
def autocomplete_suggest(prefix: str, limit: int = 10):
    """Return query suggestions for ``prefix``.

    ``limit`` is forwarded to the autocomplete store's ``suggest``.
    """
    from ..middleware.autocomplete import get_autocomplete
    store = get_autocomplete()
    suggestions = store.suggest(prefix, limit=limit)
    return {"prefix": prefix, "suggestions": suggestions}


class AutocompleteObserveRequest(BaseModel):
    """Request body carrying a query to record for autocomplete."""
    # Passed to the autocomplete store's observe().
    query: str


@app.post("/v1/autocomplete/observe")
def autocomplete_observe(req: AutocompleteObserveRequest):
    """Record a query so it can be offered as a future suggestion.

    Normally invoked automatically by the search pipeline; this manual
    endpoint exists for seeding and testing.
    """
    from ..middleware.autocomplete import get_autocomplete
    store = get_autocomplete()
    store.observe(req.query)
    return dict(ok=True)


@app.get("/v1/admin/autocomplete")
def admin_autocomplete_get(top_n: int = 20):
    """Return autocomplete stats plus the store's top entries.

    ``top_n`` is forwarded to the store's ``top`` as ``n``.
    """
    from ..middleware.autocomplete import get_autocomplete
    store = get_autocomplete()
    stats = store.stats()
    top_entries = store.top(n=top_n)
    return {"stats": stats, "top": top_entries}


class AutocompleteConfigRequest(BaseModel):
    """Request body toggling the autocomplete store."""
    # Forwarded to the autocomplete store's set_enabled().
    enabled: bool


@app.post("/v1/admin/autocomplete")
def admin_autocomplete_set(req: AutocompleteConfigRequest,
                            request: Request):
    """Toggle the autocomplete store; return ``ok`` merged with its stats."""
    from ..middleware.autocomplete import get_autocomplete
    store = get_autocomplete()
    store.set_enabled(req.enabled)
    response = {"ok": True}
    response.update(store.stats())
    return response


@app.post("/v1/admin/autocomplete/reset")
def admin_autocomplete_reset(request: Request):
    """Reset the autocomplete store and acknowledge with ``ok``."""
    from ..middleware.autocomplete import get_autocomplete
    store = get_autocomplete()
    store.reset()
    return dict(ok=True)


# ---- v2.55 Pre-deploy eval gate ----------------------------------------
@app.post("/v1/admin/eval_gate")
def admin_eval_gate_run(request: Request):
    """Run the eval gate, audit the outcome, and return its report.

    The audit record carries the report's ``passed``, ``n_checks`` and
    ``n_failed`` entries.
    """
    from ..middleware.eval_gate import get_eval_gate
    report = get_eval_gate().run()

    api_key = request.headers.get("x-api-key")
    req_id = getattr(request.state, "request_id", None)
    get_obs().audit("eval_gate.run", actor_key=api_key, request_id=req_id,
                    passed=report["passed"],
                    n_checks=report["n_checks"],
                    n_failed=report["n_failed"])
    return report


@app.get("/v1/admin/eval_gate")
def admin_eval_gate_list():
    """Return eval gate stats together with its registered checks."""
    from ..middleware.eval_gate import get_eval_gate
    gate = get_eval_gate()
    stats = gate.stats()
    checks = gate.list_checks()
    return {"stats": stats, "checks": checks}


# ---- v2.56 Near-duplicate detection ------------------------------------
class NearDupIndexRequest(BaseModel):
    """Request body for adding one document to the near-duplicate index."""
    # Identifier the text is indexed under.
    doc_id: str
    # Text passed to the index's index() call.
    text: str


class NearDupQueryRequest(BaseModel):
    """Request body for looking up near-duplicates of a text."""
    # Text passed to the index's find_near_dups_for_text().
    text: str
    # Forwarded as the lookup's threshold; also echoed in the response.
    threshold: float = 0.8
    # Forwarded as the lookup's limit.
    limit: int = 10


@app.get("/v1/admin/near_dup")
def admin_near_dup_get():
    """Return the near-duplicate index's stats."""
    from ..middleware.near_dup import get_near_dup_index
    index = get_near_dup_index()
    return index.stats()


@app.post("/v1/admin/near_dup/index")
def admin_near_dup_index(req: NearDupIndexRequest, request: Request):
    """Index a document's text for near-duplicate detection.

    Returns the document id and the index's ``n_signatures`` stat.
    """
    from ..middleware.near_dup import get_near_dup_index
    index = get_near_dup_index()
    index.index(req.doc_id, req.text)
    signature_count = index.stats()["n_signatures"]
    return {"ok": True, "doc_id": req.doc_id,
            "n_signatures": signature_count}


@app.delete("/v1/admin/near_dup/{doc_id}")
def admin_near_dup_remove(doc_id: str, request: Request):
    """Remove ``doc_id`` from the near-duplicate index.

    Raises:
        HTTPException: 404 when the index's ``remove`` reports failure.
    """
    from ..middleware.near_dup import get_near_dup_index
    removed = get_near_dup_index().remove(doc_id)
    if removed:
        return {"ok": True, "doc_id": doc_id, "removed": True}
    raise HTTPException(
        status_code=404,
        detail={"error": "not indexed", "doc_id": doc_id},
    )


@app.post("/v1/admin/near_dup/query")
def admin_near_dup_query(req: NearDupQueryRequest):
    """Find indexed documents that are near-duplicates of ``req.text``.

    Returns the requested threshold and one ``doc_id``/``similarity`` entry
    per match.
    """
    from ..middleware.near_dup import get_near_dup_index
    index = get_near_dup_index()
    found = index.find_near_dups_for_text(
        req.text, threshold=req.threshold, limit=req.limit)
    rows = []
    for match in found:
        rows.append({"doc_id": match.doc_id,
                     "similarity": match.similarity})
    return {"threshold": req.threshold, "matches": rows}


@app.get("/v1/admin/near_dup/pairs")
def admin_near_dup_pairs(threshold: float = 0.8):
    """Return the index's near-duplicate pairs for ``threshold``."""
    from ..middleware.near_dup import get_near_dup_index
    index = get_near_dup_index()
    pairs = index.pairs(threshold=threshold)
    return {"pairs": pairs}


# ---- v2.57 Query intent classification ---------------------------------
class ClassifyIntentRequest(BaseModel):
    """Request body carrying a query to classify."""
    # Passed to the intent classifier's classify().
    query: str


@app.post("/v1/query/intent")
def query_intent(req: ClassifyIntentRequest):
    """Classify the intent of a query.

    Intended for client-side routing or UX adaptation. The response echoes
    the query and exposes the classifier result's intent, confidence,
    matched pattern and per-intent scores.
    """
    from ..middleware.query_intent import get_intent_classifier
    classifier = get_intent_classifier()
    outcome = classifier.classify(req.query)
    response = {"query": req.query}
    response["intent"] = outcome.intent
    response["confidence"] = outcome.confidence
    response["matched_pattern"] = outcome.matched_pattern
    response["all_scores"] = outcome.all_scores
    return response


@app.get("/v1/admin/query_intent")
def admin_query_intent_get():
    """Return the intent classifier's stats."""
    from ..middleware.query_intent import get_intent_classifier
    classifier = get_intent_classifier()
    return classifier.stats()


class IntentConfigRequest(BaseModel):
    """Request body toggling the intent classifier."""
    # Forwarded to the intent classifier's set_enabled().
    enabled: bool


@app.post("/v1/admin/query_intent")
def admin_query_intent_set(req: IntentConfigRequest, request: Request):
    """Toggle the intent classifier; return ``ok`` merged with its stats."""
    from ..middleware.query_intent import get_intent_classifier
    classifier = get_intent_classifier()
    classifier.set_enabled(req.enabled)
    response = {"ok": True}
    response.update(classifier.stats())
    return response


@app.post("/v1/admin/query_intent/reset")
def admin_query_intent_reset(request: Request):
    """Reset the intent classifier and acknowledge with ``ok``."""
    from ..middleware.query_intent import get_intent_classifier
    classifier = get_intent_classifier()
    classifier.reset()
    return dict(ok=True)


# ---- v2.58 Document versioning -----------------------------------------
class DocVersionAddRequest(BaseModel):
    """Request body for recording a new version of a document."""
    # Document the version belongs to.
    doc_id: str
    # Version text passed to the version store's add_version().
    text: str
    # Optional note forwarded to add_version() alongside the text.
    note: Optional[str] = None


@app.post("/v1/admin/doc_versions")
def admin_doc_version_add(req: DocVersionAddRequest, request: Request):
    """Record a new version of a document and audit the change.

    The version's ``created_by`` is the hash prefix of the caller's API key
    (via ``_hash_prefix``). Returns the new version as a dict.
    """
    from ..middleware.doc_versioning import get_doc_versions
    store = get_doc_versions()
    api_key = request.headers.get("x-api-key")
    creator = _hash_prefix(api_key)
    version = store.add_version(req.doc_id, req.text,
                                created_by=creator, note=req.note)

    req_id = getattr(request.state, "request_id", None)
    get_obs().audit("doc_version.add", actor_key=api_key,
                    request_id=req_id, doc_id=req.doc_id,
                    version_id=version.version_id)
    return {"ok": True, "version": version.to_dict()}


@app.get("/v1/admin/doc_versions/{doc_id}")
def admin_doc_version_list(doc_id: str):
    """Return the version listing stored for ``doc_id``."""
    from ..middleware.doc_versioning import get_doc_versions
    store = get_doc_versions()
    versions = store.list_versions(doc_id)
    return {"doc_id": doc_id, "versions": versions}


# NOTE: route order matters. The literal "/{doc_id}/diff" path must be
# registered BEFORE the parameterised "/{doc_id}/{version_id}" path;
# routes are matched in declaration order, so otherwise a GET for ".../diff"
# is captured with version_id="diff" and the diff endpoint is unreachable.
@app.get("/v1/admin/doc_versions/{doc_id}/diff")
def admin_doc_version_diff(doc_id: str, va: str, vb: str):
    """Return the version store's diff between versions ``va`` and ``vb``.

    ``va`` and ``vb`` are required query parameters naming the two
    versions of ``doc_id`` to compare.
    """
    from ..middleware.doc_versioning import get_doc_versions
    return get_doc_versions().diff(doc_id, va, vb)


@app.get("/v1/admin/doc_versions/{doc_id}/{version_id}")
def admin_doc_version_get(doc_id: str, version_id: str):
    """Return one stored version of a document.

    Raises:
        HTTPException: 404 when the version store has no such version.
    """
    from ..middleware.doc_versioning import get_doc_versions
    v = get_doc_versions().get_version(doc_id, version_id)
    if v is None:
        raise HTTPException(status_code=404,
                            detail={"error": "version not found",
                                    "doc_id": doc_id,
                                    "version_id": version_id})
    return v


@app.delete("/v1/admin/doc_versions/{doc_id}")
def admin_doc_version_remove_doc(doc_id: str, request: Request):
    """Remove all stored versions of ``doc_id``.

    Returns the value reported by the store's ``remove_doc`` under
    ``removed``.
    """
    from ..middleware.doc_versioning import get_doc_versions
    n = get_doc_versions().remove_doc(doc_id)
    return {"ok": True, "doc_id": doc_id, "removed": n}


@app.get("/v1/admin/doc_versions")
def admin_doc_version_stats():
    """Return the document version store's stats."""
    from ..middleware.doc_versioning import get_doc_versions
    store = get_doc_versions()
    return store.stats()


# ---- v2.59 Concurrency limit -------------------------------------------
class ConcurrencyLimitRequest(BaseModel):
    """Request body setting a concurrency limit for one key prefix."""
    # Key prefix passed to the limiter's set_limit().
    key_prefix: str
    # Limit value passed to the limiter's set_limit().
    limit: int


class ConcurrencyDefaultRequest(BaseModel):
    """Request body setting the limiter's default concurrency limit."""
    # Passed to the limiter's set_default().
    default_limit: int


@app.get("/v1/admin/concurrency")
def admin_concurrency_get():
    """Return concurrency limiter stats and its in-flight listing."""
    from ..middleware.concurrency_limit import get_concurrency_limiter
    limiter = get_concurrency_limiter()
    stats = limiter.stats()
    inflight = limiter.all_inflight()
    return {"stats": stats, "inflight": inflight}


@app.post("/v1/admin/concurrency/key")
def admin_concurrency_set_key(req: ConcurrencyLimitRequest,
                               request: Request):
    """Set the concurrency limit for a key prefix and audit the change."""
    from ..middleware.concurrency_limit import get_concurrency_limiter
    limiter = get_concurrency_limiter()
    limiter.set_limit(req.key_prefix, req.limit)

    api_key = request.headers.get("x-api-key")
    req_id = getattr(request.state, "request_id", None)
    get_obs().audit("concurrency.set_key", actor_key=api_key,
                    request_id=req_id, key_prefix=req.key_prefix,
                    limit=req.limit)
    return {"ok": True, "key_prefix": req.key_prefix, "limit": req.limit}


@app.post("/v1/admin/concurrency/default")
def admin_concurrency_set_default(req: ConcurrencyDefaultRequest,
                                    request: Request):
    """Set the limiter's default limit and return the value it now reports."""
    from ..middleware.concurrency_limit import get_concurrency_limiter
    limiter = get_concurrency_limiter()
    limiter.set_default(req.default_limit)
    effective = limiter.default_limit
    return {"ok": True, "default_limit": effective}


# ---- v2.60 Language detection ------------------------------------------
class DetectLangRequest(BaseModel):
    """Request body carrying text for language detection."""
    # Passed to the language detector's detect().
    text: str


@app.post("/v1/query/detect_language")
def query_detect_language(req: DetectLangRequest):
    """Detect the language of ``req.text``.

    Exposes the detector result's language, confidence and scores, plus
    its ``n_chars_total`` under the ``n_chars`` key.
    """
    from ..middleware.lang_detect import get_language_detector
    detector = get_language_detector()
    detection = detector.detect(req.text)
    response = {"language": detection.language}
    response["confidence"] = detection.confidence
    response["scores"] = detection.scores
    response["n_chars"] = detection.n_chars_total
    return response


@app.get("/v1/admin/language")
def admin_language_get():
    """Return the language detector's stats."""
    from ..middleware.lang_detect import get_language_detector
    detector = get_language_detector()
    return detector.stats()


@app.post("/v1/admin/language/reset")
def admin_language_reset(request: Request):
    """Reset the language detector and acknowledge with ``ok``."""
    from ..middleware.lang_detect import get_language_detector
    detector = get_language_detector()
    detector.reset()
    return dict(ok=True)


# ---- v2.61 XSS sanitizer -----------------------------------------------
class XSSPathRequest(BaseModel):
    """Request body naming a path to register with the XSS sanitizer."""
    # Passed to the sanitizer's register().
    path: str


@app.get("/v1/admin/xss")
def admin_xss_get():
    """Return the XSS sanitizer's registered paths and stats."""
    from ..middleware.xss_sanitize import get_xss_sanitizer
    sanitizer = get_xss_sanitizer()
    paths = sanitizer.all_paths()
    stats = sanitizer.stats()
    return {"paths": paths, "stats": stats}


@app.post("/v1/admin/xss/register")
def admin_xss_register(req: XSSPathRequest, request: Request):
    """Register a path with the XSS sanitizer and audit the change.

    Returns the sanitizer's full path listing after registration.
    """
    from ..middleware.xss_sanitize import get_xss_sanitizer
    sanitizer = get_xss_sanitizer()
    sanitizer.register(req.path)

    api_key = request.headers.get("x-api-key")
    req_id = getattr(request.state, "request_id", None)
    get_obs().audit("xss.register", actor_key=api_key,
                    request_id=req_id, path=req.path)
    return {"ok": True, "paths": sanitizer.all_paths()}


@app.delete("/v1/admin/xss/{path:path}")
def admin_xss_unregister(path: str, request: Request):
    """Unregister a path from the XSS sanitizer and audit the change.

    Raises:
        HTTPException: 404 when the sanitizer's ``unregister`` reports
            failure.
    """
    from ..middleware.xss_sanitize import get_xss_sanitizer
    s = get_xss_sanitizer()
    if not s.unregister(path):
        raise HTTPException(status_code=404,
                            detail={"error": "path not registered",
                                    "path": path})
    # Removing a path from the sanitizer is security-relevant: record it in
    # the audit log, mirroring the "xss.register" endpoint. Only successful
    # removals are audited.
    get_obs().audit(
        "xss.unregister",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        path=path,
    )
    return {"ok": True, "path": path, "removed": True}


# ---- v2.62 Resource pool ------------------------------------------------
class ResourcePoolRegisterRequest(BaseModel):
    """Request body for registering a named resource pool."""
    # Pool name passed to the registry's register().
    name: str
    # Pool capacity passed to register(); a ValueError from the registry is
    # mapped to HTTP 400 by the handler.
    capacity: int


@app.get("/v1/admin/resource_pools")
def admin_resource_pools_get():
    """Return resource pool stats together with every registered pool."""
    from ..middleware.resource_pool import get_resource_pool
    registry = get_resource_pool()
    stats = registry.stats()
    pools = registry.all_pools()
    return {"stats": stats, "pools": pools}


@app.post("/v1/admin/resource_pools")
def admin_resource_pool_register(req: ResourcePoolRegisterRequest,
                                   request: Request):
    """Register a resource pool and audit the change.

    Returns the name and capacity of the pool object the registry created.

    Raises:
        HTTPException: 400 when the registry rejects the request with a
            ValueError.
    """
    from ..middleware.resource_pool import get_resource_pool
    registry = get_resource_pool()
    try:
        created = registry.register(req.name, req.capacity)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})

    api_key = request.headers.get("x-api-key")
    req_id = getattr(request.state, "request_id", None)
    get_obs().audit("resource_pool.register", actor_key=api_key,
                    request_id=req_id, name=req.name,
                    capacity=req.capacity)
    return {"ok": True, "name": created.name, "capacity": created.capacity}


@app.delete("/v1/admin/resource_pools/{name}")
def admin_resource_pool_remove(name: str, request: Request):
    """Unregister the resource pool ``name``.

    Raises:
        HTTPException: 404 when the registry's ``unregister`` reports
            failure.
    """
    from ..middleware.resource_pool import get_resource_pool
    removed = get_resource_pool().unregister(name)
    if removed:
        return {"ok": True, "name": name, "removed": True}
    raise HTTPException(
        status_code=404,
        detail={"error": "not registered", "name": name},
    )


# ---- v2.63 Cache warmup ------------------------------------------------
class CacheWarmupFromFileRequest(BaseModel):
    """Request body naming a file to warm the cache from."""
    # Passed to the cache warmer's warm_from_file().
    # NOTE(review): the path is forwarded as given; confirm the warmer
    # restricts which locations may be read.
    path: str


class CacheWarmupListRequest(BaseModel):
    """Request body carrying an explicit list of warmup queries."""
    # Passed unchanged to the cache warmer's warm_from_list(); the shape of
    # each entry is defined by the warmer, not validated here.
    queries: List[Dict[str, Any]]


@app.get("/v1/admin/cache_warmup")
def admin_cache_warmup_get():
    """Return the cache warmer's progress report."""
    from ..middleware.cache_warmup import get_cache_warmer
    warmer = get_cache_warmer()
    return warmer.progress()


@app.post("/v1/admin/cache_warmup/from_file")
def admin_cache_warmup_from_file(req: CacheWarmupFromFileRequest,
                                   request: Request):
    """Start a cache warmup from a file and audit the request.

    Raises:
        HTTPException: 400 when the warmer's ``warm_from_file`` reports
            failure.
    """
    from ..middleware.cache_warmup import get_cache_warmer
    started = get_cache_warmer().warm_from_file(req.path)
    if not started:
        raise HTTPException(
            status_code=400,
            detail={"error": "file not found or warmup already running",
                    "path": req.path},
        )

    api_key = request.headers.get("x-api-key")
    req_id = getattr(request.state, "request_id", None)
    get_obs().audit("cache_warmup.from_file", actor_key=api_key,
                    request_id=req_id, path=req.path)
    return {"ok": True, "status": "started"}


@app.post("/v1/admin/cache_warmup/from_autocomplete")
def admin_cache_warmup_from_autocomplete(top_n: int = 50,
                                           request: Request = None):
    """Start a cache warmup from autocomplete data.

    ``top_n`` is forwarded to the warmer's ``warm_from_autocomplete``.

    Raises:
        HTTPException: 400 when the warmer reports failure.
    """
    from ..middleware.cache_warmup import get_cache_warmer
    started = get_cache_warmer().warm_from_autocomplete(top_n=top_n)
    if started:
        return {"ok": True, "status": "started", "top_n": top_n}
    raise HTTPException(
        status_code=400,
        detail={"error": "warmup already running"},
    )


@app.post("/v1/admin/cache_warmup/from_list")
def admin_cache_warmup_from_list(req: CacheWarmupListRequest,
                                   request: Request):
    """Start a cache warmup from an explicit list of queries.

    Raises:
        HTTPException: 400 when the warmer's ``warm_from_list`` reports
            failure.
    """
    from ..middleware.cache_warmup import get_cache_warmer
    started = get_cache_warmer().warm_from_list(req.queries)
    if started:
        return {"ok": True, "status": "started",
                "n_queries": len(req.queries)}
    raise HTTPException(
        status_code=400,
        detail={"error": "warmup already running"},
    )


@app.post("/v1/admin/cache_warmup/cancel")
def admin_cache_warmup_cancel(request: Request):
    """Cancel the cache warmup.

    Raises:
        HTTPException: 400 when the warmer's ``cancel`` reports failure.
    """
    from ..middleware.cache_warmup import get_cache_warmer
    cancelled = get_cache_warmer().cancel()
    if cancelled:
        return dict(ok=True)
    raise HTTPException(
        status_code=400,
        detail={"error": "no warmup running"},
    )


# ---- v2.64 Lazy init registry ------------------------------------------
@app.get("/v1/admin/lazy_init")
def admin_lazy_init_get():
    """Return the lazy-init registry's summary and per-entry status."""
    from ..middleware.lazy_init import get_lazy_registry
    registry = get_lazy_registry()
    summary = registry.summary()
    entries = registry.status()
    return {"summary": summary, "entries": entries}


@app.post("/v1/admin/lazy_init/preload/{name}")
def admin_lazy_init_preload(name: str, request: Request):
    """Preload the lazy-init entry ``name`` and audit the outcome.

    The response's ``ok`` is the registry's ``preload`` result.

    Raises:
        HTTPException: 404 when ``name`` is not registered.
    """
    from ..middleware.lazy_init import get_lazy_registry
    registry = get_lazy_registry()
    if not registry.is_registered(name):
        raise HTTPException(
            status_code=404,
            detail={"error": "not registered", "name": name},
        )
    loaded = registry.preload(name)

    api_key = request.headers.get("x-api-key")
    req_id = getattr(request.state, "request_id", None)
    get_obs().audit("lazy_init.preload", actor_key=api_key,
                    request_id=req_id, name=name, ok=loaded)
    return {"ok": loaded, "name": name,
            "status": registry.get_status(name)}


@app.post("/v1/admin/lazy_init/reset/{name}")
def admin_lazy_init_reset(name: str, request: Request):
    """Reset the lazy-init entry ``name`` and audit the change.

    Raises:
        HTTPException: 404 when the registry's ``reset`` reports failure.
    """
    from ..middleware.lazy_init import get_lazy_registry
    was_reset = get_lazy_registry().reset(name)
    if not was_reset:
        raise HTTPException(
            status_code=404,
            detail={"error": "not registered", "name": name},
        )

    api_key = request.headers.get("x-api-key")
    req_id = getattr(request.state, "request_id", None)
    get_obs().audit("lazy_init.reset", actor_key=api_key,
                    request_id=req_id, name=name)
    return {"ok": True, "name": name}


@app.post("/v1/admin/lazy_init/preload_all")
def admin_lazy_init_preload_all(request: Request):
    """Preload every lazy-init entry and audit how many results came back.

    Returns the per-entry results plus the registry summary.
    """
    from ..middleware.lazy_init import get_lazy_registry
    registry = get_lazy_registry()
    outcomes = registry.preload_all()

    api_key = request.headers.get("x-api-key")
    req_id = getattr(request.state, "request_id", None)
    get_obs().audit("lazy_init.preload_all", actor_key=api_key,
                    request_id=req_id, n=len(outcomes))
    return {"ok": True, "results": outcomes,
            "summary": registry.summary()}


# ---- v2.65 Pipeline failover ------------------------------------------
class FailoverPolicyRequest(BaseModel):
    """Request body selecting the failover router's policy."""
    # Passed to the router's set_policy(); a ValueError from the router is
    # mapped to HTTP 400 by the handler.
    policy: str


class FailoverPriorityRequest(BaseModel):
    """Request body setting the priority of one failover chain entry."""
    # Entry name passed to the router's set_priority().
    name: str
    # Priority value passed to the router's set_priority().
    priority: int


@app.get("/v1/admin/failover")
def admin_failover_get():
    """Return the failover router's policy, chain and stats."""
    from ..middleware.pipeline_failover import get_failover_router
    router = get_failover_router()
    policy = router.policy()
    chain = router.chain()
    stats = router.stats()
    return {"policy": policy, "chain": chain, "stats": stats}


@app.post("/v1/admin/failover/policy")
def admin_failover_set_policy(req: FailoverPolicyRequest,
                                request: Request):
    """Set the failover policy and audit the change.

    Returns the policy the router reports afterwards.

    Raises:
        HTTPException: 400 when the router rejects the policy with a
            ValueError.
    """
    from ..middleware.pipeline_failover import get_failover_router
    router = get_failover_router()
    try:
        router.set_policy(req.policy)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})

    api_key = request.headers.get("x-api-key")
    req_id = getattr(request.state, "request_id", None)
    get_obs().audit("failover.set_policy", actor_key=api_key,
                    request_id=req_id, policy=req.policy)
    return {"ok": True, "policy": router.policy()}


@app.post("/v1/admin/failover/priority")
def admin_failover_set_priority(req: FailoverPriorityRequest,
                                  request: Request):
    """Set the priority of a failover chain entry and audit the change.

    Returns the entry, its priority and the router's resulting chain.

    Raises:
        HTTPException: 404 when the router's ``set_priority`` reports
            failure.
    """
    from ..middleware.pipeline_failover import get_failover_router
    router = get_failover_router()
    updated = router.set_priority(req.name, req.priority)
    if not updated:
        raise HTTPException(
            status_code=404,
            detail={"error": "not registered", "name": req.name},
        )

    api_key = request.headers.get("x-api-key")
    req_id = getattr(request.state, "request_id", None)
    get_obs().audit("failover.set_priority", actor_key=api_key,
                    request_id=req_id, name=req.name,
                    priority=req.priority)
    return {"ok": True, "name": req.name, "priority": req.priority,
            "chain": router.chain()}


@app.post("/v1/admin/failover/reset")
def admin_failover_reset(request: Request):
    """Reset the failover router and audit the action."""
    from ..middleware.pipeline_failover import get_failover_router
    router = get_failover_router()
    router.reset()

    api_key = request.headers.get("x-api-key")
    req_id = getattr(request.state, "request_id", None)
    get_obs().audit("failover.reset", actor_key=api_key,
                    request_id=req_id)
    return dict(ok=True)


# ---- v2.66 Intent-aware reranker ---------------------------------------
class IntentRerankEnableRequest(BaseModel):
    """Request body toggling the intent-aware reranker."""
    # Forwarded to the reranker's set_enabled().
    enabled: bool


class IntentRerankWeightRequest(BaseModel):
    """Request body setting one (intent, doc_type) reranker weight."""
    # Intent half of the weight key passed to the reranker's set_weight().
    intent: str
    # Document-type half of the weight key passed to set_weight().
    doc_type: str
    # Weight value passed to set_weight(); a ValueError from the reranker
    # is mapped to HTTP 400 by the handler.
    weight: float


@app.get("/v1/admin/intent_rerank")
def admin_intent_rerank_get():
    """Return the intent reranker's stats and weights."""
    from ..middleware.intent_rerank import get_intent_reranker
    reranker = get_intent_reranker()
    stats = reranker.stats()
    weights = reranker.weights()
    return {"stats": stats, "weights": weights}


@app.post("/v1/admin/intent_rerank/enable")
def admin_intent_rerank_enable(req: IntentRerankEnableRequest,
                                 request: Request):
    """Toggle the intent reranker and audit the change.

    Returns the enabled state the reranker reports afterwards.
    """
    from ..middleware.intent_rerank import get_intent_reranker
    reranker = get_intent_reranker()
    reranker.set_enabled(req.enabled)

    api_key = request.headers.get("x-api-key")
    req_id = getattr(request.state, "request_id", None)
    get_obs().audit("intent_rerank.enable", actor_key=api_key,
                    request_id=req_id, enabled=req.enabled)
    return {"ok": True, "enabled": reranker.is_enabled()}


@app.post("/v1/admin/intent_rerank/weight")
def admin_intent_rerank_set_weight(
    req: IntentRerankWeightRequest,
    request: Request,
):
    """Set the rerank weight for an (intent, doc_type) pair.

    Emits an ``intent_rerank.set_weight`` audit event and returns the
    weight read back through ``get_weight``.

    Raises:
        HTTPException: 400 when ``set_weight`` raises ``ValueError``.
    """
    from ..middleware.intent_rerank import get_intent_reranker
    reranker = get_intent_reranker()
    try:
        reranker.set_weight(req.intent, req.doc_type, req.weight)
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail={"error": str(exc)},
        )
    obs = get_obs()
    obs.audit(
        "intent_rerank.set_weight",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        intent=req.intent,
        doc_type=req.doc_type,
        weight=req.weight,
    )
    return {
        "ok": True,
        "intent": req.intent,
        "doc_type": req.doc_type,
        "weight": reranker.get_weight(req.intent, req.doc_type),
    }


@app.post("/v1/admin/intent_rerank/reset")
def admin_intent_rerank_reset(request: Request):
    """Call ``reset()`` on the intent reranker. ``request`` is unused."""
    from ..middleware.intent_rerank import get_intent_reranker
    reranker = get_intent_reranker()
    reranker.reset()
    return {"ok": True}


# ---- v2.67 Stage budgets ------------------------------------------------
class StageBudgetRegisterRequest(BaseModel):
    """Body of ``POST /v1/admin/stage_budgets``."""

    # Stage name; first argument to the stage-budgets ``register``.
    stage: str
    # Budget for the stage (milliseconds, per the field name).
    budget_ms: float


class StageBudgetTrackRequest(BaseModel):
    """Body of ``POST /v1/admin/stage_budgets/track``."""

    # Stage name; first argument to the stage-budgets ``track``.
    stage: str
    # Observed duration to record (milliseconds, per the field name).
    duration_ms: float


@app.get("/v1/admin/stage_budgets")
def admin_stage_budgets_get():
    """Return ``stats()`` from the stage-budgets tracker."""
    from ..middleware.stage_budgets import get_stage_budgets
    budgets = get_stage_budgets()
    return budgets.stats()


@app.post("/v1/admin/stage_budgets")
def admin_stage_budgets_register(
    req: StageBudgetRegisterRequest,
    request: Request,
):
    """Register a budget for a stage.

    Emits a ``stage_budgets.register`` audit event on success.

    Raises:
        HTTPException: 400 when ``register`` raises ``ValueError``.
    """
    from ..middleware.stage_budgets import get_stage_budgets
    budgets = get_stage_budgets()
    try:
        budgets.register(req.stage, req.budget_ms)
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail={"error": str(exc)},
        )
    obs = get_obs()
    obs.audit(
        "stage_budgets.register",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        stage=req.stage,
        budget_ms=req.budget_ms,
    )
    return {
        "ok": True,
        "stage": req.stage,
        "budget_ms": req.budget_ms,
    }


@app.delete("/v1/admin/stage_budgets/{stage}")
def admin_stage_budgets_unregister(stage: str, request: Request):
    """Remove the budget registered for ``stage``.

    Raises:
        HTTPException: 404 when ``unregister`` returns a falsy value.
    """
    from ..middleware.stage_budgets import get_stage_budgets
    removed = get_stage_budgets().unregister(stage)
    if removed:
        return {"ok": True, "stage": stage, "removed": True}
    raise HTTPException(
        status_code=404,
        detail={"error": "not registered", "stage": stage},
    )


@app.post("/v1/admin/stage_budgets/track")
def admin_stage_budgets_track(req: StageBudgetTrackRequest):
    """Record a duration for a stage and return the result as a dict."""
    from ..middleware.stage_budgets import get_stage_budgets
    budgets = get_stage_budgets()
    return budgets.track(req.stage, req.duration_ms).to_dict()


@app.post("/v1/admin/stage_budgets/reset")
def admin_stage_budgets_reset(
    request: Request,
    stage: Optional[str] = None,
):
    """Reset stage-budget state.

    ``stage`` is forwarded unchanged to ``reset``; the response reports
    ``"all"`` when it is ``None`` or empty.
    """
    from ..middleware.stage_budgets import get_stage_budgets
    budgets = get_stage_budgets()
    budgets.reset(stage)
    scope = stage or "all"
    return {"ok": True, "stage": scope}


# ---- v2.68 Graceful degradation ----------------------------------------
class DegradationModeRequest(BaseModel):
    """Body of ``POST /v1/admin/degradation/mode``."""

    # Target mode name; validated by the degradation manager's ``set_mode``.
    mode: str
    # Free-text reason; the endpoint falls back to "manual" when this is
    # None or empty.
    reason: Optional[str] = "manual"


class DegradationFeatureRequest(BaseModel):
    """Body of ``POST /v1/admin/degradation/feature``."""

    # Feature name; first argument to ``register_feature``.
    feature: str
    # Tier for the feature; its meaning is defined by ``register_feature``.
    tier: int


@app.get("/v1/admin/degradation")
def admin_degradation_get():
    """Return the graceful-degradation manager's ``snapshot()``."""
    from ..middleware.graceful_degradation import get_graceful_degradation
    manager = get_graceful_degradation()
    return manager.snapshot()


@app.post("/v1/admin/degradation/mode")
def admin_degradation_set_mode(
    req: DegradationModeRequest,
    request: Request,
):
    """Set the degradation mode explicitly.

    The ``x-api-key`` header is passed as the actor, and a missing or
    empty reason becomes ``"manual"``. Emits a ``degradation.set_mode``
    audit event and returns the transition as a dict.

    Raises:
        HTTPException: 400 when ``set_mode`` raises ``ValueError``.
    """
    from ..middleware.graceful_degradation import get_graceful_degradation
    manager = get_graceful_degradation()
    try:
        transition = manager.set_mode(
            req.mode,
            reason=req.reason or "manual",
            actor=request.headers.get("x-api-key"),
        )
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail={"error": str(exc)},
        )
    obs = get_obs()
    obs.audit(
        "degradation.set_mode",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        from_mode=transition.from_mode,
        to_mode=transition.to_mode,
        reason=transition.reason,
    )
    return transition.to_dict()


@app.post("/v1/admin/degradation/degrade")
def admin_degradation_degrade(request: Request,
                                 reason: str = "load"):
    """Degrade one step via the degradation manager's ``degrade()``.

    ``reason`` is a query parameter forwarded to ``degrade``. A
    successful transition is recorded as a ``degradation.degrade`` audit
    event, mirroring what the explicit ``/degradation/mode`` endpoint
    does for ``set_mode``; previously this mode change left no audit
    trail.

    Returns:
        The transition as a dict (``trans.to_dict()``).

    Raises:
        HTTPException: 400 when ``degrade`` returns ``None``, reported
            as "already at emergency".
    """
    from ..middleware.graceful_degradation import get_graceful_degradation
    gd = get_graceful_degradation()
    trans = gd.degrade(reason=reason)
    if trans is None:
        raise HTTPException(status_code=400,
                            detail={"error": "already at emergency"})
    # NOTE(review): assumes degrade() returns the same transition type as
    # set_mode() (from_mode / to_mode / reason attributes) — confirm.
    get_obs().audit(
        "degradation.degrade",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        from_mode=trans.from_mode, to_mode=trans.to_mode,
        reason=trans.reason,
    )
    return trans.to_dict()


@app.post("/v1/admin/degradation/recover")
def admin_degradation_recover(request: Request,
                                  reason: str = "load_subsided"):
    """Recover one step via the degradation manager's ``recover()``.

    ``reason`` is a query parameter forwarded to ``recover``. A
    successful transition is recorded as a ``degradation.recover`` audit
    event, mirroring what the explicit ``/degradation/mode`` endpoint
    does for ``set_mode``; previously this mode change left no audit
    trail.

    Returns:
        The transition as a dict (``trans.to_dict()``).

    Raises:
        HTTPException: 400 when ``recover`` returns ``None``, reported
            as "already normal".
    """
    from ..middleware.graceful_degradation import get_graceful_degradation
    gd = get_graceful_degradation()
    trans = gd.recover(reason=reason)
    if trans is None:
        raise HTTPException(status_code=400,
                            detail={"error": "already normal"})
    # NOTE(review): assumes recover() returns the same transition type as
    # set_mode() (from_mode / to_mode / reason attributes) — confirm.
    get_obs().audit(
        "degradation.recover",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        from_mode=trans.from_mode, to_mode=trans.to_mode,
        reason=trans.reason,
    )
    return trans.to_dict()


@app.post("/v1/admin/degradation/feature")
def admin_degradation_register_feature(
    req: DegradationFeatureRequest,
    request: Request,
):
    """Register a feature and its tier with the degradation manager.

    Emits a ``degradation.register_feature`` audit event on success.

    Raises:
        HTTPException: 400 when ``register_feature`` raises ``ValueError``.
    """
    from ..middleware.graceful_degradation import get_graceful_degradation
    manager = get_graceful_degradation()
    try:
        manager.register_feature(req.feature, req.tier)
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail={"error": str(exc)},
        )
    obs = get_obs()
    obs.audit(
        "degradation.register_feature",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        feature=req.feature,
        tier=req.tier,
    )
    return {"ok": True, "feature": req.feature, "tier": req.tier}


# ---- v2.69 Query rewrite pipeline --------------------------------------
class QueryRewriteEnableRequest(BaseModel):
    """Body of ``POST /v1/admin/query_rewrite/enable``."""

    # Desired on/off state; passed to the pipeline's ``set_enabled``.
    enabled: bool


class QueryRewriteStepToggleRequest(BaseModel):
    """Body of ``POST /v1/admin/query_rewrite/step/toggle``."""

    # Name of the rewrite step to toggle.
    name: str
    # Desired on/off state for that step.
    enabled: bool


class QueryRewriteStepOrderRequest(BaseModel):
    """Body of ``POST /v1/admin/query_rewrite/step/order``."""

    # Name of the rewrite step to reorder.
    name: str
    # New order value; passed to the pipeline's ``set_step_order``.
    order: int


class QueryRewriteRequest(BaseModel):
    """Body of ``POST /v1/admin/query_rewrite/try``."""

    # Query text handed to the pipeline's ``rewrite``.
    query: str


@app.get("/v1/admin/query_rewrite")
def admin_query_rewrite_get():
    """Return ``stats()`` from the query-rewrite pipeline."""
    from ..middleware.query_rewrite import get_query_rewrite
    pipeline = get_query_rewrite()
    return pipeline.stats()


@app.post("/v1/admin/query_rewrite/enable")
def admin_query_rewrite_enable(
    req: QueryRewriteEnableRequest,
    request: Request,
):
    """Switch the query-rewrite pipeline on or off.

    Emits a ``query_rewrite.enable`` audit event and reports the state
    read back via ``is_enabled()``.
    """
    from ..middleware.query_rewrite import get_query_rewrite
    pipeline = get_query_rewrite()
    pipeline.set_enabled(req.enabled)
    obs = get_obs()
    obs.audit(
        "query_rewrite.enable",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        enabled=req.enabled,
    )
    return {"ok": True, "enabled": pipeline.is_enabled()}


@app.post("/v1/admin/query_rewrite/step/toggle")
def admin_query_rewrite_step_toggle(
    req: QueryRewriteStepToggleRequest,
    request: Request,
):
    """Enable or disable a single rewrite step by name.

    Raises:
        HTTPException: 404 when ``set_step_enabled`` returns a falsy value.
    """
    from ..middleware.query_rewrite import get_query_rewrite
    found = get_query_rewrite().set_step_enabled(req.name, req.enabled)
    if found:
        return {"ok": True, "name": req.name, "enabled": req.enabled}
    raise HTTPException(
        status_code=404,
        detail={"error": "step not found", "name": req.name},
    )


@app.post("/v1/admin/query_rewrite/step/order")
def admin_query_rewrite_step_order(
    req: QueryRewriteStepOrderRequest,
    request: Request,
):
    """Change the order value of a single rewrite step by name.

    Raises:
        HTTPException: 404 when ``set_step_order`` returns a falsy value.
    """
    from ..middleware.query_rewrite import get_query_rewrite
    found = get_query_rewrite().set_step_order(req.name, req.order)
    if found:
        return {"ok": True, "name": req.name, "order": req.order}
    raise HTTPException(
        status_code=404,
        detail={"error": "step not found", "name": req.name},
    )


@app.post("/v1/admin/query_rewrite/try")
def admin_query_rewrite_try(req: QueryRewriteRequest):
    """Run ``req.query`` through the rewrite pipeline; return the result dict."""
    from ..middleware.query_rewrite import get_query_rewrite
    pipeline = get_query_rewrite()
    return pipeline.rewrite(req.query).to_dict()


@app.post("/v1/admin/query_rewrite/reset")
def admin_query_rewrite_reset(request: Request):
    """Call ``reset()`` on the query-rewrite pipeline. ``request`` is unused."""
    from ..middleware.query_rewrite import get_query_rewrite
    pipeline = get_query_rewrite()
    pipeline.reset()
    return {"ok": True}


# ---- v2.70 Semantic cache ----------------------------------------------
class SemanticCacheCapacityRequest(BaseModel):
    """Body of ``POST /v1/admin/semantic_cache/capacity``."""

    # New capacity; validated by the cache's ``set_capacity``.
    capacity: int


class SemanticCacheTTLRequest(BaseModel):
    """Body of ``POST /v1/admin/semantic_cache/ttl``."""

    # Default TTL passed to ``set_default_ttl``; None is forwarded as-is
    # (presumably meaning "no expiry" — confirm against the cache).
    ttl_sec: Optional[float] = None


@app.get("/v1/admin/semantic_cache")
def admin_semantic_cache_get(limit: int = 20):
    """Return semantic-cache stats plus entries capped by ``limit``.

    ``limit`` is passed unchanged to the cache's ``entries(limit=...)``.
    """
    from ..middleware.semantic_cache import get_semantic_cache
    cache = get_semantic_cache()
    stats = cache.stats()
    entries = cache.entries(limit=limit)
    return {"stats": stats, "entries": entries}


@app.post("/v1/admin/semantic_cache/capacity")
def admin_semantic_cache_set_capacity(
    req: SemanticCacheCapacityRequest,
    request: Request,
):
    """Change the semantic cache capacity.

    Emits a ``semantic_cache.set_capacity`` audit event on success.

    Raises:
        HTTPException: 400 when ``set_capacity`` raises ``ValueError``.
    """
    from ..middleware.semantic_cache import get_semantic_cache
    cache = get_semantic_cache()
    try:
        cache.set_capacity(req.capacity)
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail={"error": str(exc)},
        )
    obs = get_obs()
    obs.audit(
        "semantic_cache.set_capacity",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        capacity=req.capacity,
    )
    return {"ok": True, "capacity": req.capacity}


@app.post("/v1/admin/semantic_cache/ttl")
def admin_semantic_cache_set_ttl(
    req: SemanticCacheTTLRequest,
    request: Request,
):
    """Set the semantic cache's default TTL.

    Raises:
        HTTPException: 400 when ``set_default_ttl`` raises ``ValueError``.
    """
    from ..middleware.semantic_cache import get_semantic_cache
    cache = get_semantic_cache()
    try:
        cache.set_default_ttl(req.ttl_sec)
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail={"error": str(exc)},
        )
    return {"ok": True, "ttl_sec": req.ttl_sec}


@app.post("/v1/admin/semantic_cache/purge_expired")
def admin_semantic_cache_purge_expired(request: Request):
    """Purge expired cache entries; report ``purge_expired()``'s result."""
    from ..middleware.semantic_cache import get_semantic_cache
    cache = get_semantic_cache()
    purged = cache.purge_expired()
    return {"ok": True, "purged": purged}


@app.post("/v1/admin/semantic_cache/clear")
def admin_semantic_cache_clear(request: Request):
    """Clear the semantic cache.

    The value returned by ``clear()`` goes into both the
    ``semantic_cache.clear`` audit event and the response.
    """
    from ..middleware.semantic_cache import get_semantic_cache
    cache = get_semantic_cache()
    cleared = cache.clear()
    obs = get_obs()
    obs.audit(
        "semantic_cache.clear",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        cleared=cleared,
    )
    return {"ok": True, "cleared": cleared}


# ---- v2.71 Score calibration -------------------------------------------
class ScoreCalibRegisterRequest(BaseModel):
    """Body of ``POST /v1/admin/score_calibration``.

    Fields are forwarded to the calibrator's
    ``register(name, method=..., window=..., warmup_n=...)``.
    """

    # Name under which the calibration is registered.
    name: str
    # Calibration method identifier; defaults to "minmax".
    method: str = "minmax"
    # Forwarded as ``window``; defaults to 512.
    window: int = 512
    # Forwarded as ``warmup_n``; defaults to 20.
    warmup_n: int = 20


class ScoreCalibMethodRequest(BaseModel):
    """Body of ``POST /v1/admin/score_calibration/method``."""

    # Name of the registered calibration to change.
    name: str
    # New method identifier; validated by the calibrator's ``set_method``.
    method: str


@app.get("/v1/admin/score_calibration")
def admin_score_calib_get():
    """Return ``stats()`` from the score calibrator."""
    from ..middleware.score_calibration import get_score_calibrator
    calibrator = get_score_calibrator()
    return calibrator.stats()


@app.post("/v1/admin/score_calibration")
def admin_score_calib_register(
    req: ScoreCalibRegisterRequest,
    request: Request,
):
    """Register a score calibration under ``req.name``.

    Emits a ``score_calibration.register`` audit event on success.

    Raises:
        HTTPException: 400 when ``register`` raises ``ValueError`` or
            ``KeyError``.
    """
    from ..middleware.score_calibration import get_score_calibrator
    calibrator = get_score_calibrator()
    try:
        calibrator.register(
            req.name,
            method=req.method,
            window=req.window,
            warmup_n=req.warmup_n,
        )
    except (ValueError, KeyError) as exc:
        raise HTTPException(
            status_code=400,
            detail={"error": str(exc)},
        )
    obs = get_obs()
    obs.audit(
        "score_calibration.register",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        name=req.name,
        method=req.method,
    )
    return {"ok": True, "name": req.name, "method": req.method}


@app.post("/v1/admin/score_calibration/method")
def admin_score_calib_set_method(
    req: ScoreCalibMethodRequest,
    request: Request,
):
    """Change the method of an already-registered calibration.

    Raises:
        HTTPException: 400 when ``set_method`` raises ``ValueError``;
            404 when it returns a falsy value.
    """
    from ..middleware.score_calibration import get_score_calibrator
    calibrator = get_score_calibrator()
    try:
        changed = calibrator.set_method(req.name, req.method)
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail={"error": str(exc)},
        )
    if changed:
        return {"ok": True, "name": req.name, "method": req.method}
    raise HTTPException(
        status_code=404,
        detail={"error": "not registered", "name": req.name},
    )


@app.delete("/v1/admin/score_calibration/{name}")
def admin_score_calib_unregister(name: str, request: Request):
    """Remove the calibration registered as ``name``.

    Raises:
        HTTPException: 404 when ``unregister`` returns a falsy value.
    """
    from ..middleware.score_calibration import get_score_calibrator
    removed = get_score_calibrator().unregister(name)
    if removed:
        return {"ok": True, "name": name, "removed": True}
    raise HTTPException(
        status_code=404,
        detail={"error": "not registered", "name": name},
    )


@app.post("/v1/admin/score_calibration/reset")
def admin_score_calib_reset(
    request: Request,
    name: Optional[str] = None,
):
    """Reset score-calibration state.

    ``name`` is forwarded unchanged to ``reset``; the response reports
    ``"all"`` when it is ``None`` or empty.
    """
    from ..middleware.score_calibration import get_score_calibrator
    calibrator = get_score_calibrator()
    calibrator.reset(name)
    scope = name or "all"
    return {"ok": True, "name": scope}


# ---- v2.72 Answer postprocessor ----------------------------------------
class PostprocessorEnableRequest(BaseModel):
    """Body of ``POST /v1/admin/postprocess/enable``."""

    # Desired on/off state; passed to the postprocessor's ``set_enabled``.
    enabled: bool


class PostprocessorStepToggleRequest(BaseModel):
    """Body of ``POST /v1/admin/postprocess/step/toggle``."""

    # Name of the postprocessing step to toggle.
    name: str
    # Desired on/off state for that step.
    enabled: bool


class PostprocessorMaxCharsRequest(BaseModel):
    """Body of ``POST /v1/admin/postprocess/max_chars``."""

    # Forwarded to ``set_max_output_chars``; None is passed through as-is
    # (presumably "no limit" — confirm against the postprocessor).
    max_chars: Optional[int] = None


class PostprocessorProcessRequest(BaseModel):
    """Body of ``POST /v1/admin/postprocess/try``."""

    # Text handed to the postprocessor's ``process``.
    text: str


@app.get("/v1/admin/postprocess")
def admin_postprocess_get():
    """Return ``stats()`` from the answer postprocessor."""
    from ..middleware.answer_postprocess import get_answer_postprocessor
    postprocessor = get_answer_postprocessor()
    return postprocessor.stats()


@app.post("/v1/admin/postprocess/enable")
def admin_postprocess_enable(
    req: PostprocessorEnableRequest,
    request: Request,
):
    """Switch the answer postprocessor on or off.

    Emits a ``postprocess.enable`` audit event and reports the state
    read back via ``is_enabled()``.
    """
    from ..middleware.answer_postprocess import get_answer_postprocessor
    postprocessor = get_answer_postprocessor()
    postprocessor.set_enabled(req.enabled)
    obs = get_obs()
    obs.audit(
        "postprocess.enable",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        enabled=req.enabled,
    )
    return {"ok": True, "enabled": postprocessor.is_enabled()}


@app.post("/v1/admin/postprocess/step/toggle")
def admin_postprocess_step_toggle(
    req: PostprocessorStepToggleRequest,
    request: Request,
):
    """Enable or disable a single postprocessing step by name.

    Raises:
        HTTPException: 404 when ``set_step_enabled`` returns a falsy value.
    """
    from ..middleware.answer_postprocess import get_answer_postprocessor
    postprocessor = get_answer_postprocessor()
    found = postprocessor.set_step_enabled(req.name, req.enabled)
    if found:
        return {"ok": True, "name": req.name, "enabled": req.enabled}
    raise HTTPException(
        status_code=404,
        detail={"error": "step not found", "name": req.name},
    )


@app.post("/v1/admin/postprocess/max_chars")
def admin_postprocess_max_chars(
    req: PostprocessorMaxCharsRequest,
    request: Request,
):
    """Forward ``req.max_chars`` to ``set_max_output_chars`` and echo it."""
    from ..middleware.answer_postprocess import get_answer_postprocessor
    postprocessor = get_answer_postprocessor()
    postprocessor.set_max_output_chars(req.max_chars)
    return {"ok": True, "max_chars": req.max_chars}


@app.post("/v1/admin/postprocess/try")
def admin_postprocess_try(req: PostprocessorProcessRequest):
    """Run ``req.text`` through the postprocessor; return the result dict."""
    from ..middleware.answer_postprocess import get_answer_postprocessor
    postprocessor = get_answer_postprocessor()
    return postprocessor.process(req.text).to_dict()


@app.post("/v1/admin/postprocess/reset")
def admin_postprocess_reset(request: Request):
    """Call ``reset()`` on the answer postprocessor. ``request`` is unused."""
    from ..middleware.answer_postprocess import get_answer_postprocessor
    postprocessor = get_answer_postprocessor()
    postprocessor.reset()
    return {"ok": True}


# ---- v2.73 Retriever health probes -------------------------------------
@app.get("/v1/admin/retriever_health")
def admin_retriever_health_get():
    """Return ``stats()`` from the retriever health tracker."""
    from ..middleware.retriever_health import get_retriever_health
    tracker = get_retriever_health()
    return tracker.stats()


@app.post("/v1/admin/retriever_health/probe/{name}")
def admin_retriever_health_probe(name: str, request: Request):
    """Probe a single registered retriever and return the result dict.

    Raises:
        HTTPException: 404 when ``is_registered(name)`` is falsy.
    """
    from ..middleware.retriever_health import get_retriever_health
    tracker = get_retriever_health()
    if tracker.is_registered(name):
        return tracker.probe(name).to_dict()
    raise HTTPException(
        status_code=404,
        detail={"error": "not registered", "name": name},
    )


@app.post("/v1/admin/retriever_health/probe_all")
def admin_retriever_health_probe_all(request: Request):
    """Probe every retriever; return each result's dict keyed by name."""
    from ..middleware.retriever_health import get_retriever_health
    tracker = get_retriever_health()
    serialized = {}
    for retriever_name, probe_result in tracker.probe_all().items():
        serialized[retriever_name] = probe_result.to_dict()
    return {"results": serialized}


@app.delete("/v1/admin/retriever_health/{name}")
def admin_retriever_health_unregister(name: str, request: Request):
    """Remove the retriever registered as ``name`` from health tracking.

    Raises:
        HTTPException: 404 when ``unregister`` returns a falsy value.
    """
    from ..middleware.retriever_health import get_retriever_health
    removed = get_retriever_health().unregister(name)
    if removed:
        return {"ok": True, "name": name, "removed": True}
    raise HTTPException(
        status_code=404,
        detail={"error": "not registered", "name": name},
    )


@app.post("/v1/admin/retriever_health/reset")
def admin_retriever_health_reset(
    request: Request,
    name: Optional[str] = None,
):
    """Reset retriever-health state.

    ``name`` is forwarded unchanged to ``reset``; the response reports
    ``"all"`` when it is ``None`` or empty.
    """
    from ..middleware.retriever_health import get_retriever_health
    tracker = get_retriever_health()
    tracker.reset(name)
    scope = name or "all"
    return {"ok": True, "name": scope}


# ---- v2.74 Stream throttle ---------------------------------------------
class StreamRateRequest(BaseModel):
    """Body of ``POST /v1/admin/stream_throttle/rate``."""

    # Identifier of the stream whose rate is changed.
    stream_id: str
    # New rate; validated by the throttle's ``set_rate``.
    rate_per_sec: float


class StreamDefaultsRequest(BaseModel):
    """Body of ``POST /v1/admin/stream_throttle/defaults``.

    All three fields are forwarded as keyword arguments to the throttle's
    ``set_defaults``; None values are passed through unchanged.
    """

    # Forwarded as ``rate``.
    rate: Optional[float] = None
    # Forwarded as ``burst``.
    burst: Optional[float] = None
    # Forwarded as ``min_gap_ms``.
    min_gap_ms: Optional[float] = None


@app.get("/v1/admin/stream_throttle")
def admin_stream_throttle_get():
    """Return ``stats()`` from the stream throttle."""
    from ..middleware.stream_throttle import get_stream_throttle
    throttle = get_stream_throttle()
    return throttle.stats()


@app.post("/v1/admin/stream_throttle/rate")
def admin_stream_throttle_set_rate(
    req: StreamRateRequest,
    request: Request,
):
    """Change the rate of one stream.

    Raises:
        HTTPException: 400 when ``set_rate`` raises ``ValueError``;
            404 ("stream not open") when it returns a falsy value.
    """
    from ..middleware.stream_throttle import get_stream_throttle
    throttle = get_stream_throttle()
    try:
        applied = throttle.set_rate(req.stream_id, req.rate_per_sec)
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail={"error": str(exc)},
        )
    if applied:
        return {
            "ok": True,
            "stream_id": req.stream_id,
            "rate_per_sec": req.rate_per_sec,
        }
    raise HTTPException(
        status_code=404,
        detail={"error": "stream not open", "stream_id": req.stream_id},
    )


@app.post("/v1/admin/stream_throttle/defaults")
def admin_stream_throttle_set_defaults(
    req: StreamDefaultsRequest,
    request: Request,
):
    """Update the stream throttle's default rate, burst and minimum gap.

    Emits a ``stream_throttle.set_defaults`` audit event on success.

    Raises:
        HTTPException: 400 when ``set_defaults`` raises ``ValueError``.
    """
    from ..middleware.stream_throttle import get_stream_throttle
    throttle = get_stream_throttle()
    try:
        throttle.set_defaults(
            rate=req.rate,
            burst=req.burst,
            min_gap_ms=req.min_gap_ms,
        )
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail={"error": str(exc)},
        )
    obs = get_obs()
    obs.audit(
        "stream_throttle.set_defaults",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        rate=req.rate,
        burst=req.burst,
        min_gap_ms=req.min_gap_ms,
    )
    return {"ok": True}


@app.post("/v1/admin/stream_throttle/reset")
def admin_stream_throttle_reset(request: Request):
    """Call ``reset()`` on the stream throttle. ``request`` is unused."""
    from ..middleware.stream_throttle import get_stream_throttle
    throttle = get_stream_throttle()
    throttle.reset()
    return {"ok": True}


# ---- v2.75 Context window sizer ----------------------------------------
class ContextModelRequest(BaseModel):
    """Body of ``POST /v1/admin/context_sizer/model``.

    Fields are forwarded to the sizer's ``register_model``.
    """

    # Model name to register.
    name: str
    # Forwarded as ``max_context``.
    max_context: int
    # Forwarded as ``default_output``; defaults to 1024.
    default_output: int = 1024


class ContextBudgetRequest(BaseModel):
    """Body of ``POST /v1/admin/context_sizer/budget``.

    Every field is forwarded under the same keyword to the sizer's
    ``budget``.
    """

    # Model name the budget is computed for.
    model: str
    # Token counts supplied by the caller; each defaults to 0.
    system_tokens: int = 0
    query_tokens: int = 0
    history_tokens: int = 0
    # Optional output reservation; None is passed through unchanged.
    output_reserve: Optional[int] = None
    # Safety margin; defaults to 0.05 (presumably a fraction — confirm).
    safety_margin: float = 0.05


@app.get("/v1/admin/context_sizer")
def admin_context_sizer_get():
    """Return ``stats()`` from the context sizer."""
    from ..middleware.context_sizer import get_context_sizer
    sizer = get_context_sizer()
    return sizer.stats()


@app.post("/v1/admin/context_sizer/model")
def admin_context_sizer_register_model(
    req: ContextModelRequest,
    request: Request,
):
    """Register a model with the context sizer.

    ``register_model`` is called with ``replace=True``. Emits a
    ``context_sizer.register_model`` audit event on success.

    Raises:
        HTTPException: 400 when ``register_model`` raises ``ValueError``
            or ``KeyError``.
    """
    from ..middleware.context_sizer import get_context_sizer
    sizer = get_context_sizer()
    try:
        sizer.register_model(
            req.name,
            max_context=req.max_context,
            default_output=req.default_output,
            replace=True,
        )
    except (ValueError, KeyError) as exc:
        raise HTTPException(
            status_code=400,
            detail={"error": str(exc)},
        )
    obs = get_obs()
    obs.audit(
        "context_sizer.register_model",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        name=req.name,
        max_context=req.max_context,
    )
    return {
        "ok": True,
        "name": req.name,
        "max_context": req.max_context,
    }


@app.delete("/v1/admin/context_sizer/model/{name}")
def admin_context_sizer_unregister_model(name: str, request: Request):
    """Remove the model registered as ``name`` from the context sizer.

    Raises:
        HTTPException: 404 when ``unregister_model`` returns a falsy value.
    """
    from ..middleware.context_sizer import get_context_sizer
    removed = get_context_sizer().unregister_model(name)
    if removed:
        return {"ok": True, "name": name, "removed": True}
    raise HTTPException(
        status_code=404,
        detail={"error": "not registered", "name": name},
    )


@app.post("/v1/admin/context_sizer/budget")
def admin_context_sizer_budget(req: ContextBudgetRequest):
    """Compute a context budget for ``req.model`` and return it as a dict.

    Every request field is forwarded under the same keyword to the
    sizer's ``budget``.

    Raises:
        HTTPException: 400 when ``budget`` raises ``ValueError`` or
            ``KeyError``. ``KeyError`` is handled to match the model
            registration endpoint, so a failed lookup (e.g. an unknown
            model) is a client error rather than an unhandled 500.
    """
    from ..middleware.context_sizer import get_context_sizer
    s = get_context_sizer()
    try:
        result = s.budget(
            model=req.model,
            system_tokens=req.system_tokens,
            query_tokens=req.query_tokens,
            history_tokens=req.history_tokens,
            output_reserve=req.output_reserve,
            safety_margin=req.safety_margin,
        )
    # NOTE(review): budget() is not visible here; KeyError is assumed to
    # be how an unregistered model surfaces — confirm.
    except (ValueError, KeyError) as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    return result.to_dict()


@app.post("/v1/admin/context_sizer/reset")
def admin_context_sizer_reset(request: Request):
    """Call ``reset()`` on the context sizer. ``request`` is unused."""
    from ..middleware.context_sizer import get_context_sizer
    sizer = get_context_sizer()
    sizer.reset()
    return {"ok": True}


# ---- v2.76 Answer confidence -------------------------------------------
class ConfidenceWeightRequest(BaseModel):
    """Body of ``POST /v1/admin/confidence/weight``."""

    # Name of the signal whose weight is set.
    signal: str
    # New weight; validated by the scorer's ``set_weight``.
    weight: float


class ConfidenceThresholdRequest(BaseModel):
    """Body of ``POST /v1/admin/confidence/thresholds``.

    Passed positionally to the scorer's ``set_thresholds(high, medium)``.
    """

    # Threshold for the "high" band.
    high: float
    # Threshold for the "medium" band.
    medium: float


class ConfidenceScoreRequest(BaseModel):
    """Body of ``POST /v1/admin/confidence/score``.

    Each field is copied under the same name into a ``ConfidenceSignals``
    instance; how the scorer treats a None signal is not visible in this
    file.
    """

    # Retrieval signals.
    retrieval_count: Optional[int] = None
    min_k: int = 3
    top_score: Optional[float] = None
    second_score: Optional[float] = None
    # Verification / citation signals.
    verifier_pass: Optional[bool] = None
    n_citations: Optional[int] = None
    n_claims: Optional[int] = None
    # Intent-classification signal.
    intent_confidence: Optional[float] = None


@app.get("/v1/admin/confidence")
def admin_confidence_get():
    """Return ``stats()`` from the answer-confidence scorer."""
    from ..middleware.answer_confidence import get_confidence_scorer
    scorer = get_confidence_scorer()
    return scorer.stats()


@app.post("/v1/admin/confidence/weight")
def admin_confidence_set_weight(
    req: ConfidenceWeightRequest,
    request: Request,
):
    """Set the weight of one confidence signal.

    Emits a ``confidence.set_weight`` audit event on success.

    Raises:
        HTTPException: 400 when ``set_weight`` raises ``ValueError``.
    """
    from ..middleware.answer_confidence import get_confidence_scorer
    try:
        scorer = get_confidence_scorer()
        scorer.set_weight(req.signal, req.weight)
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail={"error": str(exc)},
        )
    obs = get_obs()
    obs.audit(
        "confidence.set_weight",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        signal=req.signal,
        weight=req.weight,
    )
    return {"ok": True, "signal": req.signal, "weight": req.weight}


@app.post("/v1/admin/confidence/thresholds")
def admin_confidence_set_thresholds(
    req: ConfidenceThresholdRequest,
    request: Request,
):
    """Set the scorer's high and medium thresholds.

    Raises:
        HTTPException: 400 when ``set_thresholds`` raises ``ValueError``.
    """
    from ..middleware.answer_confidence import get_confidence_scorer
    try:
        scorer = get_confidence_scorer()
        scorer.set_thresholds(req.high, req.medium)
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail={"error": str(exc)},
        )
    return {"ok": True, "high": req.high, "medium": req.medium}


@app.post("/v1/admin/confidence/score")
def admin_confidence_score(req: ConfidenceScoreRequest):
    """Score a set of caller-supplied confidence signals.

    Copies each request field into a ``ConfidenceSignals`` instance,
    scores it, and returns the result as a dict.
    """
    from ..middleware.answer_confidence import (
        ConfidenceSignals,
        get_confidence_scorer,
    )
    signal_values = dict(
        retrieval_count=req.retrieval_count,
        min_k=req.min_k,
        top_score=req.top_score,
        second_score=req.second_score,
        verifier_pass=req.verifier_pass,
        n_citations=req.n_citations,
        n_claims=req.n_claims,
        intent_confidence=req.intent_confidence,
    )
    signals = ConfidenceSignals(**signal_values)
    scorer = get_confidence_scorer()
    return scorer.score(signals).to_dict()


@app.post("/v1/admin/confidence/reset")
def admin_confidence_reset(request: Request):
    """Call ``reset()`` on the confidence scorer. ``request`` is unused."""
    from ..middleware.answer_confidence import get_confidence_scorer
    scorer = get_confidence_scorer()
    scorer.reset()
    return {"ok": True}


# ---- v2.77 Legal entity extraction -------------------------------------
class EntityExtractRequest(BaseModel):
    """Body of ``POST /v1/admin/legal_entities/extract``."""

    # Text handed to the extractor's ``extract``.
    text: str


class EntityTypeToggleRequest(BaseModel):
    """Body of ``POST /v1/admin/legal_entities/type``."""

    # Entity type name; first argument to ``set_type_enabled``.
    type: str
    # Desired on/off state for that entity type.
    enabled: bool


@app.get("/v1/admin/legal_entities")
def admin_legal_entities_get():
    """Return ``stats()`` from the legal entity extractor."""
    from ..middleware.legal_entities import get_legal_entity_extractor
    extractor = get_legal_entity_extractor()
    return extractor.stats()


@app.post("/v1/admin/legal_entities/extract")
def admin_legal_entities_extract(req: EntityExtractRequest):
    """Run entity extraction over ``req.text``; return the result dict."""
    from ..middleware.legal_entities import get_legal_entity_extractor
    extractor = get_legal_entity_extractor()
    return extractor.extract(req.text).to_dict()


@app.post("/v1/admin/legal_entities/type")
def admin_legal_entities_toggle_type(
    req: EntityTypeToggleRequest,
    request: Request,
):
    """Enable or disable one entity type on the extractor.

    The return value of ``set_type_enabled`` is ignored; the response
    echoes the requested values.
    """
    from ..middleware.legal_entities import get_legal_entity_extractor
    extractor = get_legal_entity_extractor()
    extractor.set_type_enabled(req.type, req.enabled)
    return {"ok": True, "type": req.type, "enabled": req.enabled}


@app.post("/v1/admin/legal_entities/reset")
def admin_legal_entities_reset(request: Request):
    """Call ``reset()`` on the legal entity extractor. ``request`` is unused."""
    from ..middleware.legal_entities import get_legal_entity_extractor
    extractor = get_legal_entity_extractor()
    extractor.reset()
    return {"ok": True}


# ---- v2.78 Conversation summarizer -------------------------------------
class SummarizerPolicyRequest(BaseModel):
    """Body of ``POST /v1/admin/conversation_summarizer/policy``."""

    # Policy name; validated by the summarizer's ``set_policy``.
    policy: str


class SummarizerConfigRequest(BaseModel):
    """Body of ``POST /v1/admin/conversation_summarizer/config``.

    Every field is optional; the endpoint applies only those that are
    not None.
    """

    # Applied via ``set_window_size`` when provided.
    window_size: Optional[int] = None
    # Applied via ``set_head_size`` when provided.
    head_size: Optional[int] = None
    # Applied via ``set_min_turns_to_compress`` when provided.
    min_turns_to_compress: Optional[int] = None


class SummarizerCompressRequest(BaseModel):
    """Body of ``POST /v1/admin/conversation_summarizer/try``."""

    # Conversation turns as free-form dicts, handed to ``compress``; the
    # expected keys are defined by the summarizer, not by this model.
    turns: List[Dict[str, Any]]


@app.get("/v1/admin/conversation_summarizer")
def admin_conversation_summarizer_get():
    """Return the conversation summarizer's ``config()`` and ``stats()``."""
    from ..middleware.conversation_summarizer import get_conversation_summarizer
    summarizer = get_conversation_summarizer()
    config = summarizer.config()
    stats = summarizer.stats()
    return {"config": config, "stats": stats}


@app.post("/v1/admin/conversation_summarizer/policy")
def admin_conversation_summarizer_set_policy(
    req: SummarizerPolicyRequest,
    request: Request,
):
    """Set the conversation summarizer's policy.

    Emits a ``conversation_summarizer.set_policy`` audit event on success.

    Raises:
        HTTPException: 400 when ``set_policy`` raises ``ValueError``.
    """
    from ..middleware.conversation_summarizer import get_conversation_summarizer
    try:
        summarizer = get_conversation_summarizer()
        summarizer.set_policy(req.policy)
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail={"error": str(exc)},
        )
    obs = get_obs()
    obs.audit(
        "conversation_summarizer.set_policy",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        policy=req.policy,
    )
    return {"ok": True, "policy": req.policy}


@app.post("/v1/admin/conversation_summarizer/config")
def admin_conversation_summarizer_config(
    req: SummarizerConfigRequest,
    request: Request,
):
    """Apply whichever summarizer settings the request provides.

    Settings are applied in order (window size, head size, minimum
    turns), skipping None fields. If a later setter raises, earlier ones
    have already run. Returns the summarizer's resulting ``config()``.

    Raises:
        HTTPException: 400 when any setter raises ``ValueError``.
    """
    from ..middleware.conversation_summarizer import get_conversation_summarizer
    summarizer = get_conversation_summarizer()
    window = req.window_size
    head = req.head_size
    min_turns = req.min_turns_to_compress
    try:
        if window is not None:
            summarizer.set_window_size(window)
        if head is not None:
            summarizer.set_head_size(head)
        if min_turns is not None:
            summarizer.set_min_turns_to_compress(min_turns)
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail={"error": str(exc)},
        )
    return {"ok": True, "config": summarizer.config()}


@app.post("/v1/admin/conversation_summarizer/try")
def admin_conversation_summarizer_try(req: SummarizerCompressRequest):
    """Compress ``req.turns`` with the summarizer; return the result dict."""
    from ..middleware.conversation_summarizer import get_conversation_summarizer
    summarizer = get_conversation_summarizer()
    return summarizer.compress(req.turns).to_dict()


@app.post("/v1/admin/conversation_summarizer/reset")
def admin_conversation_summarizer_reset(request: Request):
    """Call ``reset()`` on the conversation summarizer. ``request`` is unused."""
    from ..middleware.conversation_summarizer import get_conversation_summarizer
    summarizer = get_conversation_summarizer()
    summarizer.reset()
    return {"ok": True}


# ---- v2.79 Diversity ranker --------------------------------------------
class DiversityLambdaRequest(BaseModel):
    """Body of ``POST /v1/admin/diversity/lambda``."""

    # Lambda value; validated by the ranker's ``set_lambda``.
    lambda_weight: float


class DiversitySimRequest(BaseModel):
    """Body of ``POST /v1/admin/diversity/similarity``."""

    # Similarity identifier; validated by the ranker's ``set_similarity``.
    similarity: str


class DiversityRankRequest(BaseModel):
    """Body of ``POST /v1/admin/diversity/rank``."""

    # Candidate results as free-form dicts, handed to the ranker's
    # ``rank``; the expected keys are defined by the ranker.
    results: List[Dict[str, Any]]
    # Optional ``k`` forwarded to ``rank``; None is passed through.
    k: Optional[int] = None


@app.get("/v1/admin/diversity")
def admin_diversity_get():
    """Return the diversity ranker's ``config()`` and ``stats()``."""
    from ..middleware.diversity_ranker import get_diversity_ranker
    ranker = get_diversity_ranker()
    config = ranker.config()
    stats = ranker.stats()
    return {"config": config, "stats": stats}


@app.post("/v1/admin/diversity/lambda")
def admin_diversity_set_lambda(
    req: DiversityLambdaRequest,
    request: Request,
):
    """Set the diversity ranker's lambda.

    Emits a ``diversity.set_lambda`` audit event on success.

    Raises:
        HTTPException: 400 when ``set_lambda`` raises ``ValueError``.
    """
    from ..middleware.diversity_ranker import get_diversity_ranker
    try:
        ranker = get_diversity_ranker()
        ranker.set_lambda(req.lambda_weight)
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail={"error": str(exc)},
        )
    obs = get_obs()
    obs.audit(
        "diversity.set_lambda",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        lambda_weight=req.lambda_weight,
    )
    return {"ok": True, "lambda": req.lambda_weight}


@app.post("/v1/admin/diversity/similarity")
def admin_diversity_set_similarity(
    req: DiversitySimRequest,
    request: Request,
):
    """Select the similarity used by the diversity ranker.

    Raises:
        HTTPException: 400 when ``set_similarity`` raises ``ValueError``.
    """
    from ..middleware.diversity_ranker import get_diversity_ranker
    try:
        ranker = get_diversity_ranker()
        ranker.set_similarity(req.similarity)
    except ValueError as exc:
        raise HTTPException(
            status_code=400,
            detail={"error": str(exc)},
        )
    return {"ok": True, "similarity": req.similarity}


@app.post("/v1/admin/diversity/rank")
def admin_diversity_rank(req: DiversityRankRequest):
    """Rerank caller-supplied results with the diversity ranker.

    Returns the reranked results as produced by ``rank`` plus each
    decision converted with ``to_dict()``.
    """
    from ..middleware.diversity_ranker import get_diversity_ranker
    ranker = get_diversity_ranker()
    reranked, decisions = ranker.rank(req.results, k=req.k)
    decision_dicts = []
    for decision in decisions:
        decision_dicts.append(decision.to_dict())
    return {"reranked": reranked, "decisions": decision_dicts}


@app.post("/v1/admin/diversity/reset")
def admin_diversity_reset(request: Request):
    """Call ``reset()`` on the diversity ranker and acknowledge."""
    from ..middleware.diversity_ranker import get_diversity_ranker

    ranker = get_diversity_ranker()
    ranker.reset()
    return {"ok": True}


# ---- v2.80 Synonym expansion -------------------------------------------
class SynonymGroupRequest(BaseModel):
    """Body of ``POST /v1/admin/synonyms/group``."""
    # Identifier handed to the expander's ``register_group``.
    group_id: str
    # Terms registered under ``group_id``.
    terms: List[str]
    # Forwarded as the ``case_sensitive`` keyword; ``False`` when omitted.
    case_sensitive: bool = False


class SynonymExpandRequest(BaseModel):
    """Body of ``POST /v1/admin/synonyms/expand``."""
    # Query text given to the expander's ``expand``.
    query: str
    # Forwarded as the ``joiner`` keyword of ``expand``.
    joiner: str = " OR "


@app.get("/v1/admin/synonyms")
def admin_synonyms_get():
    """Return the synonym expander's ``stats()``."""
    from ..middleware.synonym_expansion import get_synonym_expander

    expander = get_synonym_expander()
    return expander.stats()


@app.post("/v1/admin/synonyms/group")
def admin_synonyms_register_group(req: SynonymGroupRequest,
                                  request: Request):
    """Register a synonym group (with ``replace=True``) and audit it.

    Args:
        req: Group id, its terms and the case-sensitivity flag.
        request: Incoming request; supplies the ``x-api-key`` header and
            ``state.request_id`` for the audit entry.

    Returns:
        ``{"ok": True, "group_id": ..., "n_terms": ...}``.

    Raises:
        HTTPException: 400 when ``register_group`` raises ``ValueError``
            or ``KeyError``.
    """
    from ..middleware.synonym_expansion import get_synonym_expander

    try:
        get_synonym_expander().register_group(
            req.group_id,
            req.terms,
            case_sensitive=req.case_sensitive,
            replace=True,
        )
    except (ValueError, KeyError) as e:
        # Chain explicitly so the original error stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    # The audit entry is written only after registration succeeded.
    get_obs().audit(
        "synonyms.register_group",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        group_id=req.group_id,
        n_terms=len(req.terms),
    )
    return {"ok": True, "group_id": req.group_id,
            "n_terms": len(req.terms)}


@app.delete("/v1/admin/synonyms/group/{group_id}")
def admin_synonyms_unregister_group(group_id: str, request: Request):
    """Unregister a synonym group by id.

    Responds 404 when ``unregister_group`` returns a falsy value.
    """
    from ..middleware.synonym_expansion import get_synonym_expander

    was_removed = get_synonym_expander().unregister_group(group_id)
    if was_removed:
        return {"ok": True, "group_id": group_id, "removed": True}
    not_found = {"error": "group not found", "group_id": group_id}
    raise HTTPException(status_code=404, detail=not_found)


@app.post("/v1/admin/synonyms/expand")
def admin_synonyms_expand(req: SynonymExpandRequest):
    """Expand ``req.query`` with the synonym expander and return ``to_dict()``."""
    from ..middleware.synonym_expansion import get_synonym_expander

    expander = get_synonym_expander()
    expansion = expander.expand(req.query, joiner=req.joiner)
    return expansion.to_dict()


@app.post("/v1/admin/synonyms/reset_stats")
def admin_synonyms_reset_stats(request: Request):
    """Call ``reset_stats()`` on the synonym expander and acknowledge."""
    from ..middleware.synonym_expansion import get_synonym_expander

    expander = get_synonym_expander()
    expander.reset_stats()
    return {"ok": True}


# ---- v2.81 Feedback learning -------------------------------------------
class FeedbackRecordRequest(BaseModel):
    """Body of ``POST /v1/admin/feedback/record``.

    Every field is forwarded by keyword to the feedback learner's
    ``record``.
    """
    query_id: str
    retriever: str
    # Accepted values are decided by ``record`` (not visible here).
    feedback: str
    score: Optional[float] = None
    comment: Optional[str] = None


class FeedbackLRRequest(BaseModel):
    """Body of ``POST /v1/admin/feedback/lr``."""
    # Forwarded unchanged to the learner's ``set_learning_rate``.
    learning_rate: float


class FeedbackRegisterRequest(BaseModel):
    """Body of ``POST /v1/admin/feedback/register``."""
    # Retriever name handed to ``register_retriever``.
    name: str
    # Forwarded as the ``initial_weight`` keyword; 1.0 when omitted.
    initial_weight: float = 1.0


@app.get("/v1/admin/feedback")
def admin_feedback_get():
    """Return the feedback learner's ``stats()``."""
    from ..middleware.feedback_learning import get_feedback_learner

    learner = get_feedback_learner()
    return learner.stats()


@app.post("/v1/admin/feedback/record")
def admin_feedback_record(req: FeedbackRecordRequest, request: Request):
    """Record one feedback event with the feedback learner.

    Args:
        req: Query id, retriever, feedback label and optional score/comment.
        request: Accepted but not read by this handler.

    Returns:
        ``{"ok": True}``.

    Raises:
        HTTPException: 400 when ``record`` raises ``ValueError``.
    """
    from ..middleware.feedback_learning import get_feedback_learner

    learner = get_feedback_learner()
    try:
        learner.record(
            query_id=req.query_id,
            retriever=req.retriever,
            feedback=req.feedback,
            score=req.score,
            comment=req.comment,
        )
    except ValueError as e:
        # Chain explicitly so the original ValueError stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    return {"ok": True}


@app.post("/v1/admin/feedback/register")
def admin_feedback_register(req: FeedbackRegisterRequest,
                            request: Request):
    """Register a retriever with the feedback learner.

    Returns:
        ``{"ok": True, "name": ..., "initial_weight": ...}``.

    Raises:
        HTTPException: 400 when ``register_retriever`` raises
            ``ValueError``.
    """
    from ..middleware.feedback_learning import get_feedback_learner

    learner = get_feedback_learner()
    try:
        learner.register_retriever(
            req.name, initial_weight=req.initial_weight)
    except ValueError as e:
        # Chain explicitly so the original ValueError stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    return {"ok": True, "name": req.name,
            "initial_weight": req.initial_weight}


@app.get("/v1/admin/feedback/propose")
def admin_feedback_propose():
    """Return the learner's proposed updates, each serialised via ``to_dict()``."""
    from ..middleware.feedback_learning import get_feedback_learner

    learner = get_feedback_learner()
    proposals = [
        proposal.to_dict() for proposal in learner.propose_updates()
    ]
    return {"proposals": proposals}


@app.post("/v1/admin/feedback/apply")
def admin_feedback_apply(request: Request):
    """Apply the learner's updates and audit the outcome.

    The value returned by ``apply_updates()`` is recorded in the audit
    entry as ``n_changed`` and echoed back as ``changed``.
    """
    from ..middleware.feedback_learning import get_feedback_learner

    learner = get_feedback_learner()
    n_changed = learner.apply_updates()
    get_obs().audit(
        "feedback.apply_updates",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        n_changed=n_changed,
    )
    return {"ok": True, "changed": n_changed}


@app.post("/v1/admin/feedback/lr")
def admin_feedback_set_lr(req: FeedbackLRRequest, request: Request):
    """Set the feedback learner's learning rate.

    Returns:
        ``{"ok": True, "learning_rate": <learning_rate>}``.

    Raises:
        HTTPException: 400 when ``set_learning_rate`` raises
            ``ValueError``.
    """
    from ..middleware.feedback_learning import get_feedback_learner

    rate = req.learning_rate
    try:
        get_feedback_learner().set_learning_rate(rate)
    except ValueError as e:
        # Chain explicitly so the original ValueError stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    return {"ok": True, "learning_rate": rate}


@app.post("/v1/admin/feedback/reset")
def admin_feedback_reset(request: Request):
    """Call ``reset()`` on the feedback learner and acknowledge."""
    from ..middleware.feedback_learning import get_feedback_learner

    learner = get_feedback_learner()
    learner.reset()
    return {"ok": True}


# ---- v2.82 Embedding compression ---------------------------------------
class EmbeddingCompressRequest(BaseModel):
    """Body of ``POST /v1/admin/embedding_compress/try``."""
    # Vector that the handler compresses, decompresses and compares.
    embedding: List[float]


class EmbeddingFloat16Request(BaseModel):
    """Body of ``POST /v1/admin/embedding_compress/mode``."""
    # Forwarded unchanged to the compressor's ``set_use_float16``.
    use_float16: bool


@app.get("/v1/admin/embedding_compress")
def admin_embedding_compress_get():
    """Return the embedding compressor's ``stats()``."""
    from ..middleware.embedding_compress import get_embedding_compressor

    compressor = get_embedding_compressor()
    return compressor.stats()


@app.post("/v1/admin/embedding_compress/mode")
def admin_embedding_compress_set_mode(req: EmbeddingFloat16Request,
                                      request: Request):
    """Toggle the compressor's float16 flag and audit the change.

    ``request`` supplies the ``x-api-key`` header and ``state.request_id``
    recorded in the audit entry.
    """
    from ..middleware.embedding_compress import get_embedding_compressor

    compressor = get_embedding_compressor()
    compressor.set_use_float16(req.use_float16)
    get_obs().audit(
        "embedding_compress.set_mode",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        use_float16=req.use_float16,
    )
    return {"ok": True, "use_float16": req.use_float16}


@app.post("/v1/admin/embedding_compress/try")
def admin_embedding_compress_try(req: EmbeddingCompressRequest):
    """Round-trip one embedding through the compressor and report the result.

    The response gives the uncompressed size (computed as 4 bytes per
    component), the compressed blob length, their ratio, and the cosine
    similarity between the input and the decompressed vector.
    """
    from ..middleware.embedding_compress import (
        cosine_similarity,
        get_embedding_compressor,
    )

    compressor = get_embedding_compressor()
    vector = req.embedding
    packed = compressor.compress(vector)
    # ``decompress`` must return an object exposing ``tolist()``.
    restored = compressor.decompress(packed).tolist()
    similarity = cosine_similarity(vector, restored)

    raw_size = 4 * len(vector)
    packed_size = len(packed)
    return {
        "original_bytes": raw_size,
        "compressed_bytes": packed_size,
        # max(..., 1) avoids dividing by zero on an empty blob.
        "ratio": round(raw_size / max(packed_size, 1), 4),
        "cosine_similarity": round(similarity, 6),
        "dim": len(vector),
    }


@app.post("/v1/admin/embedding_compress/reset")
def admin_embedding_compress_reset(request: Request):
    """Call ``reset()`` on the embedding compressor and acknowledge."""
    from ..middleware.embedding_compress import get_embedding_compressor

    compressor = get_embedding_compressor()
    compressor.reset()
    return {"ok": True}


# ---- v2.83 Phrase detector ---------------------------------------------
class PhraseTrainRequest(BaseModel):
    """Body of ``POST /v1/admin/phrases/train``."""
    # Document texts handed to the phrase detector's ``train``.
    documents: List[str]
    # Forwarded as the ``verbose`` keyword of ``train``.
    verbose: bool = False


class PhraseTokenizeRequest(BaseModel):
    """Body of ``POST /v1/admin/phrases/tokenize``."""
    # Text handed to the phrase detector's ``tokenize``.
    text: str


class PhraseConfigRequest(BaseModel):
    """Body of ``POST /v1/admin/phrases/config``.

    Each field is optional; the handler only calls the matching setter
    for fields that are not ``None``.
    """
    min_frequency: Optional[int] = None
    max_tau: Optional[float] = None
    max_phrase_length: Optional[int] = None


@app.get("/v1/admin/phrases")
def admin_phrases_get(limit: int = 100):
    """Return the phrase detector's stats plus up to ``limit`` phrases.

    ``limit`` is passed straight to ``phrases(limit=...)``; no bounds are
    enforced here.
    """
    from ..middleware.phrase_detector import get_phrase_detector

    detector = get_phrase_detector()
    detector_stats = detector.stats()
    listed = detector.phrases(limit=limit)
    return {"stats": detector_stats, "phrases": listed}


@app.post("/v1/admin/phrases/train")
def admin_phrases_train(req: PhraseTrainRequest, request: Request):
    """Train the phrase detector on the submitted documents and audit it.

    Both the audit entry and the response carry the number of documents
    and the length of what ``train`` returned.
    """
    from ..middleware.phrase_detector import get_phrase_detector

    detector = get_phrase_detector()
    found = detector.train(req.documents, verbose=req.verbose)
    doc_count = len(req.documents)
    found_count = len(found)
    get_obs().audit(
        "phrases.train",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        n_docs=doc_count,
        n_discovered=found_count,
    )
    return {
        "ok": True,
        "n_docs": doc_count,
        "n_phrases_discovered": found_count,
    }


@app.post("/v1/admin/phrases/tokenize")
def admin_phrases_tokenize(req: PhraseTokenizeRequest):
    """Tokenize ``req.text`` with the phrase detector.

    Returns the tokens, their count, and the subset containing ``"_"``.
    """
    from ..middleware.phrase_detector import get_phrase_detector

    detector = get_phrase_detector()
    tokens = detector.tokenize(req.text)
    # NOTE(review): presumably the detector joins phrase words with "_";
    # a token that already contained an underscore in the input would be
    # listed here too — confirm against the detector.
    underscored = [token for token in tokens if "_" in token]
    return {
        "tokens": tokens,
        "n_tokens": len(tokens),
        "applied_phrases": underscored,
    }


@app.post("/v1/admin/phrases/config")
def admin_phrases_set_config(req: PhraseConfigRequest, request: Request):
    """Update whichever phrase-detector settings the body provides.

    Fields left as ``None`` are skipped. Setters run in the order
    ``min_frequency``, ``max_tau``, ``max_phrase_length``; a ``ValueError``
    from one of them stops the remaining ones, and setters that already
    ran are not rolled back here.

    Returns:
        ``{"ok": True, "config": <detector.config()>}``.

    Raises:
        HTTPException: 400 when any setter raises ``ValueError``.
    """
    from ..middleware.phrase_detector import get_phrase_detector

    detector = get_phrase_detector()
    try:
        if req.min_frequency is not None:
            detector.set_min_frequency(req.min_frequency)
        if req.max_tau is not None:
            detector.set_max_tau(req.max_tau)
        if req.max_phrase_length is not None:
            detector.set_max_phrase_length(req.max_phrase_length)
    except ValueError as e:
        # Chain explicitly so the original ValueError stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    return {"ok": True, "config": detector.config()}


@app.post("/v1/admin/phrases/reset")
def admin_phrases_reset(request: Request):
    """Call ``reset()`` on the phrase detector and acknowledge."""
    from ..middleware.phrase_detector import get_phrase_detector

    detector = get_phrase_detector()
    detector.reset()
    return {"ok": True}


# ---- v2.84 Meaning preservation ----------------------------------------
class MeaningValidateRequest(BaseModel):
    """Body of ``POST /v1/admin/meaning/validate``.

    ``source`` and ``target`` are passed, in that order, to the meaning
    validator's ``validate``.
    """
    source: str
    target: str


class MeaningThresholdRequest(BaseModel):
    """Body of ``POST /v1/admin/meaning/thresholds``.

    Both values are passed positionally (pass first, warn second) to the
    validator's ``set_thresholds``.
    """
    pass_threshold: float
    warn_threshold: float


class MeaningWeightRequest(BaseModel):
    """Body of ``POST /v1/admin/meaning/weight``.

    ``role`` and ``weight`` are passed to the validator's ``set_weight``.
    """
    role: str
    weight: float


class MeaningVocabRequest(BaseModel):
    """Body of ``POST /v1/admin/meaning/vocab/add``.

    ``role`` and ``word`` are passed to the validator's ``add_role_word``.
    """
    role: str
    word: str


@app.get("/v1/admin/meaning")
def admin_meaning_get():
    """Return the meaning validator's ``stats()``."""
    from ..middleware.meaning_preservation import get_meaning_validator

    validator = get_meaning_validator()
    return validator.stats()


@app.post("/v1/admin/meaning/validate")
def admin_meaning_validate(req: MeaningValidateRequest):
    """Validate ``req.target`` against ``req.source`` and return ``to_dict()``."""
    from ..middleware.meaning_preservation import get_meaning_validator

    validator = get_meaning_validator()
    verdict = validator.validate(req.source, req.target)
    return verdict.to_dict()


@app.post("/v1/admin/meaning/thresholds")
def admin_meaning_set_thresholds(req: MeaningThresholdRequest,
                                 request: Request):
    """Set the meaning validator's pass and warn thresholds.

    Returns:
        ``{"ok": True, "pass": <pass_threshold>, "warn": <warn_threshold>}``.

    Raises:
        HTTPException: 400 when ``set_thresholds`` raises ``ValueError``.
    """
    from ..middleware.meaning_preservation import get_meaning_validator

    pass_at, warn_at = req.pass_threshold, req.warn_threshold
    try:
        get_meaning_validator().set_thresholds(pass_at, warn_at)
    except ValueError as e:
        # Chain explicitly so the original ValueError stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    return {"ok": True, "pass": pass_at, "warn": warn_at}


@app.post("/v1/admin/meaning/weight")
def admin_meaning_set_weight(req: MeaningWeightRequest, request: Request):
    """Set the weight the meaning validator uses for one role.

    Returns:
        ``{"ok": True, "role": ..., "weight": ...}``.

    Raises:
        HTTPException: 400 when ``set_weight`` raises ``ValueError``.
    """
    from ..middleware.meaning_preservation import get_meaning_validator

    validator = get_meaning_validator()
    try:
        validator.set_weight(req.role, req.weight)
    except ValueError as e:
        # Chain explicitly so the original ValueError stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    return {"ok": True, "role": req.role, "weight": req.weight}


@app.post("/v1/admin/meaning/vocab/add")
def admin_meaning_vocab_add(req: MeaningVocabRequest, request: Request):
    """Add a word to a role's vocabulary in the meaning validator.

    Returns:
        ``{"ok": True, "role": ..., "word": ...}``.

    Raises:
        HTTPException: 400 when ``add_role_word`` raises ``ValueError``.
    """
    from ..middleware.meaning_preservation import get_meaning_validator

    validator = get_meaning_validator()
    try:
        validator.add_role_word(req.role, req.word)
    except ValueError as e:
        # Chain explicitly so the original ValueError stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    return {"ok": True, "role": req.role, "word": req.word}


@app.post("/v1/admin/meaning/reset")
def admin_meaning_reset(request: Request):
    """Call ``reset()`` on the meaning validator and acknowledge."""
    from ..middleware.meaning_preservation import get_meaning_validator

    validator = get_meaning_validator()
    validator.reset()
    return {"ok": True}


# ---- v2.85 Retrieval explainability ------------------------------------
class ExplainVerbosityRequest(BaseModel):
    """Body of ``POST /v1/admin/retrieval_explain/verbosity``."""
    # Forwarded unchanged to the explainer's ``set_verbosity``.
    verbosity: str


class ExplainBatchRequest(BaseModel):
    """Body of ``POST /v1/admin/retrieval_explain/batch``."""
    # Query text passed as the first argument of ``explain_batch``.
    query: str
    # Result dicts passed as the second argument; their keys are not
    # constrained by this model.
    results: List[Dict[str, Any]]


@app.get("/v1/admin/retrieval_explain")
def admin_retrieval_explain_get():
    """Return the retrieval explainer's ``stats()``."""
    from ..middleware.retrieval_explain import get_retrieval_explainer

    explainer = get_retrieval_explainer()
    return explainer.stats()


@app.post("/v1/admin/retrieval_explain/verbosity")
def admin_retrieval_explain_set_verbosity(req: ExplainVerbosityRequest,
                                          request: Request):
    """Set the retrieval explainer's verbosity.

    Returns:
        ``{"ok": True, "verbosity": <verbosity>}``.

    Raises:
        HTTPException: 400 when ``set_verbosity`` raises ``ValueError``.
    """
    from ..middleware.retrieval_explain import get_retrieval_explainer

    level = req.verbosity
    try:
        get_retrieval_explainer().set_verbosity(level)
    except ValueError as e:
        # Chain explicitly so the original ValueError stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    return {"ok": True, "verbosity": level}


@app.post("/v1/admin/retrieval_explain/batch")
def admin_retrieval_explain_batch(req: ExplainBatchRequest):
    """Return ``explain_batch(query, results)`` under the ``annotated`` key."""
    from ..middleware.retrieval_explain import get_retrieval_explainer

    explainer = get_retrieval_explainer()
    return {"annotated": explainer.explain_batch(req.query, req.results)}


# ---- v2.86 Smart chunking ----------------------------------------------
class ChunkerCapsRequest(BaseModel):
    """Body of ``POST /v1/admin/chunker/caps``.

    Both values are passed positionally (hard cap first, soft cap second)
    to the smart chunker's ``set_caps``.
    """
    hard_cap_chars: int
    soft_cap_chars: int


class ChunkerOverlapRequest(BaseModel):
    """Body of ``POST /v1/admin/chunker/overlap``."""
    # Forwarded unchanged to the smart chunker's ``set_overlap``.
    overlap_chars: int


class ChunkerModeRequest(BaseModel):
    """Body of ``POST /v1/admin/chunker/mode``."""
    # Forwarded unchanged to the smart chunker's ``set_mode``.
    mode: str


class ChunkRequest(BaseModel):
    """Body of ``POST /v1/admin/chunker/chunk``."""
    # Text handed to the smart chunker's ``chunk``.
    text: str
    # Forwarded as the ``metadata`` keyword of ``chunk``; ``None`` when
    # omitted.
    metadata: Optional[Dict[str, Any]] = None


@app.get("/v1/admin/chunker")
def admin_chunker_get():
    """Return the smart chunker's ``stats()``."""
    from ..middleware.smart_chunker import get_smart_chunker

    chunker = get_smart_chunker()
    return chunker.stats()


@app.post("/v1/admin/chunker/caps")
def admin_chunker_set_caps(req: ChunkerCapsRequest, request: Request):
    """Set the smart chunker's hard and soft character caps.

    Returns:
        ``{"ok": True}``.

    Raises:
        HTTPException: 400 when ``set_caps`` raises ``ValueError``.
    """
    from ..middleware.smart_chunker import get_smart_chunker

    chunker = get_smart_chunker()
    try:
        chunker.set_caps(req.hard_cap_chars, req.soft_cap_chars)
    except ValueError as e:
        # Chain explicitly so the original ValueError stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    return {"ok": True}


@app.post("/v1/admin/chunker/overlap")
def admin_chunker_set_overlap(req: ChunkerOverlapRequest,
                              request: Request):
    """Set the smart chunker's overlap in characters.

    Returns:
        ``{"ok": True, "overlap_chars": <overlap_chars>}``.

    Raises:
        HTTPException: 400 when ``set_overlap`` raises ``ValueError``.
    """
    from ..middleware.smart_chunker import get_smart_chunker

    overlap = req.overlap_chars
    try:
        get_smart_chunker().set_overlap(overlap)
    except ValueError as e:
        # Chain explicitly so the original ValueError stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    return {"ok": True, "overlap_chars": overlap}


@app.post("/v1/admin/chunker/mode")
def admin_chunker_set_mode(req: ChunkerModeRequest, request: Request):
    """Set the smart chunker's mode.

    Returns:
        ``{"ok": True, "mode": <mode>}``.

    Raises:
        HTTPException: 400 when ``set_mode`` raises ``ValueError``.
    """
    from ..middleware.smart_chunker import get_smart_chunker

    mode = req.mode
    try:
        get_smart_chunker().set_mode(mode)
    except ValueError as e:
        # Chain explicitly so the original ValueError stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    return {"ok": True, "mode": mode}


@app.post("/v1/admin/chunker/chunk")
def admin_chunker_chunk(req: ChunkRequest):
    """Chunk ``req.text`` with the smart chunker and return ``to_dict()``."""
    from ..middleware.smart_chunker import get_smart_chunker

    chunker = get_smart_chunker()
    outcome = chunker.chunk(req.text, metadata=req.metadata)
    return outcome.to_dict()


@app.post("/v1/admin/chunker/reset")
def admin_chunker_reset(request: Request):
    """Call ``reset()`` on the smart chunker and acknowledge."""
    from ..middleware.smart_chunker import get_smart_chunker

    chunker = get_smart_chunker()
    chunker.reset()
    return {"ok": True}


# ---- v2.87 Query decomposition -----------------------------------------
class DecomposeEnableRequest(BaseModel):
    """Body of ``POST /v1/admin/decompose/enable``."""
    # Forwarded unchanged to the query decomposer's ``set_enabled``.
    enabled: bool


class DecomposeRequest(BaseModel):
    """Body of ``POST /v1/admin/decompose/try``."""
    # Query text handed to the query decomposer's ``decompose``.
    query: str


@app.get("/v1/admin/decompose")
def admin_decompose_get():
    """Return the query decomposer's ``stats()``."""
    from ..middleware.query_decompose import get_query_decomposer

    decomposer = get_query_decomposer()
    return decomposer.stats()


@app.post("/v1/admin/decompose/enable")
def admin_decompose_enable(req: DecomposeEnableRequest, request: Request):
    """Switch the query decomposer on or off and audit the change.

    ``request`` supplies the ``x-api-key`` header and ``state.request_id``
    recorded in the audit entry.
    """
    from ..middleware.query_decompose import get_query_decomposer

    decomposer = get_query_decomposer()
    decomposer.set_enabled(req.enabled)
    get_obs().audit(
        "decompose.enable",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        enabled=req.enabled,
    )
    return {"ok": True, "enabled": req.enabled}


@app.post("/v1/admin/decompose/try")
def admin_decompose_try(req: DecomposeRequest):
    """Decompose ``req.query`` and return the result's ``to_dict()``."""
    from ..middleware.query_decompose import get_query_decomposer

    decomposer = get_query_decomposer()
    decomposition = decomposer.decompose(req.query)
    return decomposition.to_dict()


@app.post("/v1/admin/decompose/reset")
def admin_decompose_reset(request: Request):
    """Call ``reset()`` on the query decomposer and acknowledge."""
    from ..middleware.query_decompose import get_query_decomposer

    decomposer = get_query_decomposer()
    decomposer.reset()
    return {"ok": True}


# ---- v2.88 Cost estimator ----------------------------------------------
class CostEstimateRequest(BaseModel):
    """Body of ``POST /v1/admin/cost_estimator/estimate``.

    Every field is forwarded by keyword, under the same name, to the cost
    estimator's ``estimate``.
    """
    retriever: str
    generator: str
    k: int
    system_tokens: int = 500
    query_tokens: int = 50
    max_output_tokens: int = 1000
    rerank: bool = False


class CostRetrieverRequest(BaseModel):
    """Body of ``POST /v1/admin/cost_estimator/retriever``.

    ``name`` is passed positionally and the remaining fields by keyword
    to the cost estimator's ``register_retriever``.
    """
    name: str
    cost_per_doc_usd: float = 0.0
    latency_base_ms: float = 20.0
    latency_per_doc_ms: float = 2.0


class CostGeneratorRequest(BaseModel):
    """Body of ``POST /v1/admin/cost_estimator/generator``.

    ``name`` is passed positionally and the remaining fields by keyword
    to the cost estimator's ``register_generator``.
    """
    name: str
    price_in_per_1k: float
    price_out_per_1k: float
    first_token_ms: float = 200.0
    per_token_ms: float = 15.0
    output_utilization: float = 0.7
    avg_chunk_tokens: int = 400


@app.get("/v1/admin/cost_estimator")
def admin_cost_estimator_get():
    """Return the cost estimator's stats and its registered retrievers/generators."""
    from ..middleware.cost_estimator import get_cost_estimator

    estimator = get_cost_estimator()
    estimator_stats = estimator.stats()
    retrievers = estimator.list_retrievers()
    generators = estimator.list_generators()
    return {
        "stats": estimator_stats,
        "retrievers": retrievers,
        "generators": generators,
    }


@app.post("/v1/admin/cost_estimator/estimate")
def admin_cost_estimator_estimate(req: CostEstimateRequest):
    """Estimate cost for the described request and return the breakdown.

    Returns:
        The ``to_dict()`` of whatever ``estimate`` returned.

    Raises:
        HTTPException: 400 when ``estimate`` raises ``KeyError`` or
            ``ValueError``.
    """
    from ..middleware.cost_estimator import get_cost_estimator

    estimator = get_cost_estimator()
    try:
        breakdown = estimator.estimate(
            retriever=req.retriever,
            generator=req.generator,
            k=req.k,
            system_tokens=req.system_tokens,
            query_tokens=req.query_tokens,
            max_output_tokens=req.max_output_tokens,
            rerank=req.rerank,
        )
    except (KeyError, ValueError) as e:
        # Chain explicitly so the original error stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    return breakdown.to_dict()


@app.post("/v1/admin/cost_estimator/retriever")
def admin_cost_estimator_register_retriever(req: CostRetrieverRequest,
                                            request: Request):
    """Register a retriever cost profile, passing ``replace=True``.

    Returns:
        ``{"ok": True, "name": <name>}``.

    Raises:
        HTTPException: 400 when ``register_retriever`` raises
            ``ValueError`` or ``KeyError``.
    """
    from ..middleware.cost_estimator import get_cost_estimator

    estimator = get_cost_estimator()
    try:
        estimator.register_retriever(
            req.name,
            cost_per_doc_usd=req.cost_per_doc_usd,
            latency_base_ms=req.latency_base_ms,
            latency_per_doc_ms=req.latency_per_doc_ms,
            replace=True,
        )
    except (ValueError, KeyError) as e:
        # Chain explicitly so the original error stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    return {"ok": True, "name": req.name}


@app.post("/v1/admin/cost_estimator/generator")
def admin_cost_estimator_register_generator(req: CostGeneratorRequest,
                                            request: Request):
    """Register a generator cost profile, passing ``replace=True``.

    Returns:
        ``{"ok": True, "name": <name>}``.

    Raises:
        HTTPException: 400 when ``register_generator`` raises
            ``ValueError`` or ``KeyError``.
    """
    from ..middleware.cost_estimator import get_cost_estimator

    estimator = get_cost_estimator()
    try:
        estimator.register_generator(
            req.name,
            price_in_per_1k=req.price_in_per_1k,
            price_out_per_1k=req.price_out_per_1k,
            first_token_ms=req.first_token_ms,
            per_token_ms=req.per_token_ms,
            output_utilization=req.output_utilization,
            avg_chunk_tokens=req.avg_chunk_tokens,
            replace=True,
        )
    except (ValueError, KeyError) as e:
        # Chain explicitly so the original error stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    return {"ok": True, "name": req.name}


@app.post("/v1/admin/cost_estimator/reset")
def admin_cost_estimator_reset(request: Request):
    """Call ``reset()`` on the cost estimator and acknowledge."""
    from ..middleware.cost_estimator import get_cost_estimator

    estimator = get_cost_estimator()
    estimator.reset()
    return {"ok": True}


# ---- v2.89 Adaptive k selector -----------------------------------------
class AdaptiveKRecommendRequest(BaseModel):
    """Body of ``POST /v1/admin/adaptive_k/recommend``.

    Every field is forwarded by keyword, under the same name, to the
    adaptive-k selector's ``recommend``.
    """
    intent: Optional[str] = None
    intent_confidence: Optional[float] = None
    sub_queries: int = 1
    complexity_score: Optional[float] = None
    available_tokens: Optional[int] = None
    base_k_override: Optional[int] = None


class AdaptiveKBoundsRequest(BaseModel):
    """Body of ``POST /v1/admin/adaptive_k/bounds``.

    Both values are passed positionally (min first, max second) to the
    selector's ``set_bounds``.
    """
    min_k: int
    max_k: int


class AdaptiveKIntentRequest(BaseModel):
    """Body of ``POST /v1/admin/adaptive_k/intent_k``.

    ``intent`` and ``k`` are passed to the selector's ``set_intent_k``.
    """
    intent: str
    k: int


@app.get("/v1/admin/adaptive_k")
def admin_adaptive_k_get():
    """Return the adaptive-k selector's ``stats()``."""
    from ..middleware.adaptive_k import get_adaptive_k_selector

    selector = get_adaptive_k_selector()
    return selector.stats()


@app.post("/v1/admin/adaptive_k/recommend")
def admin_adaptive_k_recommend(req: AdaptiveKRecommendRequest):
    """Ask the adaptive-k selector for a recommendation.

    All request fields are forwarded by keyword; the recommendation's
    ``to_dict()`` is returned.
    """
    from ..middleware.adaptive_k import get_adaptive_k_selector

    selector = get_adaptive_k_selector()
    recommendation = selector.recommend(
        intent=req.intent,
        intent_confidence=req.intent_confidence,
        sub_queries=req.sub_queries,
        complexity_score=req.complexity_score,
        available_tokens=req.available_tokens,
        base_k_override=req.base_k_override,
    )
    return recommendation.to_dict()


@app.post("/v1/admin/adaptive_k/bounds")
def admin_adaptive_k_set_bounds(req: AdaptiveKBoundsRequest,
                                request: Request):
    """Set the adaptive-k selector's minimum and maximum k.

    Returns:
        ``{"ok": True, "min_k": ..., "max_k": ...}``.

    Raises:
        HTTPException: 400 when ``set_bounds`` raises ``ValueError``.
    """
    from ..middleware.adaptive_k import get_adaptive_k_selector

    lower, upper = req.min_k, req.max_k
    try:
        get_adaptive_k_selector().set_bounds(lower, upper)
    except ValueError as e:
        # Chain explicitly so the original ValueError stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    return {"ok": True, "min_k": lower, "max_k": upper}


@app.post("/v1/admin/adaptive_k/intent_k")
def admin_adaptive_k_set_intent_k(req: AdaptiveKIntentRequest,
                                  request: Request):
    """Set the k the adaptive-k selector associates with one intent.

    Returns:
        ``{"ok": True, "intent": ..., "k": ...}``.

    Raises:
        HTTPException: 400 when ``set_intent_k`` raises ``ValueError``.
    """
    from ..middleware.adaptive_k import get_adaptive_k_selector

    selector = get_adaptive_k_selector()
    try:
        selector.set_intent_k(req.intent, req.k)
    except ValueError as e:
        # Chain explicitly so the original ValueError stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    return {"ok": True, "intent": req.intent, "k": req.k}


@app.post("/v1/admin/adaptive_k/reset")
def admin_adaptive_k_reset(request: Request):
    """Call ``reset()`` on the adaptive-k selector and acknowledge."""
    from ..middleware.adaptive_k import get_adaptive_k_selector

    selector = get_adaptive_k_selector()
    selector.reset()
    return {"ok": True}


# ---- v2.90 Pipeline trace recorder -------------------------------------
class TraceCapacityRequest(BaseModel):
    """Body of ``POST /v1/admin/traces/capacity``."""
    # Forwarded unchanged to the trace recorder's ``set_capacity``.
    capacity: int


class TraceRetentionRequest(BaseModel):
    """Body of ``POST /v1/admin/traces/retention``."""
    # Forwarded unchanged to the trace recorder's ``set_retention``.
    retention_sec: float


@app.get("/v1/admin/traces")
def admin_traces_list(limit: int = 50, active_only: bool = False):
    """Return the trace recorder's stats and a listing of traces.

    ``limit`` and ``active_only`` are passed straight to ``list_traces``;
    no bounds are enforced here.
    """
    from ..middleware.pipeline_trace import get_pipeline_trace_recorder

    recorder = get_pipeline_trace_recorder()
    recorder_stats = recorder.stats()
    listing = recorder.list_traces(limit=limit, active_only=active_only)
    return {"stats": recorder_stats, "traces": listing}


@app.get("/v1/admin/traces/{trace_id}")
def admin_traces_get(trace_id: str):
    """Return one trace by id, or respond 404 when ``get`` yields ``None``."""
    from ..middleware.pipeline_trace import get_pipeline_trace_recorder

    recorder = get_pipeline_trace_recorder()
    found = recorder.get(trace_id)
    if found is not None:
        return found
    missing = {"error": "not found", "trace_id": trace_id}
    raise HTTPException(status_code=404, detail=missing)


@app.post("/v1/admin/traces/capacity")
def admin_traces_set_capacity(req: TraceCapacityRequest,
                              request: Request):
    """Set the trace recorder's capacity.

    Returns:
        ``{"ok": True, "capacity": <capacity>}``.

    Raises:
        HTTPException: 400 when ``set_capacity`` raises ``ValueError``.
    """
    from ..middleware.pipeline_trace import get_pipeline_trace_recorder

    capacity = req.capacity
    try:
        get_pipeline_trace_recorder().set_capacity(capacity)
    except ValueError as e:
        # Chain explicitly so the original ValueError stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    return {"ok": True, "capacity": capacity}


@app.post("/v1/admin/traces/retention")
def admin_traces_set_retention(req: TraceRetentionRequest,
                               request: Request):
    """Set the trace recorder's retention period in seconds.

    Returns:
        ``{"ok": True, "retention_sec": <retention_sec>}``.

    Raises:
        HTTPException: 400 when ``set_retention`` raises ``ValueError``.
    """
    from ..middleware.pipeline_trace import get_pipeline_trace_recorder

    retention = req.retention_sec
    try:
        get_pipeline_trace_recorder().set_retention(retention)
    except ValueError as e:
        # Chain explicitly so the original ValueError stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    return {"ok": True, "retention_sec": retention}


@app.post("/v1/admin/traces/purge")
def admin_traces_purge(request: Request):
    """Run ``purge_expired()`` and report its return value as ``purged``."""
    from ..middleware.pipeline_trace import get_pipeline_trace_recorder

    recorder = get_pipeline_trace_recorder()
    purged = recorder.purge_expired()
    return {"ok": True, "purged": purged}


@app.post("/v1/admin/traces/reset")
def admin_traces_reset(request: Request):
    """Call ``reset()`` on the trace recorder and acknowledge."""
    from ..middleware.pipeline_trace import get_pipeline_trace_recorder

    recorder = get_pipeline_trace_recorder()
    recorder.reset()
    return {"ok": True}


# ---- v2.91 A/B experiments ---------------------------------------------
class ABRegisterRequest(BaseModel):
    """Body of ``POST /v1/admin/ab``.

    The three fields are passed positionally, in declaration order, to
    the A/B manager's ``register``.
    """
    name: str
    variants: List[str]
    # NOTE(review): presumably one weight per entry in ``variants`` —
    # the constraint is enforced (if at all) inside ``register``; confirm.
    traffic_split: List[float]


class ABAssignRequest(BaseModel):
    """Body of ``POST /v1/admin/ab/assign``.

    ``name`` and ``user_key`` are passed to the A/B manager's ``assign``.
    """
    name: str
    user_key: str


class ABRecordRequest(BaseModel):
    """Body of ``POST /v1/admin/ab/record``.

    The four fields are passed positionally, in declaration order, to the
    A/B manager's ``record``.
    """
    name: str
    variant: str
    metric: str
    value: float


@app.get("/v1/admin/ab")
def admin_ab_list():
    """Return the A/B manager's stats and its experiment listing."""
    from ..middleware.ab_experiments import get_ab_manager

    manager = get_ab_manager()
    manager_stats = manager.stats()
    experiments = manager.list_experiments()
    return {"stats": manager_stats, "experiments": experiments}


@app.post("/v1/admin/ab")
def admin_ab_register(req: ABRegisterRequest, request: Request):
    """Register an A/B experiment (with ``replace=True``) and audit it.

    Args:
        req: Experiment name, variant names and traffic split.
        request: Incoming request; supplies the ``x-api-key`` header and
            ``state.request_id`` for the audit entry.

    Returns:
        ``{"ok": True, "name": <name>}``.

    Raises:
        HTTPException: 400 when ``register`` raises ``ValueError`` or
            ``KeyError``.
    """
    from ..middleware.ab_experiments import get_ab_manager

    manager = get_ab_manager()
    try:
        manager.register(
            req.name, req.variants, req.traffic_split, replace=True)
    except (ValueError, KeyError) as e:
        # Chain explicitly so the original error stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    # The audit entry is written only after registration succeeded.
    get_obs().audit(
        "ab.register",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        name=req.name,
        variants=req.variants,
    )
    return {"ok": True, "name": req.name}


@app.post("/v1/admin/ab/assign")
def admin_ab_assign(req: ABAssignRequest):
    """Return the variant ``assign`` yields for a user key in an experiment."""
    from ..middleware.ab_experiments import get_ab_manager

    manager = get_ab_manager()
    assigned = manager.assign(req.name, req.user_key)
    return {
        "variant": assigned,
        "experiment": req.name,
        "user_key": req.user_key,
    }


@app.post("/v1/admin/ab/record")
def admin_ab_record(req: ABRecordRequest):
    """Record one metric value for an experiment variant.

    Responds 404 when ``record`` returns a falsy value.
    """
    from ..middleware.ab_experiments import get_ab_manager

    manager = get_ab_manager()
    recorded = manager.record(
        req.name, req.variant, req.metric, req.value)
    if recorded:
        return {"ok": True}
    raise HTTPException(
        status_code=404,
        detail={"error": "experiment or variant not found"},
    )


@app.get("/v1/admin/ab/{name}")
def admin_ab_get(name: str, metric: Optional[str] = None):
    """Return the A/B manager's ``report`` for one experiment.

    ``metric`` is forwarded by keyword; the report is returned as-is.
    """
    from ..middleware.ab_experiments import get_ab_manager

    manager = get_ab_manager()
    return manager.report(name, metric=metric)


@app.post("/v1/admin/ab/{name}/pause")
def admin_ab_pause(name: str, request: Request):
    """Pause an experiment; respond 404 when ``pause`` returns a falsy value."""
    from ..middleware.ab_experiments import get_ab_manager

    paused = get_ab_manager().pause(name)
    if paused:
        return {"ok": True, "name": name, "state": "paused"}
    raise HTTPException(status_code=404, detail={"error": "not found"})


@app.post("/v1/admin/ab/{name}/resume")
def admin_ab_resume(name: str, request: Request):
    """Resume an experiment; respond 404 when ``resume`` returns a falsy value."""
    from ..middleware.ab_experiments import get_ab_manager

    resumed = get_ab_manager().resume(name)
    if resumed:
        return {"ok": True, "name": name, "state": "active"}
    raise HTTPException(status_code=404, detail={"error": "not found"})


@app.post("/v1/admin/ab/{name}/end")
def admin_ab_end(name: str, request: Request):
    """End an experiment; respond 404 when ``end`` returns a falsy value."""
    from ..middleware.ab_experiments import get_ab_manager

    ended = get_ab_manager().end(name)
    if ended:
        return {"ok": True, "name": name, "state": "ended"}
    raise HTTPException(status_code=404, detail={"error": "not found"})


# ---- v2.92 Tenant quotas -----------------------------------------------
class TenantRegisterRequest(BaseModel):
    """Body of ``POST /v1/admin/tenants``.

    ``tenant_id`` is passed positionally and the remaining fields by
    keyword to the tenant quota store's ``register``.
    """
    tenant_id: str
    daily_request_limit: Optional[int] = None
    monthly_cost_usd_limit: Optional[float] = None
    per_request_max_usd: Optional[float] = None
    warn_threshold: float = 0.80


class TenantUpdateRequest(BaseModel):
    """Body of ``POST /v1/admin/tenants/update``.

    ``tenant_id`` is passed positionally and the remaining fields by
    keyword to the tenant quota store's ``update_limits``. An omitted
    field and an explicit ``null`` both arrive there as ``None``.
    """
    tenant_id: str
    daily_request_limit: Optional[int] = None
    monthly_cost_usd_limit: Optional[float] = None
    per_request_max_usd: Optional[float] = None
    warn_threshold: Optional[float] = None


class TenantCheckRequest(BaseModel):
    """Body of ``POST /v1/admin/tenants/check``."""
    tenant_id: str
    # Forwarded as the ``estimated_cost_usd`` keyword of ``check``.
    estimated_cost_usd: float = 0.0


class TenantRecordRequest(BaseModel):
    """Body of ``POST /v1/admin/tenants/record``."""
    tenant_id: str
    # Forwarded as the ``cost_usd`` keyword of ``record``.
    cost_usd: float = 0.0


@app.get("/v1/admin/tenants")
def admin_tenants_list():
    """Return the tenant quota store's stats and its tenant listing."""
    from ..middleware.tenant_quotas import get_tenant_quota_store

    store = get_tenant_quota_store()
    store_stats = store.stats()
    tenants = store.list_tenants()
    return {"stats": store_stats, "tenants": tenants}


@app.post("/v1/admin/tenants")
def admin_tenants_register(req: TenantRegisterRequest, request: Request):
    """Register a tenant with the quota store.

    Returns:
        ``{"ok": True, "tenant_id": <tenant_id>}``.

    Raises:
        HTTPException: 400 when ``register`` raises ``ValueError``.
    """
    from ..middleware.tenant_quotas import get_tenant_quota_store

    store = get_tenant_quota_store()
    try:
        store.register(
            req.tenant_id,
            daily_request_limit=req.daily_request_limit,
            monthly_cost_usd_limit=req.monthly_cost_usd_limit,
            per_request_max_usd=req.per_request_max_usd,
            warn_threshold=req.warn_threshold,
        )
    except ValueError as e:
        # Chain explicitly so the original ValueError stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    return {"ok": True, "tenant_id": req.tenant_id}


@app.post("/v1/admin/tenants/update")
def admin_tenants_update(req: TenantUpdateRequest, request: Request):
    """Update a tenant's limits in the quota store.

    Returns:
        ``{"ok": True}``.

    Raises:
        HTTPException: 400 when ``update_limits`` raises ``ValueError``;
            404 when it returns a falsy value.
    """
    from ..middleware.tenant_quotas import get_tenant_quota_store

    store = get_tenant_quota_store()
    try:
        updated = store.update_limits(
            req.tenant_id,
            daily_request_limit=req.daily_request_limit,
            monthly_cost_usd_limit=req.monthly_cost_usd_limit,
            per_request_max_usd=req.per_request_max_usd,
            warn_threshold=req.warn_threshold,
        )
    except ValueError as e:
        # Chain explicitly so the original ValueError stays attached.
        raise HTTPException(
            status_code=400, detail={"error": str(e)}) from e
    if not updated:
        raise HTTPException(status_code=404,
                            detail={"error": "tenant not found"})
    return {"ok": True}


@app.get("/v1/admin/tenants/{tenant_id}")
def admin_tenants_get(tenant_id: str):
    """Return one tenant record, or respond 404 when ``tenant`` yields ``None``."""
    from ..middleware.tenant_quotas import get_tenant_quota_store

    store = get_tenant_quota_store()
    record = store.tenant(tenant_id)
    if record is not None:
        return record
    raise HTTPException(status_code=404, detail={"error": "not found"})


@app.post("/v1/admin/tenants/check")
def admin_tenants_check(req: TenantCheckRequest):
    """Run the quota store's ``check`` for a tenant and return ``to_dict()``."""
    from ..middleware.tenant_quotas import get_tenant_quota_store

    store = get_tenant_quota_store()
    decision = store.check(
        req.tenant_id, estimated_cost_usd=req.estimated_cost_usd)
    return decision.to_dict()


@app.post("/v1/admin/tenants/record")
def admin_tenants_record(req: TenantRecordRequest):
    """Record usage for a tenant.

    Responds 404 when ``record`` returns a falsy value.
    """
    from ..middleware.tenant_quotas import get_tenant_quota_store

    store = get_tenant_quota_store()
    recorded = store.record(req.tenant_id, cost_usd=req.cost_usd)
    if recorded:
        return {"ok": True}
    raise HTTPException(status_code=404,
                        detail={"error": "tenant not found"})


@app.delete("/v1/admin/tenants/{tenant_id}")
def admin_tenants_delete(tenant_id: str, request: Request):
    """Unregister a tenant; respond 404 when ``unregister`` returns a falsy value."""
    from ..middleware.tenant_quotas import get_tenant_quota_store

    removed = get_tenant_quota_store().unregister(tenant_id)
    if removed:
        return {"ok": True, "tenant_id": tenant_id, "removed": True}
    raise HTTPException(status_code=404, detail={"error": "not found"})


# ---- v2.93 Multi-hop retrieval -----------------------------------------
class MultiHopConfigRequest(BaseModel):
    """Body of POST /v1/admin/multihop/config.

    All three fields are optional and are forwarded unchanged to the
    multi-hop retriever's ``set_defaults``.
    """
    # NOTE(review): presumably None means "keep the current default" —
    # confirm against set_defaults().
    max_hops: Optional[int] = None
    k: Optional[int] = None
    max_entities_per_hop: Optional[int] = None


@app.get("/v1/admin/multihop")
def admin_multihop_get():
    """Return the multi-hop retriever's ``config()`` and ``stats()``."""
    from ..middleware.multihop_retrieve import get_multihop_retriever

    retriever = get_multihop_retriever()
    # config() is evaluated before stats(), matching the response key order.
    current_config = retriever.config()
    current_stats = retriever.stats()
    return {"config": current_config, "stats": current_stats}


@app.post("/v1/admin/multihop/config")
def admin_multihop_config(req: MultiHopConfigRequest, request: Request):
    """Forward the request fields to the multi-hop retriever's ``set_defaults``.

    A ``ValueError`` from the retriever is reported as HTTP 400.
    """
    from ..middleware.multihop_retrieve import get_multihop_retriever

    defaults = {
        "max_hops": req.max_hops,
        "k": req.k,
        "max_entities_per_hop": req.max_entities_per_hop,
    }
    try:
        get_multihop_retriever().set_defaults(**defaults)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    return {"ok": True}


@app.post("/v1/admin/multihop/reset")
def admin_multihop_reset(request: Request):
    """Call ``reset()`` on the multi-hop retriever and acknowledge."""
    from ..middleware.multihop_retrieve import get_multihop_retriever

    retriever = get_multihop_retriever()
    retriever.reset()
    return {"ok": True}


# ---- v2.94 Snippet extraction ------------------------------------------
class SnippetExtractRequest(BaseModel):
    """Body of POST /v1/admin/snippets/extract.

    ``query`` and ``doc_text`` are passed positionally to the snippet
    extractor's ``extract``; ``doc_id`` is passed as a keyword argument.
    """
    query: str
    doc_text: str
    doc_id: Optional[str] = None


class SnippetBatchRequest(BaseModel):
    """Body of POST /v1/admin/snippets/batch: one query plus a list of docs."""
    query: str
    # Free-form dicts; their expected keys are defined by extract_batch(),
    # not validated here.
    docs: List[Dict[str, Any]]


class SnippetConfigRequest(BaseModel):
    """Body of POST /v1/admin/snippets/config.

    The handler only applies a field when it is not ``None``.
    """
    top_sentences: Optional[int] = None
    max_snippet_chars: Optional[int] = None


@app.get("/v1/admin/snippets")
def admin_snippets_get():
    """Return the snippet extractor's ``stats()``."""
    from ..middleware.snippet_extract import get_snippet_extractor

    extractor = get_snippet_extractor()
    return extractor.stats()


@app.post("/v1/admin/snippets/extract")
def admin_snippets_extract(req: SnippetExtractRequest):
    """Extract a snippet from ``req.doc_text`` for ``req.query``; return its dict form."""
    from ..middleware.snippet_extract import get_snippet_extractor

    extractor = get_snippet_extractor()
    snippet = extractor.extract(req.query, req.doc_text, doc_id=req.doc_id)
    return snippet.to_dict()


@app.post("/v1/admin/snippets/batch")
def admin_snippets_batch(req: SnippetBatchRequest):
    """Run ``extract_batch`` over ``req.docs`` and wrap the result under ``docs``."""
    from ..middleware.snippet_extract import get_snippet_extractor

    extractor = get_snippet_extractor()
    extracted = extractor.extract_batch(req.query, req.docs)
    return {"docs": extracted}


@app.post("/v1/admin/snippets/config")
def admin_snippets_config(req: SnippetConfigRequest, request: Request):
    """Apply the non-``None`` settings to the snippet extractor.

    ``top_sentences`` is applied before ``max_snippet_chars``. A
    ``ValueError`` from either setter is reported as HTTP 400; on success
    the extractor's ``config()`` is echoed back.
    """
    from ..middleware.snippet_extract import get_snippet_extractor

    extractor = get_snippet_extractor()
    top_sentences = req.top_sentences
    max_chars = req.max_snippet_chars
    try:
        if top_sentences is not None:
            extractor.set_top_sentences(top_sentences)
        if max_chars is not None:
            extractor.set_max_snippet_chars(max_chars)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    return {"ok": True, "config": extractor.config()}


# ---- v2.95 Answer grounding --------------------------------------------
class GroundingVerifyRequest(BaseModel):
    """Body of POST /v1/admin/grounding/verify: an answer and its documents."""
    answer: str
    # Free-form dicts handed to the verifier as-is; schema not validated here.
    documents: List[Dict[str, Any]]


class GroundingThresholdsRequest(BaseModel):
    """Body of POST /v1/admin/grounding/thresholds; both fields are required.

    Values are passed positionally to ``set_thresholds(pass, warn)``.
    """
    pass_threshold: float
    warn_threshold: float


class GroundingSupportRequest(BaseModel):
    """Body of POST /v1/admin/grounding/support (one required float)."""
    support_threshold: float


@app.get("/v1/admin/grounding")
def admin_grounding_get():
    """Return the grounding verifier's ``stats()``."""
    from ..middleware.answer_grounding import get_grounding_verifier

    verifier = get_grounding_verifier()
    return verifier.stats()


@app.post("/v1/admin/grounding/verify")
def admin_grounding_verify(req: GroundingVerifyRequest):
    """Verify ``req.answer`` against ``req.documents``; return the result's dict form."""
    from ..middleware.answer_grounding import get_grounding_verifier

    verifier = get_grounding_verifier()
    result = verifier.verify(req.answer, req.documents)
    return result.to_dict()


@app.post("/v1/admin/grounding/thresholds")
def admin_grounding_thresholds(
    req: GroundingThresholdsRequest,
    request: Request,
):
    """Set the verifier's pass/warn thresholds; ``ValueError`` becomes HTTP 400."""
    from ..middleware.answer_grounding import get_grounding_verifier

    pass_at, warn_at = req.pass_threshold, req.warn_threshold
    try:
        get_grounding_verifier().set_thresholds(pass_at, warn_at)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    return {"ok": True}


@app.post("/v1/admin/grounding/support")
def admin_grounding_support(req: GroundingSupportRequest, request: Request):
    """Set the verifier's support threshold; ``ValueError`` becomes HTTP 400."""
    from ..middleware.answer_grounding import get_grounding_verifier

    threshold = req.support_threshold
    try:
        get_grounding_verifier().set_support_threshold(threshold)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    return {"ok": True}


# ---- v2.96 Corpus drift ------------------------------------------------
class DriftSnapshotRequest(BaseModel):
    """Body of POST /v1/admin/drift/snapshot: a label and the docs to snapshot."""
    label: str
    # Free-form dicts forwarded to the drift monitor's snapshot().
    docs: List[Dict[str, Any]]


class DriftThresholdsRequest(BaseModel):
    """Body of POST /v1/admin/drift/thresholds; both fields are required.

    Values are passed positionally to ``set_thresholds(warn, alert)``.
    """
    warn: float
    alert: float


class DriftReferenceRequest(BaseModel):
    """Body of POST /v1/admin/drift/reference: the label passed to ``set_reference``."""
    label: str


@app.get("/v1/admin/drift")
def admin_drift_get():
    """Return the drift monitor's ``stats()`` together with ``list_snapshots()``."""
    from ..middleware.corpus_drift import get_corpus_drift_monitor

    monitor = get_corpus_drift_monitor()
    # stats() is evaluated before list_snapshots(), as in the response order.
    stats = monitor.stats()
    snapshots = monitor.list_snapshots()
    return {"stats": stats, "snapshots": snapshots}


@app.post("/v1/admin/drift/snapshot")
def admin_drift_snapshot(req: DriftSnapshotRequest, request: Request):
    """Take a snapshot of ``req.docs`` under ``req.label``.

    Returns whatever the monitor's ``snapshot`` returns; a ``ValueError``
    is reported as HTTP 400.
    """
    from ..middleware.corpus_drift import get_corpus_drift_monitor

    try:
        monitor = get_corpus_drift_monitor()
        snapshot_info = monitor.snapshot(req.label, req.docs)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    return snapshot_info


@app.post("/v1/admin/drift/diff")
def admin_drift_diff(
    reference: Optional[str] = None,
    current: Optional[str] = None,
):
    """Run the drift monitor's ``diff`` with the optional reference/current arguments.

    Both ``ValueError`` and ``KeyError`` from the monitor are reported as
    HTTP 400; on success the result's dict form is returned.
    """
    from ..middleware.corpus_drift import get_corpus_drift_monitor

    try:
        monitor = get_corpus_drift_monitor()
        report = monitor.diff(reference=reference, current=current)
    except (ValueError, KeyError) as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    return report.to_dict()


@app.post("/v1/admin/drift/reference")
def admin_drift_set_reference(req: DriftReferenceRequest, request: Request):
    """Make ``req.label`` the reference; 404 when ``set_reference`` returns falsy."""
    from ..middleware.corpus_drift import get_corpus_drift_monitor

    accepted = get_corpus_drift_monitor().set_reference(req.label)
    if accepted:
        return {"ok": True, "reference": req.label}
    raise HTTPException(status_code=404,
                        detail={"error": "snapshot not found"})


@app.post("/v1/admin/drift/thresholds")
def admin_drift_thresholds(req: DriftThresholdsRequest, request: Request):
    """Set the drift monitor's warn/alert thresholds; ``ValueError`` becomes HTTP 400."""
    from ..middleware.corpus_drift import get_corpus_drift_monitor

    warn_at, alert_at = req.warn, req.alert
    try:
        get_corpus_drift_monitor().set_thresholds(warn_at, alert_at)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    return {"ok": True}


@app.post("/v1/admin/drift/reset")
def admin_drift_reset(request: Request):
    """Call ``reset()`` on the corpus drift monitor and acknowledge."""
    from ..middleware.corpus_drift import get_corpus_drift_monitor

    monitor = get_corpus_drift_monitor()
    monitor.reset()
    return {"ok": True}


# ---- v2.97 Prometheus metrics (/metrics/prometheus) -------------------
from fastapi.responses import PlainTextResponse


@app.get("/metrics/prometheus", response_class=PlainTextResponse)
def prometheus_metrics():
    """Prometheus text exposition format — scraped by Prometheus/Grafana."""
    from ..middleware.prom_exporter import get_prometheus_exporter

    # Served as plain text via response_class on the route decorator.
    exporter = get_prometheus_exporter()
    return exporter.render()


@app.get("/v1/admin/prometheus")
def admin_prometheus_get():
    """Return the Prometheus exporter's ``stats()`` and ``list_metrics()``."""
    from ..middleware.prom_exporter import get_prometheus_exporter

    exporter = get_prometheus_exporter()
    # stats() is evaluated before list_metrics(), as in the response order.
    stats = exporter.stats()
    metrics = exporter.list_metrics()
    return {"stats": stats, "metrics": metrics}


@app.post("/v1/admin/prometheus/reset")
def admin_prometheus_reset(request: Request):
    """Call ``reset()`` on the Prometheus exporter and acknowledge."""
    from ..middleware.prom_exporter import get_prometheus_exporter

    exporter = get_prometheus_exporter()
    exporter.reset()
    return {"ok": True}


# ---- v2.98 Request replay ---------------------------------------------
class ReplayRecordRequest(BaseModel):
    """Body of POST /v1/admin/replay_store/record.

    Every field is forwarded by name, unchanged, to the replay recorder's
    ``record``.
    """
    # Required fields.
    request_id: str
    endpoint: str
    query: str
    # Optional fields with their defaults.
    k: int = 5
    intent: Optional[str] = None
    retriever: Optional[str] = None
    latency_ms: Optional[float] = None
    cost_usd: Optional[float] = None
    success: bool = True
    error: Optional[str] = None
    tags: Optional[Dict[str, Any]] = None


class ReplayCapacityRequest(BaseModel):
    """Body of POST /v1/admin/replay_store/capacity (passed to ``set_capacity``)."""
    capacity: int


@app.get("/v1/admin/replay")
def admin_replay_get():
    """Return the replay recorder's ``stats()``."""
    from ..middleware.request_replay import get_replay_recorder

    recorder = get_replay_recorder()
    return recorder.stats()


@app.post("/v1/admin/replay_store/record")
def admin_replay_record(req: ReplayRecordRequest):
    """Hand every field of the request to the replay recorder's ``record``."""
    from ..middleware.request_replay import get_replay_recorder

    recorder = get_replay_recorder()
    # Keyword names mirror the request model's field names one-to-one.
    entry = {
        "request_id": req.request_id,
        "endpoint": req.endpoint,
        "query": req.query,
        "k": req.k,
        "intent": req.intent,
        "retriever": req.retriever,
        "latency_ms": req.latency_ms,
        "cost_usd": req.cost_usd,
        "success": req.success,
        "error": req.error,
        "tags": req.tags,
    }
    recorder.record(**entry)
    return {"ok": True}


@app.get("/v1/admin/replay_store/records")
def admin_replay_records(
    limit: int = 100,
    success_only: bool = False,
    endpoint: Optional[str] = None,
):
    """List replay records, passing the three query parameters to ``list_records``."""
    from ..middleware.request_replay import get_replay_recorder

    recorder = get_replay_recorder()
    records = recorder.list_records(
        limit=limit,
        success_only=success_only,
        endpoint=endpoint,
    )
    return {"records": records}


@app.get("/v1/admin/replay_store/record/{request_id}")
def admin_replay_get_one(request_id: str):
    """Return the replay record for ``request_id``; 404 when ``get`` yields ``None``."""
    from ..middleware.request_replay import get_replay_recorder

    recorder = get_replay_recorder()
    record = recorder.get(request_id)
    if record is not None:
        return record
    raise HTTPException(status_code=404, detail={"error": "not found"})


@app.post("/v1/admin/replay_store/capacity")
def admin_replay_capacity(req: ReplayCapacityRequest, request: Request):
    """Set the replay recorder's capacity; ``ValueError`` becomes HTTP 400."""
    from ..middleware.request_replay import get_replay_recorder

    new_capacity = req.capacity
    try:
        get_replay_recorder().set_capacity(new_capacity)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    return {"ok": True}


@app.post("/v1/admin/replay_store/reset")
def admin_replay_reset(request: Request):
    """Call ``reset()`` on the replay recorder and acknowledge."""
    from ..middleware.request_replay import get_replay_recorder

    recorder = get_replay_recorder()
    recorder.reset()
    return {"ok": True}


# ---- v2.99 Composite health -------------------------------------------
class HealthCacheRequest(BaseModel):
    """Body of POST /v1/admin/health/composite/cache_ttl (passed to ``set_cache_ttl``)."""
    ttl_sec: float


@app.get("/v1/health/composite")
def health_composite(use_cache: bool = True):
    """Public health endpoint — aggregates across subsystems."""
    from ..middleware.composite_health import get_composite_health

    # ``use_cache`` is handed straight to evaluate(); the reply is its dict form.
    health = get_composite_health()
    report = health.evaluate(use_cache=use_cache)
    return report.to_dict()


@app.get("/v1/admin/health/composite")
def admin_health_composite_get():
    """Return the composite health component's ``stats()``."""
    from ..middleware.composite_health import get_composite_health

    health = get_composite_health()
    return health.stats()


@app.post("/v1/admin/health/composite/cache_ttl")
def admin_health_cache_ttl(req: HealthCacheRequest, request: Request):
    """Set the composite health cache TTL; ``ValueError`` becomes HTTP 400."""
    from ..middleware.composite_health import get_composite_health

    ttl = req.ttl_sec
    try:
        get_composite_health().set_cache_ttl(ttl)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    return {"ok": True}


@app.post("/v1/admin/health/composite/reset")
def admin_health_composite_reset(request: Request):
    """Call ``reset()`` on the composite health component and acknowledge."""
    from ..middleware.composite_health import get_composite_health

    health = get_composite_health()
    health.reset()
    return {"ok": True}


# ---- v3.0 Platform manifest --------------------------------------------
@app.get("/v1/platform/manifest")
def platform_manifest_get(
    category: Optional[str] = None,
    since_prefix: Optional[str] = None,
    ported_only: bool = False,
):
    """Catalog of all features in the platform."""
    from ..middleware.platform_manifest import get_platform_manifest

    manifest = get_platform_manifest()
    # With no filter supplied (all three falsy) the full manifest is served.
    filtered = bool(category or since_prefix or ported_only)
    if not filtered:
        return manifest.full_manifest()

    # Otherwise return the version plus only the features matching the filters.
    version = manifest.version()
    features = manifest.list(
        category=category,
        since_prefix=since_prefix,
        ported_only=ported_only,
    )
    return {"platform_version": version, "features": features}


@app.get("/v1/platform/manifest/summary")
def platform_manifest_summary():
    """Return the manifest ``summary()`` with a ``categories`` entry added."""
    from ..middleware.platform_manifest import get_platform_manifest

    manifest = get_platform_manifest()
    summary = manifest.summary()
    # The object returned by summary() is extended in place, not copied.
    summary["categories"] = manifest.categories()
    return summary


@app.get("/v1/platform/manifest/{flag}")
def platform_manifest_flag(flag: str):
    """Return the manifest entry for ``flag``; 404 when ``get`` yields ``None``."""
    from ..middleware.platform_manifest import get_platform_manifest

    manifest = get_platform_manifest()
    feature = manifest.get(flag)
    if feature is not None:
        return feature
    raise HTTPException(status_code=404, detail={"error": "flag not found"})


# ---- v3.1 Self-diagnose -----------------------------------------------
@app.get("/v1/platform/diagnose")
def platform_diagnose():
    """Run a full system self-diagnosis and return a report."""
    from ..middleware.self_diagnose import get_self_diagnose

    diagnoser = get_self_diagnose()
    report = diagnoser.run()
    return report.to_dict()


@app.get("/v1/platform/diagnose/stats")
def platform_diagnose_stats():
    """Return the self-diagnose component's ``stats()``."""
    from ..middleware.self_diagnose import get_self_diagnose

    diagnoser = get_self_diagnose()
    return diagnoser.stats()


# ---- v3.2 Changelog ---------------------------------------------------
# ---- v3.3 HTML dashboard ----------------------------------------------
from fastapi.responses import HTMLResponse as _HTMLResponse


@app.get("/v1/platform/dashboard", response_class=_HTMLResponse)
def platform_dashboard():
    """Self-contained HTML dashboard of the whole platform state."""
    from ..middleware.html_dashboard import get_html_dashboard

    # Served as HTML via response_class on the route decorator.
    dashboard = get_html_dashboard()
    return dashboard.render()


# ---- v3.4 Compliance bundle -------------------------------------------
class ComplianceSignerRequest(BaseModel):
    """Body of POST /v1/platform/compliance/signing_key."""
    # Signing key material; the handler passes it to set_signing_key() and
    # does not include it in the audit event or the response.
    key: str
    signer_name: Optional[str] = None


@app.post("/v1/platform/compliance/generate")
def platform_compliance_generate():
    """Generate a signed audit bundle (JSON)."""
    from ..middleware.compliance_bundle import get_compliance_bundle_generator

    generator = get_compliance_bundle_generator()
    return generator.generate().to_dict()


@app.post("/v1/platform/compliance/verify")
def platform_compliance_verify(bundle: Dict[str, Any]):
    """Verify a bundle's signature against the current signing key."""
    from ..middleware.compliance_bundle import get_compliance_bundle_generator

    # The generator's verify() result is returned to the client unchanged.
    generator = get_compliance_bundle_generator()
    verdict = generator.verify(bundle)
    return verdict


@app.post("/v1/platform/compliance/signing_key")
def platform_compliance_set_signing_key(
    req: ComplianceSignerRequest,
    request: Request,
):
    """Install a new compliance signing key and write an audit event.

    A ``ValueError`` from ``set_signing_key`` is reported as HTTP 400 and
    no audit event is written. The audit event and the response carry the
    signer name only.
    """
    from ..middleware.compliance_bundle import get_compliance_bundle_generator

    signer = req.signer_name
    try:
        get_compliance_bundle_generator().set_signing_key(
            req.key, signer_name=signer)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})

    caller_key = request.headers.get("x-api-key")
    request_id = getattr(request.state, "request_id", None)
    get_obs().audit(
        "compliance.signing_key_set",
        actor_key=caller_key,
        request_id=request_id,
        signer=signer,
    )
    return {"ok": True, "signer": signer}


@app.get("/v1/platform/compliance/stats")
def platform_compliance_stats():
    """Return the compliance bundle generator's ``stats()``."""
    from ..middleware.compliance_bundle import get_compliance_bundle_generator

    generator = get_compliance_bundle_generator()
    return generator.stats()


# ---- v3.5 Middleware SDK ----------------------------------------------
class ScaffoldRequest(BaseModel):
    """Body of POST /v1/platform/middleware/scaffold.

    ``name`` is required; ``category`` and ``description`` have defaults
    and are forwarded as keyword arguments to the scaffold generator.
    """
    name: str
    category: str = "generic"
    description: str = ""


@app.get("/v1/platform/middleware")
def platform_middleware_list():
    """List all middleware registered via the SDK."""
    from ..middleware.middleware_sdk import get_middleware_registry

    registry = get_middleware_registry()
    registered = registry.list()
    return {"middleware": registered}


@app.post("/v1/platform/middleware/scaffold")
def platform_middleware_scaffold(req: ScaffoldRequest):
    """Generate Python source for a new middleware module."""
    from ..middleware.middleware_sdk import scaffold_middleware_source

    try:
        generated = scaffold_middleware_source(
            req.name,
            category=req.category,
            description=req.description,
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})

    # "length_lines" is the number of newline characters in the source.
    newline_count = generated.count("\n")
    return {
        "name": req.name,
        "source": generated,
        "length_lines": newline_count,
    }


@app.post("/v1/platform/middleware/reset_all")
def platform_middleware_reset_all(request: Request):
    """Call ``reset_all()`` on the middleware registry and report its return value."""
    from ..middleware.middleware_sdk import get_middleware_registry

    registry = get_middleware_registry()
    reset_count = registry.reset_all()
    return {"ok": True, "reset_count": reset_count}


# ---- v3.6 Eval harness ------------------------------------------------
class EvalCaseRequest(BaseModel):
    """Body of POST /v1/admin/eval/case.

    Fields are copied one-to-one into an eval-harness ``TestCase``.
    """
    case_id: str
    query: str
    # NOTE(review): the `= []` defaults below would be shared state on a plain
    # class; pydantic copies mutable field defaults per instance, so they are
    # expected to be safe here — confirm for the pinned pydantic version.
    expected_doc_ids: List[str] = []
    expected_substrings: List[str] = []
    min_confidence: float = 0.0
    k: int = 5
    tags: List[str] = []


class EvalWeightsRequest(BaseModel):
    """Body of POST /v1/admin/eval/weights (passed to the harness's ``set_weights``)."""
    # Name -> weight mapping; the accepted keys are defined by the harness.
    weights: Dict[str, float]


class EvalRunRequest(BaseModel):
    """Body of POST /v1/admin/eval/run."""
    # Forwarded as run(case_ids=...).
    # NOTE(review): presumably None means "run every case" — confirm in run().
    case_ids: Optional[List[str]] = None


@app.get("/v1/admin/eval")
def admin_eval_get():
    """Return the eval harness's ``stats()`` and ``list_cases()``."""
    from ..middleware.eval_harness import get_eval_harness

    harness = get_eval_harness()
    # stats() is evaluated before list_cases(), as in the response order.
    stats = harness.stats()
    cases = harness.list_cases()
    return {"stats": stats, "cases": cases}


@app.post("/v1/admin/eval/case")
def admin_eval_add_case(req: EvalCaseRequest, request: Request):
    """Add an eval case to the harness with ``replace=True``.

    A ``ValueError`` raised while building or adding the case is reported
    as HTTP 400.
    """
    from ..middleware.eval_harness import get_eval_harness, TestCase

    try:
        harness = get_eval_harness()
        case = TestCase(
            case_id=req.case_id,
            query=req.query,
            expected_doc_ids=req.expected_doc_ids,
            expected_substrings=req.expected_substrings,
            min_confidence=req.min_confidence,
            k=req.k,
            tags=req.tags,
        )
        harness.add_case(case, replace=True)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    return {"ok": True, "case_id": req.case_id}


@app.delete("/v1/admin/eval/case/{case_id}")
def admin_eval_remove_case(case_id: str, request: Request):
    """Remove an eval case; 404 when ``remove_case`` returns a falsy result."""
    from ..middleware.eval_harness import get_eval_harness

    removed = get_eval_harness().remove_case(case_id)
    if removed:
        return {"ok": True, "case_id": case_id, "removed": True}
    raise HTTPException(status_code=404, detail={"error": "not found"})


@app.post("/v1/admin/eval/weights")
def admin_eval_set_weights(req: EvalWeightsRequest, request: Request):
    """Set the eval harness weights; ``ValueError`` becomes HTTP 400."""
    from ..middleware.eval_harness import get_eval_harness

    new_weights = req.weights
    try:
        get_eval_harness().set_weights(new_weights)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    return {"ok": True}


@app.post("/v1/admin/eval/run")
def admin_eval_run(req: EvalRunRequest, request: Request):
    """Run the eval harness and return the report's dict form.

    Only ``RuntimeError`` from ``run`` is translated, into HTTP 400.
    """
    from ..middleware.eval_harness import get_eval_harness

    try:
        harness = get_eval_harness()
        eval_report = harness.run(case_ids=req.case_ids)
    except RuntimeError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    return eval_report.to_dict()


# ---- v3.7 Session persistence -----------------------------------------
class SessionSnapshotRequest(BaseModel):
    """Body of POST /v1/admin/session_persist/snapshot."""
    # Free-form session dicts handed to snapshot(); schema not validated here.
    sessions: List[Dict[str, Any]]


class SessionPersistPathRequest(BaseModel):
    """Body of POST /v1/admin/session_persist/save."""
    # Destination handed to save_to().
    path: str
    # When None, the save handler reads sessions from the live session store.
    sessions: Optional[List[Dict[str, Any]]] = None


class SessionPersistFormatRequest(BaseModel):
    """Body of POST /v1/admin/session_persist/format (passed to ``set_format``)."""
    format: str


@app.get("/v1/admin/session_persist")
def admin_session_persist_get():
    """Return the session persistence component's ``stats()``."""
    from ..middleware.session_persistence import get_session_persistence

    persistence = get_session_persistence()
    return persistence.stats()


@app.post("/v1/admin/session_persist/snapshot")
def admin_session_persist_snapshot(req: SessionSnapshotRequest):
    """Snapshot the supplied sessions and report sizes only.

    ``bytes`` is ``len()`` of whatever ``snapshot`` returns; ``n_sessions``
    is the number of sessions in the request.
    """
    from ..middleware.session_persistence import get_session_persistence

    sessions = req.sessions
    snapshot = get_session_persistence().snapshot(sessions)
    return {"bytes": len(snapshot), "n_sessions": len(sessions)}


@app.post("/v1/admin/session_persist/save")
def admin_session_persist_save(req: SessionPersistPathRequest,
                                    request: Request):
    """Save sessions to ``req.path`` and write an audit event.

    When ``req.sessions`` is ``None`` the sessions are read from the live
    session store. If that read fails for any reason, an empty list is
    saved instead (best-effort), and the failure is logged as a warning
    rather than being swallowed silently.

    Returns whatever ``save_to`` returns. A ``ValueError`` from ``save_to``
    is reported as HTTP 400.
    """
    import logging

    from ..middleware.session_persistence import get_session_persistence
    p = get_session_persistence()
    sessions = req.sessions
    if sessions is None:
        # Pull from the real session store
        try:
            from ..memory import get_store
            # NOTE(review): reaches into the store's private ``_sessions``
            # mapping; a public accessor would be safer if one exists.
            sessions = [s.to_dict() for s in get_store()._sessions.values()]
        except Exception:
            # Keep the best-effort fallback, but leave a trace: otherwise an
            # empty snapshot is written and audited with no hint as to why.
            logging.getLogger(__name__).warning(
                "session_persist.save: could not read the live session "
                "store; saving an empty snapshot",
                exc_info=True,
            )
            sessions = []
    try:
        # NOTE(review): ``req.path`` is caller-supplied; confirm save_to()
        # restricts where it may write.
        meta = p.save_to(req.path, sessions)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    get_obs().audit(
        "session_persist.save",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        path=req.path, n_sessions=len(sessions),
    )
    return meta


@app.post("/v1/admin/session_persist/load")
def admin_session_persist_load(path: str, request: Request):
    """Load a persisted session file and return what ``load_from`` parsed.

    ``FileNotFoundError`` is reported as HTTP 404 and ``ValueError`` as
    HTTP 400.
    """
    from ..middleware.session_persistence import get_session_persistence

    persistence = get_session_persistence()
    try:
        loaded = persistence.load_from(path)
    except FileNotFoundError as e:
        raise HTTPException(status_code=404, detail={"error": str(e)})
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    return loaded


@app.post("/v1/admin/session_persist/format")
def admin_session_persist_format(
    req: SessionPersistFormatRequest,
    request: Request,
):
    """Set the persistence format; ``ValueError`` becomes HTTP 400."""
    from ..middleware.session_persistence import get_session_persistence

    chosen_format = req.format
    try:
        get_session_persistence().set_format(chosen_format)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    return {"ok": True, "format": chosen_format}


# ---- v3.8 Event bus ---------------------------------------------------
class EventPublishRequest(BaseModel):
    """Body of POST /v1/admin/events/publish."""
    event_type: str
    payload: Optional[Dict[str, Any]] = None
    # When falsy, the publish handler substitutes "admin_api".
    source: Optional[str] = None


@app.get("/v1/admin/events")
def admin_events_get():
    """Return the event bus's ``stats()`` and ``list_subscriptions()``."""
    from ..middleware.event_bus import get_event_bus

    event_bus = get_event_bus()
    # stats() is evaluated before list_subscriptions(), as in the response order.
    stats = event_bus.stats()
    subscriptions = event_bus.list_subscriptions()
    return {"stats": stats, "subscriptions": subscriptions}


@app.post("/v1/admin/events/publish")
def admin_events_publish(req: EventPublishRequest, request: Request):
    """Publish an event on the event bus and return the event's dict form.

    A falsy ``req.source`` is replaced by ``"admin_api"``; a ``ValueError``
    from ``publish`` is reported as HTTP 400.
    """
    from ..middleware.event_bus import get_event_bus

    origin = req.source if req.source else "admin_api"
    try:
        published = get_event_bus().publish(
            req.event_type, payload=req.payload, source=origin)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    return published.to_dict()


@app.get("/v1/admin/events/history")
def admin_events_history(
    limit: int = 50,
    event_type: Optional[str] = None,
):
    """Return the event bus ``history`` for the given limit and optional type."""
    from ..middleware.event_bus import get_event_bus

    event_bus = get_event_bus()
    events = event_bus.history(limit=limit, event_type=event_type)
    return {"events": events}


@app.delete("/v1/admin/events/subscription/{subscription_id}")
def admin_events_unsubscribe(subscription_id: str, request: Request):
    """Remove a subscription; 404 when ``unsubscribe`` returns a falsy result."""
    from ..middleware.event_bus import get_event_bus

    removed = get_event_bus().unsubscribe(subscription_id)
    if removed:
        return {"ok": True, "subscription_id": subscription_id}
    raise HTTPException(status_code=404, detail={"error": "not found"})


@app.post("/v1/admin/events/reset")
def admin_events_reset(request: Request):
    """Call ``reset()`` on the event bus and acknowledge."""
    from ..middleware.event_bus import get_event_bus

    event_bus = get_event_bus()
    event_bus.reset()
    return {"ok": True}


# ---- v3.9 Graph retriever --------------------------------------------
class GraphEdgeRequest(BaseModel):
    """Body of POST /v1/admin/graph/edge.

    All fields are forwarded by name to the graph retriever's ``add_edge``.
    """
    from_id: str
    to_id: str
    relation: str = "cites"
    weight: Optional[float] = None
    metadata: Optional[Dict[str, Any]] = None


class GraphExpandRequest(BaseModel):
    """Body of POST /v1/admin/graph/expand.

    All fields are forwarded by name to the graph retriever's ``expand``.
    """
    # Free-form seed entries; their expected keys are defined by expand().
    seed: List[Dict[str, Any]]
    # NOTE(review): presumably None falls back to the retriever's defaults
    # for the options below — confirm in expand().
    max_depth: Optional[int] = None
    decay: Optional[float] = None
    top_k: Optional[int] = None
    allowed_relations: Optional[List[str]] = None


@app.get("/v1/admin/graph")
def admin_graph_stats():
    """Return the graph retriever's ``stats()``."""
    from ..middleware.graph_retriever import get_graph_retriever

    graph = get_graph_retriever()
    return graph.stats()


@app.post("/v1/admin/graph/edge")
def admin_graph_add_edge(req: GraphEdgeRequest, request: Request):
    """Add one edge to the graph retriever; ``ValueError`` becomes HTTP 400."""
    from ..middleware.graph_retriever import get_graph_retriever

    edge = {
        "from_id": req.from_id,
        "to_id": req.to_id,
        "relation": req.relation,
        "weight": req.weight,
        "metadata": req.metadata,
    }
    try:
        get_graph_retriever().add_edge(**edge)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    return {"ok": True}


@app.get("/v1/admin/graph/neighbors/{doc_id}")
def admin_graph_neighbors(doc_id: str, relation: Optional[str] = None):
    """Return ``neighbors(doc_id, relation=...)`` from the graph retriever."""
    from ..middleware.graph_retriever import get_graph_retriever

    graph = get_graph_retriever()
    neighbors = graph.neighbors(doc_id, relation=relation)
    return {"doc_id": doc_id, "neighbors": neighbors}


@app.post("/v1/admin/graph/expand")
def admin_graph_expand(req: GraphExpandRequest, request: Request):
    """Run the graph retriever's ``expand`` and return the result's dict form.

    A ``ValueError`` from ``expand`` is reported as HTTP 400.
    """
    from ..middleware.graph_retriever import get_graph_retriever

    options = {
        "seed": req.seed,
        "max_depth": req.max_depth,
        "decay": req.decay,
        "top_k": req.top_k,
        "allowed_relations": req.allowed_relations,
    }
    try:
        expansion = get_graph_retriever().expand(**options)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    return expansion.to_dict()


@app.post("/v1/admin/graph/reset")
def admin_graph_reset(request: Request):
    """Call ``reset()`` on the graph retriever and acknowledge."""
    from ..middleware.graph_retriever import get_graph_retriever

    graph = get_graph_retriever()
    graph.reset()
    return {"ok": True}


# ---- v3.10 Federated retrieval ---------------------------------------
class FederatedQueryRequest(BaseModel):
    """Body of POST /v1/admin/federated/query.

    All fields are forwarded by name to the federated retriever's ``query``.
    """
    query: str
    top_k: Optional[int] = None
    fusion: Optional[str] = None
    # NOTE(review): presumably a list of source names to restrict the query
    # to — confirm in FederatedRetriever.query().
    only: Optional[List[str]] = None


@app.get("/v1/admin/federated")
def admin_federated_stats():
    """Return the federated retriever's ``stats()`` and ``list_sources()``."""
    from ..middleware.federated_retrieval import get_federated_retriever

    federated = get_federated_retriever()
    # stats() is evaluated before list_sources(), as in the response order.
    stats = federated.stats()
    sources = federated.list_sources()
    return {"stats": stats, "sources": sources}


@app.post("/v1/admin/federated/query")
def admin_federated_query(req: FederatedQueryRequest, request: Request):
    """Run a federated query and return the result's dict form.

    A ``ValueError`` from ``query`` is reported as HTTP 400.
    """
    from ..middleware.federated_retrieval import get_federated_retriever

    params = {
        "query": req.query,
        "top_k": req.top_k,
        "fusion": req.fusion,
        "only": req.only,
    }
    try:
        outcome = get_federated_retriever().query(**params)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    return outcome.to_dict()


@app.post("/v1/admin/federated/source/{name}/enable")
def admin_federated_enable(name: str, enabled: bool, request: Request):
    """Set the enabled flag of a federated source; 404 when ``set_enabled`` returns falsy."""
    from ..middleware.federated_retrieval import get_federated_retriever

    applied = get_federated_retriever().set_enabled(name, enabled)
    if applied:
        return {"ok": True, "name": name, "enabled": enabled}
    raise HTTPException(status_code=404, detail={"error": "not found"})


@app.post("/v1/admin/federated/reset")
def admin_federated_reset(request: Request):
    """Call ``reset()`` on the federated retriever and acknowledge."""
    from ..middleware.federated_retrieval import get_federated_retriever

    federated = get_federated_retriever()
    federated.reset()
    return {"ok": True}


# ---- v3.11 Audit export ----------------------------------------------
@app.get("/v1/admin/audit_export")
def admin_audit_export_stats():
    """Return the audit exporter's ``stats()`` and ``list_cursors()``."""
    from ..middleware.audit_export import get_audit_exporter

    exporter = get_audit_exporter()
    # stats() is evaluated before list_cursors(), as in the response order.
    stats = exporter.stats()
    cursors = exporter.list_cursors()
    return {"stats": stats, "cursors": cursors}


@app.get("/v1/admin/audit_export/cursor/{name}")
def admin_audit_export_cursor(name: str):
    """Return the export cursor called ``name``; 404 when ``get_cursor`` yields ``None``."""
    from ..middleware.audit_export import get_audit_exporter

    exporter = get_audit_exporter()
    cursor = exporter.get_cursor(name)
    if cursor is not None:
        return cursor
    raise HTTPException(status_code=404, detail={"error": "not found"})


@app.delete("/v1/admin/audit_export/cursor/{name}")
def admin_audit_export_reset_cursor(name: str, request: Request):
    """Reset one export cursor; 404 when ``reset_cursor`` returns a falsy result."""
    from ..middleware.audit_export import get_audit_exporter

    was_reset = get_audit_exporter().reset_cursor(name)
    if was_reset:
        return {"ok": True, "name": name}
    raise HTTPException(status_code=404, detail={"error": "not found"})


@app.post("/v1/admin/audit_export/reset")
def admin_audit_export_reset(request: Request):
    """Call ``reset()`` on the audit exporter and acknowledge."""
    from ..middleware.audit_export import get_audit_exporter

    exporter = get_audit_exporter()
    exporter.reset()
    return {"ok": True}


# ---- v3.12 Graph builder --------------------------------------------
class GraphBuildRequest(BaseModel):
    """Body of POST /v1/admin/graph_builder/build."""
    # Free-form document dicts forwarded to build_from_docs().
    docs: List[Dict[str, Any]]
    # Forwarded as build_from_docs(apply_to_graph=...); off by default.
    apply_to_graph: bool = False


@app.get("/v1/admin/graph_builder")
def admin_graph_builder_stats():
    """Return the graph builder's ``stats()``."""
    from ..middleware.graph_builder import get_graph_builder

    builder = get_graph_builder()
    return builder.stats()


@app.post("/v1/admin/graph_builder/build")
def admin_graph_builder_build(req: GraphBuildRequest, request: Request):
    """Run ``build_from_docs`` and return its stats plus the proposals.

    Only ``RuntimeError`` from the builder is translated, into HTTP 400.
    """
    from ..middleware.graph_builder import get_graph_builder

    try:
        builder = get_graph_builder()
        proposals, build_stats = builder.build_from_docs(
            docs=req.docs,
            apply_to_graph=req.apply_to_graph,
        )
    except RuntimeError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})

    # stats are serialised before the proposals, as in the response order.
    stats_payload = build_stats.to_dict()
    proposal_payload = [proposal.to_dict() for proposal in proposals]
    return {"stats": stats_payload, "proposals": proposal_payload}


@app.post("/v1/admin/graph_builder/reset")
def admin_graph_builder_reset(request: Request):
    """Call ``reset()`` on the graph builder and acknowledge."""
    from ..middleware.graph_builder import get_graph_builder

    builder = get_graph_builder()
    builder.reset()
    return {"ok": True}


# ---- v3.13 Answer templates -----------------------------------------
class TemplateRenderRequest(BaseModel):
    """Body of POST /v1/admin/answer_templates/render."""
    template_id: str
    # Slot name -> text mapping forwarded to render(slots=...).
    slots: Dict[str, str]


class TemplateRenderAutoRequest(BaseModel):
    """Body of POST /v1/admin/answer_templates/render_auto.

    The template is selected by ``intent`` and ``lang`` instead of by id.
    """
    intent: str
    lang: str = "he"
    # Slot name -> text mapping forwarded to render_auto(slots=...).
    slots: Dict[str, str]


@app.get("/v1/admin/answer_templates")
def admin_answer_templates_list():
    """Return the answer-template store's ``stats()`` and ``list()``."""
    from ..middleware.answer_templates import get_answer_templates

    store = get_answer_templates()
    # stats() is evaluated before list(), as in the response order.
    stats = store.stats()
    templates = store.list()
    return {"stats": stats, "templates": templates}


@app.get("/v1/admin/answer_templates/{template_id}")
def admin_answer_template_get(template_id: str):
    """Return one answer template as plain JSON-friendly data.

    Responds 404 when the store has no template for ``template_id``.
    Each section contributes its ``heading``, ``body`` and a list of slot
    descriptions (``name``, ``flag``, ``description``, ``max_chars``).
    """
    from ..middleware.answer_templates import get_answer_templates

    template = get_answer_templates().get(template_id)
    if template is None:
        raise HTTPException(status_code=404, detail={"error": "not found"})

    sections = []
    for section in template.sections:
        # Sections are dict-like; missing keys fall back to "" / [].
        slot_rows = []
        for slot in section.get("slots", []):
            slot_rows.append({
                "name": slot.name,
                "flag": slot.flag,
                "description": slot.description,
                "max_chars": slot.max_chars,
            })
        sections.append({
            "heading": section.get("heading", ""),
            "body": section.get("body", ""),
            "slots": slot_rows,
        })

    return {
        "template_id": template.template_id,
        "intent": template.intent,
        "lang": template.lang,
        "title_md": template.title_md,
        "sections": sections,
        "footer_md": template.footer_md,
    }


@app.post("/v1/admin/answer_templates/render")
def admin_answer_template_render(
    req: TemplateRenderRequest,
    request: Request,
):
    """Render the template ``req.template_id`` with ``req.slots``.

    Unlike most handlers in this file, a ``ValueError`` from ``render`` is
    reported as HTTP 404 rather than 400.
    """
    from ..middleware.answer_templates import get_answer_templates

    try:
        store = get_answer_templates()
        render_report = store.render(
            template_id=req.template_id, slots=req.slots)
    except ValueError as e:
        raise HTTPException(status_code=404, detail={"error": str(e)})
    return {
        "report": render_report.to_dict(),
        "rendered": render_report.rendered,
    }


@app.post("/v1/admin/answer_templates/render_auto")
def admin_answer_template_render_auto(
    req: TemplateRenderAutoRequest,
    request: Request,
):
    # Let the store pick a template via render_auto(intent, slots, lang).
    # A None result means nothing matched and is returned as a 404.
    from ..middleware.answer_templates import get_answer_templates

    store = get_answer_templates()
    report = store.render_auto(
        intent=req.intent, slots=req.slots, lang=req.lang)
    if report is not None:
        return {
            "report": report.to_dict(),
            "rendered": report.rendered,
        }

    message = (f"no template for intent={req.intent} "
               f"lang={req.lang}")
    raise HTTPException(status_code=404, detail={"error": message})


@app.post("/v1/admin/answer_templates/reset")
def admin_answer_templates_reset(request: Request):
    # Install a newly constructed AnswerTemplateStore via
    # set_answer_templates(): a full reset with defaults re-registered.
    from ..middleware.answer_templates import (
        AnswerTemplateStore,
        set_answer_templates,
    )

    fresh_store = AnswerTemplateStore()
    set_answer_templates(fresh_store)
    return {"ok": True}


# ---- v3.14 Clarification planner ------------------------------------
class ClarifyRequest(BaseModel):
    # Request body for POST /v1/admin/clarify/analyze.
    query: str
    # Forwarded unchanged to the planner's analyze(); None when omitted.
    retrieval_count: Optional[int] = None
    # Optional list of dicts; the analyze handler reads the "intent" and
    # "confidence" keys of each entry. None or an empty list results in
    # None being passed to the planner.
    intent_top2: Optional[List[Dict[str, Any]]] = None


@app.get("/v1/admin/clarify")
def admin_clarify_stats():
    # Report the clarification planner's stats() together with its
    # list_polysemous_terms() output.
    from ..middleware.clarification import get_clarification_planner

    planner = get_clarification_planner()
    payload = {"stats": planner.stats()}
    payload["polysemous_terms"] = planner.list_polysemous_terms()
    return payload


@app.post("/v1/admin/clarify/analyze")
def admin_clarify_analyze(req: ClarifyRequest, request: Request):
    # Run the clarification planner's analyze() on the posted query and
    # return report.to_dict().
    #
    # Responds 400 when:
    #   * an intent_top2 entry carries a "confidence" that float() cannot
    #     convert (ValueError for a bad string, TypeError for null or a
    #     non-scalar value), or
    #   * the planner itself raises ValueError.
    from ..middleware.clarification import get_clarification_planner

    # Coerce the optional intent candidates up front. The model types the
    # entries as Dict[str, Any], so "confidence" may legally arrive as
    # null/list/dict; float() raises TypeError for those, which previously
    # escaped the ValueError handler and surfaced as a 500.
    try:
        intent_top2 = [
            {"intent": item.get("intent", ""),
             "confidence": float(item.get("confidence", 0))}
            for item in (req.intent_top2 or [])
        ] or None  # an empty/missing list is passed on as None
    except (TypeError, ValueError) as e:
        raise HTTPException(status_code=400,
                            detail={"error": str(e)}) from e

    try:
        report = get_clarification_planner().analyze(
            query=req.query,
            retrieval_count=req.retrieval_count,
            intent_top2=intent_top2,
        )
    except ValueError as e:
        raise HTTPException(status_code=400,
                            detail={"error": str(e)}) from e
    return report.to_dict()


@app.post("/v1/admin/clarify/reset")
def admin_clarify_reset(request: Request):
    # Call reset() on the clarification planner and acknowledge.
    from ..middleware.clarification import get_clarification_planner

    planner = get_clarification_planner()
    planner.reset()
    return {"ok": True}


# ---- v3.15 Citation parser ------------------------------------------
class CitationParseRequest(BaseModel):
    # Request body for POST /v1/admin/citation_parser/parse; `text` is
    # handed to the citation parser's parse().
    text: str


@app.get("/v1/admin/citation_parser")
def admin_citation_parser_stats():
    # Return the citation parser's stats() unchanged.
    from ..middleware.citation_parser import get_citation_parser

    parser = get_citation_parser()
    return parser.stats()


@app.post("/v1/admin/citation_parser/parse")
def admin_citation_parser_parse(
    req: CitationParseRequest,
    request: Request,
):
    # Parse the posted text and return the result's to_dict().
    from ..middleware.citation_parser import get_citation_parser

    parser = get_citation_parser()
    parsed = parser.parse(req.text)
    return parsed.to_dict()


@app.post("/v1/admin/citation_parser/reset")
def admin_citation_parser_reset(request: Request):
    # Call reset() on the citation parser and acknowledge.
    from ..middleware.citation_parser import get_citation_parser

    parser = get_citation_parser()
    parser.reset()
    return {"ok": True}


# ---- v3.16 Spell correction -----------------------------------------
class SpellCorrectRequest(BaseModel):
    # Request body for POST /v1/admin/spell_correct/correct; `query` is
    # handed to the spell corrector's correct().
    query: str


class VocabAddRequest(BaseModel):
    # Request body for POST /v1/admin/spell_correct/vocab; `words` is
    # handed to the spell corrector's add_words().
    words: List[str]


@app.get("/v1/admin/spell_correct")
def admin_spell_correct_stats():
    # Return the spell corrector's stats() unchanged.
    from ..middleware.spell_correct import get_spell_corrector

    corrector = get_spell_corrector()
    return corrector.stats()


@app.post("/v1/admin/spell_correct/correct")
def admin_spell_correct_correct(
    req: SpellCorrectRequest,
    request: Request,
):
    # Run correct() on the posted query and return the report's to_dict().
    from ..middleware.spell_correct import get_spell_corrector

    corrector = get_spell_corrector()
    correction = corrector.correct(req.query)
    return correction.to_dict()


@app.post("/v1/admin/spell_correct/vocab")
def admin_spell_correct_add_vocab(
    req: VocabAddRequest,
    request: Request,
):
    # Pass the posted words to add_words() and echo its return value under
    # "added".
    from ..middleware.spell_correct import get_spell_corrector

    corrector = get_spell_corrector()
    outcome = corrector.add_words(req.words)
    return {"ok": True, "added": outcome}


@app.post("/v1/admin/spell_correct/reset")
def admin_spell_correct_reset(request: Request):
    # Call reset() on the spell corrector and acknowledge.
    from ..middleware.spell_correct import get_spell_corrector

    corrector = get_spell_corrector()
    corrector.reset()
    return {"ok": True}


# ---- v3.17 Retrieval budget -----------------------------------------
class BudgetProfileRequest(BaseModel):
    # Request body for POST /v1/admin/retrieval_budget/profile.
    # name/unit_cost/base_cost/quality are used to construct a
    # RetrieverCostProfile.
    name: str
    unit_cost: float
    base_cost: float = 0.0
    quality: float = 1.0
    # Forwarded as register_profile(..., replace=...).
    replace: bool = False


class BudgetPlanRequest(BaseModel):
    # Request body for POST /v1/admin/retrieval_budget/plan. Every field is
    # forwarded as the same-named keyword argument of the planner's plan();
    # the optional ones are passed as None when omitted.
    retrievers: List[str]
    top_k: Optional[int] = None
    budget: Optional[float] = None
    mode: Optional[str] = None


@app.get("/v1/admin/retrieval_budget")
def admin_retrieval_budget_stats():
    # Report the budget planner's stats(), list_profiles() and
    # list_active() outputs.
    from ..middleware.retrieval_budget import get_retrieval_budget_planner

    planner = get_retrieval_budget_planner()
    payload = {"stats": planner.stats()}
    payload["profiles"] = planner.list_profiles()
    payload["active"] = planner.list_active()
    return payload


@app.post("/v1/admin/retrieval_budget/profile")
def admin_retrieval_budget_add_profile(
    req: BudgetProfileRequest,
    request: Request,
):
    # Build a RetrieverCostProfile from the request and register it with
    # the budget planner. A ValueError is returned as a 400.
    from ..middleware.retrieval_budget import (
        RetrieverCostProfile,
        get_retrieval_budget_planner,
    )

    try:
        planner = get_retrieval_budget_planner()
        profile = RetrieverCostProfile(
            name=req.name,
            unit_cost=req.unit_cost,
            base_cost=req.base_cost,
            quality=req.quality,
        )
        planner.register_profile(profile, replace=req.replace)
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})
    else:
        return {"ok": True, "name": req.name}


@app.post("/v1/admin/retrieval_budget/plan")
def admin_retrieval_budget_plan(
    req: BudgetPlanRequest,
    request: Request,
):
    # Ask the budget planner for a plan and return its to_dict().
    # A ValueError is returned as a 400.
    from ..middleware.retrieval_budget import get_retrieval_budget_planner

    try:
        planner = get_retrieval_budget_planner()
        budget_plan = planner.plan(
            retrievers=req.retrievers,
            top_k=req.top_k,
            budget=req.budget,
            mode=req.mode,
        )
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})
    return budget_plan.to_dict()


@app.post("/v1/admin/retrieval_budget/reset")
def admin_retrieval_budget_reset(request: Request):
    # Call reset() on the retrieval budget planner and acknowledge.
    from ..middleware.retrieval_budget import get_retrieval_budget_planner

    planner = get_retrieval_budget_planner()
    planner.reset()
    return {"ok": True}


# ---- v3.18 Document summarizer --------------------------------------
class SummarizeRequest(BaseModel):
    # Request body for POST /v1/admin/doc_summarizer/summarize. Every field
    # is forwarded as the same-named keyword argument of summarize().
    doc_id: str
    text: str
    # Defaults to "medium"; accepted values are decided by the summarizer.
    length: str = "medium"
    n_sentences: Optional[int] = None


class IndexDocsRequest(BaseModel):
    # Request body for POST /v1/admin/doc_summarizer/index; `docs` is
    # handed to the summarizer's index_corpus().
    docs: List[Dict[str, Any]]


@app.get("/v1/admin/doc_summarizer")
def admin_doc_summarizer_stats():
    # Return the document summarizer's stats() unchanged.
    from ..middleware.doc_summarizer import get_doc_summarizer

    summarizer = get_doc_summarizer()
    return summarizer.stats()


@app.post("/v1/admin/doc_summarizer/summarize")
def admin_doc_summarizer_summarize(
    req: SummarizeRequest,
    request: Request,
):
    # Summarize the posted text and return the result's to_dict().
    # A ValueError is returned as a 400.
    from ..middleware.doc_summarizer import get_doc_summarizer

    try:
        summarizer = get_doc_summarizer()
        summary = summarizer.summarize(
            doc_id=req.doc_id,
            text=req.text,
            length=req.length,
            n_sentences=req.n_sentences,
        )
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})
    return summary.to_dict()


@app.post("/v1/admin/doc_summarizer/index")
def admin_doc_summarizer_index(
    req: IndexDocsRequest,
    request: Request,
):
    # Pass the posted docs to index_corpus() and report its return value
    # under "unique_tokens".
    from ..middleware.doc_summarizer import get_doc_summarizer

    summarizer = get_doc_summarizer()
    token_count = summarizer.index_corpus(req.docs)
    return {"ok": True, "unique_tokens": token_count}


@app.post("/v1/admin/doc_summarizer/reset")
def admin_doc_summarizer_reset(request: Request):
    # Call reset() on the document summarizer and acknowledge.
    from ..middleware.doc_summarizer import get_doc_summarizer

    summarizer = get_doc_summarizer()
    summarizer.reset()
    return {"ok": True}


# ---- v3.19 Privilege filter -----------------------------------------
class ClassifyRequest(BaseModel):
    # Request body for POST /v1/admin/privilege/classify; both fields are
    # passed positionally to the privilege filter's classify().
    doc_id: str
    text: str


class FilterRequest(BaseModel):
    # Request body for POST /v1/admin/privilege/filter.
    # doc_id and text are passed positionally to the privilege filter's
    # filter(); the other two go in as keyword arguments.
    doc_id: str
    text: str
    user_clearances: List[str]
    # Defaults to "mask"; accepted values are decided by the filter.
    masking_mode: str = "mask"


class PrivilegeRuleRequest(BaseModel):
    # Request body for POST /v1/admin/privilege/rule.
    # rule_id/level/pattern/is_regex/description are used to construct a
    # PrivilegeRule.
    rule_id: str
    level: str
    pattern: str
    is_regex: bool = True
    description: str = ""
    # Forwarded as add_rule(..., replace=...).
    replace: bool = False


@app.get("/v1/admin/privilege")
def admin_privilege_stats():
    # Report the privilege filter's stats() together with list_rules().
    from ..middleware.privilege_filter import get_privilege_filter

    privilege = get_privilege_filter()
    payload = {"stats": privilege.stats()}
    payload["rules"] = privilege.list_rules()
    return payload


@app.post("/v1/admin/privilege/classify")
def admin_privilege_classify(req: ClassifyRequest, request: Request):
    # Classify the posted document and return the result's to_dict().
    from ..middleware.privilege_filter import get_privilege_filter

    privilege = get_privilege_filter()
    classification = privilege.classify(req.doc_id, req.text)
    return classification.to_dict()


@app.post("/v1/admin/privilege/filter")
def admin_privilege_filter(req: FilterRequest, request: Request):
    # Run the privilege filter for the given clearances and return the
    # decision's to_dict(); "masked_text" is added only when the decision
    # carries a non-None masked_text. A ValueError is returned as a 400.
    from ..middleware.privilege_filter import get_privilege_filter

    try:
        privilege = get_privilege_filter()
        decision = privilege.filter(
            req.doc_id,
            req.text,
            user_clearances=req.user_clearances,
            masking_mode=req.masking_mode,
        )
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})

    payload = decision.to_dict()
    if decision.masked_text is None:
        return payload
    payload["masked_text"] = decision.masked_text
    return payload


@app.post("/v1/admin/privilege/rule")
def admin_privilege_add_rule(
    req: PrivilegeRuleRequest,
    request: Request,
):
    # Build a PrivilegeRule from the request and add it to the privilege
    # filter. A ValueError is returned as a 400.
    from ..middleware.privilege_filter import (
        PrivilegeRule,
        get_privilege_filter,
    )

    try:
        privilege = get_privilege_filter()
        rule = PrivilegeRule(
            rule_id=req.rule_id,
            level=req.level,
            pattern=req.pattern,
            is_regex=req.is_regex,
            description=req.description,
        )
        privilege.add_rule(rule, replace=req.replace)
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})
    else:
        return {"ok": True, "rule_id": req.rule_id}


@app.delete("/v1/admin/privilege/rule/{rule_id}")
def admin_privilege_remove_rule(rule_id: str, request: Request):
    # Remove a rule by id; a falsy result from remove_rule() is a 404.
    from ..middleware.privilege_filter import get_privilege_filter

    removed = get_privilege_filter().remove_rule(rule_id)
    if removed:
        return {"ok": True, "rule_id": rule_id}
    raise HTTPException(status_code=404,
                        detail={"error": "not found"})


@app.post("/v1/admin/privilege/reset")
def admin_privilege_reset(request: Request):
    # Install a newly constructed PrivilegeFilter via
    # set_privilege_filter() and acknowledge.
    from ..middleware.privilege_filter import (
        PrivilegeFilter,
        set_privilege_filter,
    )

    fresh_filter = PrivilegeFilter()
    set_privilege_filter(fresh_filter)
    return {"ok": True}


# ---- v3.20 Time travel ----------------------------------------------
class AddVersionRequest(BaseModel):
    # Request body for POST /v1/admin/time_travel/version; the fields are
    # used to construct a VersionedDoc.
    doc_id: str
    version: str
    # NOTE(review): float timestamps; the unit/epoch is not visible in this
    # file section — confirm against the time-travel index.
    effective_from: float
    effective_to: Optional[float] = None
    text: str = ""
    # The handler substitutes {} when this is None or empty.
    metadata: Optional[Dict[str, Any]] = None


class TimeTravelAtRequest(BaseModel):
    # Request body for POST /v1/admin/time_travel/at; `ts` is handed to the
    # time-travel index's at().
    ts: float


class TimeTravelDiffRequest(BaseModel):
    # Request body for POST /v1/admin/time_travel/diff; the fields are
    # passed positionally as diff(doc_id, from_ts, to_ts).
    doc_id: str
    from_ts: float
    to_ts: float


@app.get("/v1/admin/time_travel")
def admin_time_travel_stats():
    # Return the time-travel index's stats() unchanged.
    from ..middleware.time_travel import get_time_travel_index

    index = get_time_travel_index()
    return index.stats()


@app.post("/v1/admin/time_travel/version")
def admin_time_travel_add_version(
    req: AddVersionRequest,
    request: Request,
):
    # Build a VersionedDoc from the request and add it to the time-travel
    # index. A ValueError is returned as a 400.
    from ..middleware.time_travel import (
        VersionedDoc,
        get_time_travel_index,
    )

    try:
        index = get_time_travel_index()
        versioned = VersionedDoc(
            doc_id=req.doc_id,
            version=req.version,
            effective_from=req.effective_from,
            effective_to=req.effective_to,
            text=req.text,
            metadata=req.metadata or {},
        )
        index.add_version(versioned)
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})
    else:
        return {"ok": True}


@app.post("/v1/admin/time_travel/at")
def admin_time_travel_at(req: TimeTravelAtRequest, request: Request):
    # Take the index's snapshot at the requested timestamp and summarise it
    # as ts / n_docs / doc_ids. A ValueError is returned as a 400.
    from ..middleware.time_travel import get_time_travel_index

    try:
        index = get_time_travel_index()
        snapshot = index.at(req.ts)
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})

    summary = {"ts": snapshot.ts}
    summary["n_docs"] = snapshot.n_docs()
    summary["doc_ids"] = snapshot.list_doc_ids()
    return summary


@app.get("/v1/admin/time_travel/history/{doc_id}")
def admin_time_travel_history(
    doc_id: str,
    include_text: bool = False,
):
    # Return the index's history() for one document, echoing the doc_id.
    # include_text is forwarded to history() unchanged.
    from ..middleware.time_travel import get_time_travel_index

    index = get_time_travel_index()
    versions = index.history(doc_id, include_text=include_text)
    return {"doc_id": doc_id, "history": versions}


@app.post("/v1/admin/time_travel/diff")
def admin_time_travel_diff(
    req: TimeTravelDiffRequest,
    request: Request,
):
    # Diff one document between two timestamps and return the result's
    # to_dict(). A ValueError is returned as a 400.
    from ..middleware.time_travel import get_time_travel_index

    try:
        index = get_time_travel_index()
        delta = index.diff(req.doc_id, req.from_ts, req.to_ts)
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})
    return delta.to_dict()


@app.post("/v1/admin/time_travel/reset")
def admin_time_travel_reset(request: Request):
    # Call reset() on the time-travel index and acknowledge.
    from ..middleware.time_travel import get_time_travel_index

    index = get_time_travel_index()
    index.reset()
    return {"ok": True}


# ---- v3.21 Fact extractor -------------------------------------------
class FactExtractRequest(BaseModel):
    # Request body for POST /v1/admin/fact_extractor/extract; both fields
    # are passed positionally to the fact extractor's extract().
    doc_id: str
    text: str


class FactExtractEnabledRequest(BaseModel):
    # Request body for POST /v1/admin/fact_extractor/enabled;
    # `enabled_types` is handed to the fact extractor's set_enabled().
    enabled_types: List[str]


@app.get("/v1/admin/fact_extractor")
def admin_fact_extractor_stats():
    # Return the fact extractor's stats() unchanged.
    from ..middleware.fact_extractor import get_fact_extractor

    extractor = get_fact_extractor()
    return extractor.stats()


@app.post("/v1/admin/fact_extractor/extract")
def admin_fact_extractor_extract(
    req: FactExtractRequest,
    request: Request,
):
    # Run extract() on the posted document and return the result's
    # to_dict().
    from ..middleware.fact_extractor import get_fact_extractor

    extractor = get_fact_extractor()
    extraction = extractor.extract(req.doc_id, req.text)
    return extraction.to_dict()


@app.post("/v1/admin/fact_extractor/enabled")
def admin_fact_extractor_set_enabled(
    req: FactExtractEnabledRequest,
    request: Request,
):
    # Pass the posted type list to set_enabled() and echo it back under
    # "enabled". A ValueError is returned as a 400.
    from ..middleware.fact_extractor import get_fact_extractor

    try:
        extractor = get_fact_extractor()
        extractor.set_enabled(req.enabled_types)
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})
    else:
        return {"ok": True, "enabled": req.enabled_types}


@app.post("/v1/admin/fact_extractor/reset")
def admin_fact_extractor_reset(request: Request):
    # Call reset() on the fact extractor and acknowledge.
    from ..middleware.fact_extractor import get_fact_extractor

    extractor = get_fact_extractor()
    extractor.reset()
    return {"ok": True}


# ---- v3.22 Answer consensus -----------------------------------------
class CandidateItem(BaseModel):
    # One candidate answer inside a ConsensusRequest; the consensus handler
    # converts each item into an AnswerCandidate.
    answerer: str
    answer_text: str
    # The handler substitutes [] when this is None or empty.
    citations: Optional[List[str]] = None
    confidence: Optional[float] = None


class ConsensusRequest(BaseModel):
    # Request body for POST /v1/admin/consensus/analyze: the question plus
    # the candidate answers to analyze.
    question: str
    candidates: List[CandidateItem]


@app.get("/v1/admin/consensus")
def admin_consensus_stats():
    # Return the answer-consensus component's stats() unchanged.
    from ..middleware.answer_consensus import get_answer_consensus

    consensus = get_answer_consensus()
    return consensus.stats()


@app.post("/v1/admin/consensus/analyze")
def admin_consensus_analyze(
    req: ConsensusRequest,
    request: Request,
):
    # Convert each posted candidate into an AnswerCandidate, analyze them
    # against the question, and return the result's to_dict().
    # A ValueError from analyze() is returned as a 400.
    from ..middleware.answer_consensus import (
        AnswerCandidate,
        get_answer_consensus,
    )

    candidates = []
    for item in req.candidates:
        candidates.append(
            AnswerCandidate(
                answerer=item.answerer,
                answer_text=item.answer_text,
                citations=item.citations or [],  # None/empty -> []
                confidence=item.confidence,
            )
        )

    try:
        consensus = get_answer_consensus()
        outcome = consensus.analyze(req.question, candidates)
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})
    return outcome.to_dict()


@app.post("/v1/admin/consensus/reset")
def admin_consensus_reset(request: Request):
    # Call reset() on the answer-consensus component and acknowledge.
    from ..middleware.answer_consensus import get_answer_consensus

    consensus = get_answer_consensus()
    consensus.reset()
    return {"ok": True}


# ---- v3.23 Authority ranker -----------------------------------------
class AuthorityRankRequest(BaseModel):
    # Request body for POST /v1/admin/authority/rank; both fields are
    # forwarded as rank(docs=..., query=...).
    docs: List[Dict[str, Any]]
    query: Optional[str] = None


class AuthorityWeightRequest(BaseModel):
    # Request body for POST /v1/admin/authority/weight; the fields are
    # passed positionally as set_weight(source_type, weight).
    source_type: str
    weight: float


@app.get("/v1/admin/authority")
def admin_authority_stats():
    # Report the authority ranker's stats() together with list_weights().
    from ..middleware.authority_ranker import get_authority_ranker

    ranker = get_authority_ranker()
    payload = {"stats": ranker.stats()}
    payload["weights"] = ranker.list_weights()
    return payload


@app.post("/v1/admin/authority/rank")
def admin_authority_rank(
    req: AuthorityRankRequest,
    request: Request,
):
    # Rank the posted docs and return the result's to_dict().
    # A ValueError is returned as a 400.
    from ..middleware.authority_ranker import get_authority_ranker

    try:
        ranker = get_authority_ranker()
        ranking = ranker.rank(docs=req.docs, query=req.query)
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})
    return ranking.to_dict()


@app.post("/v1/admin/authority/weight")
def admin_authority_set_weight(
    req: AuthorityWeightRequest,
    request: Request,
):
    # Set the weight for one source type on the authority ranker.
    # A ValueError is returned as a 400.
    from ..middleware.authority_ranker import get_authority_ranker

    try:
        ranker = get_authority_ranker()
        ranker.set_weight(req.source_type, req.weight)
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})
    else:
        return {"ok": True}


@app.delete("/v1/admin/authority/weight/{source_type}")
def admin_authority_remove_weight(source_type: str, request: Request):
    # Remove the weight for one source type; a falsy result from
    # remove_weight() is a 404.
    from ..middleware.authority_ranker import get_authority_ranker

    removed = get_authority_ranker().remove_weight(source_type)
    if removed:
        return {"ok": True, "source_type": source_type}
    raise HTTPException(status_code=404,
                        detail={"error": "not found"})


@app.post("/v1/admin/authority/reset")
def admin_authority_reset(request: Request):
    # Call reset() on the authority ranker and acknowledge.
    from ..middleware.authority_ranker import get_authority_ranker

    ranker = get_authority_ranker()
    ranker.reset()
    return {"ok": True}


# ---- v3.24 Document comparator --------------------------------------
class DocCompareRequest(BaseModel):
    # Request body for POST /v1/admin/doc_comparator/compare. The four
    # id/text fields are forwarded as same-named keyword arguments of
    # compare().
    doc_a_id: str
    text_a: str
    doc_b_id: str
    text_b: str
    # Forwarded as result.to_dict(include_text=...), not to compare().
    include_text: bool = True


@app.get("/v1/admin/doc_comparator")
def admin_doc_comparator_stats():
    # Return the document comparator's stats() unchanged.
    from ..middleware.doc_comparator import get_doc_comparator

    comparator = get_doc_comparator()
    return comparator.stats()


@app.post("/v1/admin/doc_comparator/compare")
def admin_doc_comparator_compare(
    req: DocCompareRequest,
    request: Request,
):
    # Compare two posted documents and return the result's to_dict(), with
    # include_text taken from the request. A ValueError from compare() is
    # returned as a 400.
    from ..middleware.doc_comparator import get_doc_comparator

    try:
        comparator = get_doc_comparator()
        comparison = comparator.compare(
            doc_a_id=req.doc_a_id,
            text_a=req.text_a,
            doc_b_id=req.doc_b_id,
            text_b=req.text_b,
        )
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})
    return comparison.to_dict(include_text=req.include_text)


@app.post("/v1/admin/doc_comparator/reset")
def admin_doc_comparator_reset(request: Request):
    # Call reset() on the document comparator and acknowledge.
    from ..middleware.doc_comparator import get_doc_comparator

    comparator = get_doc_comparator()
    comparator.reset()
    return {"ok": True}


# ---- v3.25 Similar docs ---------------------------------------------
class AddSimDocRequest(BaseModel):
    # Request body for POST /v1/admin/similar_docs/add; every field is
    # forwarded as the same-named keyword argument of add_doc(). metadata
    # is passed through as-is, including None.
    doc_id: str
    text: str
    metadata: Optional[Dict[str, Any]] = None


class SimDocsIndexRequest(BaseModel):
    # Request body for POST /v1/admin/similar_docs/index; `docs` is handed
    # to the finder's add_corpus().
    docs: List[Dict[str, Any]]


class SimDocsFindRequest(BaseModel):
    # Request body for POST /v1/admin/similar_docs/find.
    # The handler searches by doc_id when it is truthy, otherwise by text
    # when text is not None; with neither it responds 400.
    doc_id: Optional[str] = None
    text: Optional[str] = None
    top_k: int = 10
    # Only forwarded on the doc_id path (find_similar_to_doc).
    exclude_self: bool = True
    min_score: float = 0.0
    # Forwarded unchanged as the finder's `filter` keyword argument.
    filter: Optional[Dict[str, Any]] = None


@app.get("/v1/admin/similar_docs")
def admin_similar_docs_stats():
    # Return the similar-document finder's stats() unchanged.
    from ..middleware.similar_docs import get_similar_doc_finder

    finder = get_similar_doc_finder()
    return finder.stats()


@app.post("/v1/admin/similar_docs/add")
def admin_similar_docs_add(req: AddSimDocRequest, request: Request):
    # Add one document to the similar-document finder.
    # A ValueError is returned as a 400.
    from ..middleware.similar_docs import get_similar_doc_finder

    try:
        finder = get_similar_doc_finder()
        finder.add_doc(
            doc_id=req.doc_id,
            text=req.text,
            metadata=req.metadata,
        )
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})
    else:
        return {"ok": True}


@app.post("/v1/admin/similar_docs/index")
def admin_similar_docs_index(
    req: SimDocsIndexRequest,
    request: Request,
):
    # Pass the posted docs to add_corpus() and echo its return value under
    # "added".
    from ..middleware.similar_docs import get_similar_doc_finder

    finder = get_similar_doc_finder()
    outcome = finder.add_corpus(req.docs)
    return {"ok": True, "added": outcome}


@app.delete("/v1/admin/similar_docs/{doc_id}")
def admin_similar_docs_remove(doc_id: str, request: Request):
    # Remove one document from the finder; a falsy result from
    # remove_doc() is a 404.
    from ..middleware.similar_docs import get_similar_doc_finder

    removed = get_similar_doc_finder().remove_doc(doc_id)
    if removed:
        return {"ok": True}
    raise HTTPException(status_code=404,
                        detail={"error": "not found"})


@app.post("/v1/admin/similar_docs/find")
def admin_similar_docs_find(
    req: SimDocsFindRequest,
    request: Request,
):
    # Find similar documents, searching by doc_id when it is truthy and
    # otherwise by text. With neither supplied, or when the finder raises
    # ValueError, the response is a 400.
    from ..middleware.similar_docs import get_similar_doc_finder

    finder = get_similar_doc_finder()

    by_doc = bool(req.doc_id)
    if not by_doc and req.text is None:
        raise HTTPException(
            status_code=400,
            detail={"error": "either doc_id or text required"},
        )

    try:
        if by_doc:
            matches = finder.find_similar_to_doc(
                doc_id=req.doc_id,
                top_k=req.top_k,
                exclude_self=req.exclude_self,
                min_score=req.min_score,
                filter=req.filter,
            )
        else:
            # The text path has no exclude_self argument.
            matches = finder.find_similar_to_text(
                text=req.text,
                top_k=req.top_k,
                min_score=req.min_score,
                filter=req.filter,
            )
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})
    return matches.to_dict()


@app.post("/v1/admin/similar_docs/reset")
def admin_similar_docs_reset(request: Request):
    # Call reset() on the similar-document finder and acknowledge.
    from ..middleware.similar_docs import get_similar_doc_finder

    finder = get_similar_doc_finder()
    finder.reset()
    return {"ok": True}


# ---- v3.26 Query lifecycle ------------------------------------------
class LifecycleStartRequest(BaseModel):
    # Request body for POST /v1/admin/lifecycle/start; every field is
    # forwarded as the same-named keyword argument of the reporter's
    # start().
    query_id: str
    query_text: str
    tenant_id: Optional[str] = None
    user_id: Optional[str] = None


class LifecycleRecordRequest(BaseModel):
    # Request body for POST /v1/admin/lifecycle/record; every field is
    # forwarded as the same-named keyword argument of the reporter's
    # record().
    query_id: str
    stage: str
    ok: bool = True
    duration_ms: Optional[float] = None
    details: Optional[Dict[str, Any]] = None


class LifecycleFinishRequest(BaseModel):
    # Request body for POST /v1/admin/lifecycle/finish; every field is
    # forwarded as the same-named keyword argument of the reporter's
    # finish().
    query_id: str
    # Defaults to "ok"; accepted values are decided by the reporter.
    outcome: str = "ok"
    final_answer: Optional[str] = None
    final_confidence: Optional[float] = None
    total_cost: Optional[float] = None


@app.get("/v1/admin/lifecycle")
def admin_lifecycle_stats():
    # Report the lifecycle reporter's stats() plus the length of its
    # list_active() result.
    from ..middleware.query_lifecycle import get_lifecycle_reporter

    reporter = get_lifecycle_reporter()
    payload = {"stats": reporter.stats()}
    active = reporter.list_active()
    payload["n_active"] = len(active)
    return payload


@app.post("/v1/admin/lifecycle/start")
def admin_lifecycle_start(
    req: LifecycleStartRequest,
    request: Request,
):
    # Start a lifecycle record for a query and return its to_dict().
    # A ValueError is returned as a 400.
    from ..middleware.query_lifecycle import get_lifecycle_reporter

    try:
        reporter = get_lifecycle_reporter()
        record = reporter.start(
            query_id=req.query_id,
            query_text=req.query_text,
            tenant_id=req.tenant_id,
            user_id=req.user_id,
        )
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})
    return record.to_dict()


@app.post("/v1/admin/lifecycle/record")
def admin_lifecycle_record(
    req: LifecycleRecordRequest,
    request: Request,
):
    # Record one stage event for a query and report how many events the
    # returned record now holds. A ValueError is returned as a 400.
    from ..middleware.query_lifecycle import get_lifecycle_reporter

    try:
        reporter = get_lifecycle_reporter()
        record = reporter.record(
            query_id=req.query_id,
            stage=req.stage,
            ok=req.ok,
            duration_ms=req.duration_ms,
            details=req.details,
        )
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})

    event_count = len(record.events)
    return {"ok": True, "n_events": event_count}


@app.post("/v1/admin/lifecycle/finish")
def admin_lifecycle_finish(
    req: LifecycleFinishRequest,
    request: Request,
):
    # Finish a query's lifecycle record and return its to_dict().
    # A ValueError is returned as a 400; a None result from finish() is a
    # 404 ("query_id not found").
    from ..middleware.query_lifecycle import get_lifecycle_reporter

    try:
        reporter = get_lifecycle_reporter()
        record = reporter.finish(
            query_id=req.query_id,
            outcome=req.outcome,
            final_answer=req.final_answer,
            final_confidence=req.final_confidence,
            total_cost=req.total_cost,
        )
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})

    if record is not None:
        return record.to_dict()
    raise HTTPException(status_code=404,
                        detail={"error": "query_id not found"})


@app.get("/v1/admin/lifecycle/record/{query_id}")
def admin_lifecycle_get_record(query_id: str):
    # Return whatever get_record() yields for the id; None is a 404.
    from ..middleware.query_lifecycle import get_lifecycle_reporter

    record = get_lifecycle_reporter().get_record(query_id)
    if record is not None:
        return record
    raise HTTPException(status_code=404,
                        detail={"error": "not found"})


@app.get("/v1/admin/lifecycle/summary/{query_id}")
def admin_lifecycle_summary(query_id: str):
    # Return the reporter's summarize() output for one query.
    # A ValueError is returned as a 404 carrying the exception text.
    from ..middleware.query_lifecycle import get_lifecycle_reporter

    try:
        reporter = get_lifecycle_reporter()
        summary = reporter.summarize(query_id)
    except ValueError as err:
        raise HTTPException(status_code=404, detail={"error": str(err)})
    return summary


@app.get("/v1/admin/lifecycle/history")
def admin_lifecycle_history(
    limit: int = 50,
    outcome: Optional[str] = None,
    tenant_id: Optional[str] = None,
):
    # Return the reporter's history() under "records"; the three query
    # parameters are forwarded unchanged. A ValueError is returned as a 400.
    from ..middleware.query_lifecycle import get_lifecycle_reporter

    try:
        reporter = get_lifecycle_reporter()
        records = reporter.history(
            limit=limit,
            outcome=outcome,
            tenant_id=tenant_id,
        )
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})
    return {"records": records}


@app.post("/v1/admin/lifecycle/reset")
def admin_lifecycle_reset(request: Request):
    # Call reset() on the lifecycle reporter and acknowledge.
    from ..middleware.query_lifecycle import get_lifecycle_reporter

    reporter = get_lifecycle_reporter()
    reporter.reset()
    return {"ok": True}


# ---- v3.27 Timeline builder -----------------------------------------
class TimelineEventRequest(BaseModel):
    # Request body for POST /v1/admin/timeline/event; every field is
    # forwarded as the same-named keyword argument of the timeline
    # builder's add_event_simple().
    event_id: str
    # NOTE(review): presumably an ISO-8601 date string, judging by the
    # name — confirm against add_event_simple().
    iso_date: str
    description: str
    doc_id: Optional[str] = None
    event_type: Optional[str] = None
    metadata: Optional[Dict[str, Any]] = None
    confidence: float = 1.0


class TimelineIngestRequest(BaseModel):
    # Request body for POST /v1/admin/timeline/ingest; both fields are
    # passed positionally to ingest_from_fact_extraction().
    doc_id: str
    text: str


class TimelineBuildRequest(BaseModel):
    # Request body for POST /v1/admin/timeline/build; every field is
    # optional and forwarded as the same-named keyword argument of the
    # timeline builder's build().
    from_ts: Optional[float] = None
    to_ts: Optional[float] = None
    event_type: Optional[str] = None
    doc_id: Optional[str] = None


@app.get("/v1/admin/timeline")
def admin_timeline_stats():
    # Return the timeline builder's stats() unchanged.
    from ..middleware.timeline_builder import get_timeline_builder

    timeline = get_timeline_builder()
    return timeline.stats()


@app.post("/v1/admin/timeline/event")
def admin_timeline_add_event(
    req: TimelineEventRequest,
    request: Request,
):
    # Add one event via add_event_simple() and return the created event's
    # to_dict(). A ValueError is returned as a 400.
    from ..middleware.timeline_builder import get_timeline_builder

    try:
        timeline = get_timeline_builder()
        created = timeline.add_event_simple(
            event_id=req.event_id,
            iso_date=req.iso_date,
            description=req.description,
            doc_id=req.doc_id,
            event_type=req.event_type,
            metadata=req.metadata,
            confidence=req.confidence,
        )
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})
    return created.to_dict()


@app.post("/v1/admin/timeline/ingest")
def admin_timeline_ingest(
    req: TimelineIngestRequest,
    request: Request,
):
    # Feed the posted document to ingest_from_fact_extraction() and echo
    # its return value under "added".
    from ..middleware.timeline_builder import get_timeline_builder

    timeline = get_timeline_builder()
    outcome = timeline.ingest_from_fact_extraction(req.doc_id, req.text)
    return {"ok": True, "added": outcome}


@app.delete("/v1/admin/timeline/event/{event_id}")
def admin_timeline_remove(event_id: str, request: Request):
    # Remove one event by id; a falsy result from remove_event() is a 404.
    from ..middleware.timeline_builder import get_timeline_builder

    removed = get_timeline_builder().remove_event(event_id)
    if removed:
        return {"ok": True}
    raise HTTPException(status_code=404,
                        detail={"error": "not found"})


@app.post("/v1/admin/timeline/build")
def admin_timeline_build(
    req: TimelineBuildRequest,
    request: Request,
):
    # Build a timeline using the optional filters from the request and
    # return the report's to_dict().
    from ..middleware.timeline_builder import get_timeline_builder

    timeline = get_timeline_builder()
    built = timeline.build(
        from_ts=req.from_ts,
        to_ts=req.to_ts,
        event_type=req.event_type,
        doc_id=req.doc_id,
    )
    return built.to_dict()


@app.post("/v1/admin/timeline/reset")
def admin_timeline_reset(request: Request):
    # Call reset() on the timeline builder and acknowledge.
    from ..middleware.timeline_builder import get_timeline_builder

    timeline = get_timeline_builder()
    timeline.reset()
    return {"ok": True}


# ---- v3.28 Corpus contradictions ------------------------------------
class CrawlRequest(BaseModel):
    # Request body for POST /v1/admin/contradictions/crawl; `docs` is
    # handed to the contradiction crawler's crawl().
    docs: List[Dict[str, Any]]


@app.get("/v1/admin/contradictions")
def admin_contradictions_stats():
    # Return the contradiction crawler's stats() unchanged.
    from ..middleware.corpus_contradictions import get_contradiction_crawler

    crawler = get_contradiction_crawler()
    return crawler.stats()


@app.post("/v1/admin/contradictions/crawl")
def admin_contradictions_crawl(req: CrawlRequest, request: Request):
    # Crawl the posted docs for contradictions and return the report's
    # to_dict(). A ValueError is returned as a 400.
    from ..middleware.corpus_contradictions import get_contradiction_crawler

    try:
        crawler = get_contradiction_crawler()
        findings = crawler.crawl(req.docs)
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})
    return findings.to_dict()


@app.post("/v1/admin/contradictions/reset")
def admin_contradictions_reset(request: Request):
    # Call reset() on the contradiction crawler and acknowledge.
    from ..middleware.corpus_contradictions import get_contradiction_crawler

    crawler = get_contradiction_crawler()
    crawler.reset()
    return {"ok": True}


# ---- v3.29 Anonymizer -----------------------------------------------
class AnonymizeRequest(BaseModel):
    # Request body for POST /v1/admin/anonymizer/anonymize; every field is
    # forwarded as the same-named keyword argument of anonymize().
    doc_id: str
    text: str
    # Defaults to "hash"; accepted values are decided by the anonymizer.
    mode: str = "hash"


class AnonymizeBulkRequest(BaseModel):
    # Request body for POST /v1/admin/anonymizer/anonymize_bulk; both
    # fields are forwarded as anonymize_docs(docs=..., mode=...).
    docs: List[Dict[str, Any]]
    # Defaults to "hash"; accepted values are decided by the anonymizer.
    mode: str = "hash"


class AnonTypesRequest(BaseModel):
    # Request body for POST /v1/admin/anonymizer/enabled; `enabled_types`
    # is handed to the anonymizer's set_enabled().
    enabled_types: List[str]


@app.get("/v1/admin/anonymizer")
def admin_anonymizer_stats():
    # Return the anonymizer's stats() unchanged.
    from ..middleware.anonymizer import get_anonymizer

    anonymizer = get_anonymizer()
    return anonymizer.stats()


@app.post("/v1/admin/anonymizer/anonymize")
def admin_anonymizer_anonymize(
    req: AnonymizeRequest,
    request: Request,
):
    # Anonymize one posted document and return the result's to_dict().
    # A ValueError is returned as a 400.
    from ..middleware.anonymizer import get_anonymizer

    try:
        anonymizer = get_anonymizer()
        outcome = anonymizer.anonymize(
            doc_id=req.doc_id,
            text=req.text,
            mode=req.mode,
        )
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})
    return outcome.to_dict()


@app.post("/v1/admin/anonymizer/anonymize_bulk")
def admin_anonymizer_bulk(
    req: AnonymizeBulkRequest,
    request: Request,
):
    # Anonymize a batch of docs and return each result's
    # to_dict(include_text=False) under "results". A ValueError from
    # anonymize_docs() is returned as a 400.
    from ..middleware.anonymizer import get_anonymizer

    try:
        anonymizer = get_anonymizer()
        outcomes = anonymizer.anonymize_docs(docs=req.docs, mode=req.mode)
    except ValueError as err:
        raise HTTPException(status_code=400, detail={"error": str(err)})

    rows = []
    for outcome in outcomes:
        rows.append(outcome.to_dict(include_text=False))
    return {"results": rows}


@app.post("/v1/admin/anonymizer/enabled")
def admin_anonymizer_set_enabled(
    req: AnonTypesRequest,
    request: Request,
):
    # Replace the anonymizer's enabled types. The response echoes the
    # requested list (not a value read back from the anonymizer).
    from ..middleware.anonymizer import get_anonymizer

    requested_types = req.enabled_types
    try:
        anonymizer = get_anonymizer()
        anonymizer.set_enabled(requested_types)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return {"ok": True, "enabled": requested_types}


@app.post("/v1/admin/anonymizer/reset")
def admin_anonymizer_reset(request: Request):
    # Call reset() on the anonymizer and acknowledge.
    from ..middleware.anonymizer import get_anonymizer

    anonymizer = get_anonymizer()
    anonymizer.reset()
    return {"ok": True}


# ---- v3.30 Document lineage -----------------------------------------
class LineageRegisterRequest(BaseModel):
    # Body of POST /v1/admin/lineage/register; forwarded to register_doc().
    doc_id: str
    initial_hash: Optional[str] = None  # passed through as-is, None included


class LineageLinkRequest(BaseModel):
    # Body of POST /v1/admin/lineage/link; every field is forwarded as the
    # same-named keyword argument of the lineage store's add_link().
    doc_id: str
    xform_type: str  # accepted values are defined by the lineage store
    actor: str
    source_hash: Optional[str] = None
    output_hash: Optional[str] = None
    description: str = ""
    metadata: Optional[Dict[str, Any]] = None


class LineageSupersedeRequest(BaseModel):
    # Body of POST /v1/admin/lineage/supersede; forwarded to supersede().
    old_doc_id: str
    new_doc_id: str
    actor: str
    description: str = ""


@app.get("/v1/admin/lineage")
def admin_lineage_stats():
    # Return the lineage store's stats() payload unmodified.
    from ..middleware.doc_lineage import get_doc_lineage

    lineage = get_doc_lineage()
    return lineage.stats()


@app.post("/v1/admin/lineage/register")
def admin_lineage_register(
    req: LineageRegisterRequest,
    request: Request,
):
    # Register a document with the lineage store; ValueError -> HTTP 400.
    from ..middleware.doc_lineage import get_doc_lineage

    try:
        lineage = get_doc_lineage()
        lineage.register_doc(
            doc_id=req.doc_id,
            initial_hash=req.initial_hash,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return {"ok": True}


@app.post("/v1/admin/lineage/link")
def admin_lineage_add_link(
    req: LineageLinkRequest,
    request: Request,
):
    # Append a transformation link to a document's lineage and return the
    # created link as a dict. ValueError -> HTTP 400.
    from ..middleware.doc_lineage import get_doc_lineage

    link_kwargs = {
        "doc_id": req.doc_id,
        "xform_type": req.xform_type,
        "actor": req.actor,
        "source_hash": req.source_hash,
        "output_hash": req.output_hash,
        "description": req.description,
        "metadata": req.metadata,
    }
    try:
        new_link = get_doc_lineage().add_link(**link_kwargs)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return new_link.to_dict()


@app.post("/v1/admin/lineage/supersede")
def admin_lineage_supersede(
    req: LineageSupersedeRequest,
    request: Request,
):
    # Record that new_doc_id supersedes old_doc_id and return the resulting
    # link as a dict. ValueError -> HTTP 400.
    from ..middleware.doc_lineage import get_doc_lineage

    supersede_kwargs = {
        "old_doc_id": req.old_doc_id,
        "new_doc_id": req.new_doc_id,
        "actor": req.actor,
        "description": req.description,
    }
    try:
        new_link = get_doc_lineage().supersede(**supersede_kwargs)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return new_link.to_dict()


@app.get("/v1/admin/lineage/doc/{doc_id}")
def admin_lineage_get(doc_id: str, include_links: bool = True):
    # Fetch one lineage record; a None result is reported as HTTP 404.
    from ..middleware.doc_lineage import get_doc_lineage

    lineage = get_doc_lineage()
    record = lineage.get_record(doc_id, include_links=include_links)
    if record is not None:
        return record
    raise HTTPException(status_code=404, detail={"error": "not found"})


@app.get("/v1/admin/lineage/verify/{doc_id}")
def admin_lineage_verify(doc_id: str):
    # Verify one document's lineage and return the report as a dict.
    #
    # doc_id comes straight from the URL. A ValueError from the lineage
    # store is mapped to HTTP 400, as the register/link/supersede handlers
    # do, instead of escaping as an unhandled 500.
    # NOTE(review): whether verify() raises for unknown ids is not visible
    # here; the mapping is a no-op if it never does.
    from ..middleware.doc_lineage import get_doc_lineage

    try:
        report = get_doc_lineage().verify(doc_id)
    except ValueError as exc:
        raise HTTPException(
            status_code=400, detail={"error": str(exc)}
        ) from exc
    return report.to_dict()


@app.get("/v1/admin/lineage/verify_all")
def admin_lineage_verify_all():
    # Return the lineage store's verify_all() result unmodified.
    from ..middleware.doc_lineage import get_doc_lineage

    lineage = get_doc_lineage()
    return lineage.verify_all()


@app.post("/v1/admin/lineage/reset")
def admin_lineage_reset(request: Request):
    # Call reset() on the lineage store and acknowledge.
    from ..middleware.doc_lineage import get_doc_lineage

    lineage = get_doc_lineage()
    lineage.reset()
    return {"ok": True}


# ---- v3.31 Result explainer -----------------------------------------
class ExplainDocRequest(BaseModel):
    # Body of POST /v1/admin/explain/doc; forwarded to explain_doc().
    query: str
    doc: Dict[str, Any]  # free-form dict; schema is not validated here
    lang: Optional[str] = None


class ExplainResultsRequest(BaseModel):
    # Body of POST /v1/admin/explain/results; forwarded to explain_results().
    query: str
    docs: List[Dict[str, Any]]  # free-form dicts; schema is not validated here
    lang: Optional[str] = None


@app.get("/v1/admin/explain")
def admin_explain_stats():
    # Return the result explainer's stats() payload unmodified.
    from ..middleware.result_explainer import get_result_explainer

    explainer = get_result_explainer()
    return explainer.stats()


@app.post("/v1/admin/explain/doc")
def admin_explain_doc(req: ExplainDocRequest, request: Request):
    # Explain a single doc against the query and return the explanation as
    # a dict. ValueError -> HTTP 400.
    from ..middleware.result_explainer import get_result_explainer

    try:
        explainer = get_result_explainer()
        explanation = explainer.explain_doc(
            query=req.query,
            doc=req.doc,
            lang=req.lang,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return explanation.to_dict()


@app.post("/v1/admin/explain/results")
def admin_explain_results(
    req: ExplainResultsRequest,
    request: Request,
):
    # Explain a list of docs against the query and return the report as a
    # dict. ValueError -> HTTP 400.
    from ..middleware.result_explainer import get_result_explainer

    try:
        explainer = get_result_explainer()
        explain_report = explainer.explain_results(
            query=req.query,
            docs=req.docs,
            lang=req.lang,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return explain_report.to_dict()


@app.post("/v1/admin/explain/reset")
def admin_explain_reset(request: Request):
    # Call reset() on the result explainer and acknowledge.
    from ..middleware.result_explainer import get_result_explainer

    explainer = get_result_explainer()
    explainer.reset()
    return {"ok": True}


# ---- v3.32 Audit anomaly detector -----------------------------------
class AnomalyDetectRequest(BaseModel):
    # Body of POST /v1/admin/audit_anomaly/detect; `records` is passed
    # positionally to the detector's detect().
    records: List[Dict[str, Any]]  # free-form dicts; schema not validated here


class AnomalyBaselineActionsRequest(BaseModel):
    # Body of POST /v1/admin/audit_anomaly/baseline_actions; forwarded to
    # add_baseline_actions().
    actor: str
    actions: List[str]


class AnomalyBaselineVolumeRequest(BaseModel):
    # Body of POST /v1/admin/audit_anomaly/baseline_volume; forwarded to
    # set_baseline_volume().
    action: str
    mean: float  # units/range are defined by the detector — TODO confirm


@app.get("/v1/admin/audit_anomaly")
def admin_audit_anomaly_stats():
    # Return the audit anomaly detector's stats() payload unmodified.
    from ..middleware.audit_anomaly import get_audit_anomaly_detector

    detector = get_audit_anomaly_detector()
    return detector.stats()


@app.post("/v1/admin/audit_anomaly/detect")
def admin_audit_anomaly_detect(
    req: AnomalyDetectRequest,
    request: Request,
):
    # Run anomaly detection over req.records and return the report as a
    # dict. ValueError -> HTTP 400.
    from ..middleware.audit_anomaly import get_audit_anomaly_detector

    try:
        detector = get_audit_anomaly_detector()
        anomaly_report = detector.detect(req.records)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return anomaly_report.to_dict()


@app.post("/v1/admin/audit_anomaly/baseline_actions")
def admin_audit_anomaly_baseline_actions(
    req: AnomalyBaselineActionsRequest,
    request: Request,
):
    # Add baseline actions for an actor; ValueError -> HTTP 400.
    from ..middleware.audit_anomaly import get_audit_anomaly_detector

    try:
        detector = get_audit_anomaly_detector()
        detector.add_baseline_actions(
            actor=req.actor,
            actions=req.actions,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return {"ok": True}


@app.post("/v1/admin/audit_anomaly/baseline_volume")
def admin_audit_anomaly_baseline_volume(
    req: AnomalyBaselineVolumeRequest,
    request: Request,
):
    # Set the baseline volume mean for an action; ValueError -> HTTP 400.
    from ..middleware.audit_anomaly import get_audit_anomaly_detector

    try:
        detector = get_audit_anomaly_detector()
        detector.set_baseline_volume(
            action=req.action,
            mean=req.mean,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return {"ok": True}


@app.post("/v1/admin/audit_anomaly/reset")
def admin_audit_anomaly_reset(request: Request):
    # Call reset() on the audit anomaly detector and acknowledge.
    from ..middleware.audit_anomaly import get_audit_anomaly_detector

    detector = get_audit_anomaly_detector()
    detector.reset()
    return {"ok": True}


# ---- v3.33 LLM router -----------------------------------------------
class LLMBackendRequest(BaseModel):
    # Body of POST /v1/admin/llm_router/backend. All fields except
    # `replace` are copied into an LLMBackend; `replace` is passed to
    # register_backend().
    name: str
    cost_per_1k_input_tokens: float
    cost_per_1k_output_tokens: float
    avg_latency_ms: float
    quality_tier: float
    max_context_tokens: int = 200_000
    replace: bool = False


class LLMRouteRequest(BaseModel):
    # Body of POST /v1/admin/llm_router/route; every field is forwarded as
    # the same-named keyword argument of the router's route().
    query: str
    tier: Optional[str] = None
    budget_cents: Optional[float] = None
    latency_sla_ms: Optional[float] = None
    tenant_id: Optional[str] = None
    intent: Optional[str] = None
    n_subqueries: Optional[int] = None
    estimated_input_tokens: int = 500
    estimated_output_tokens: int = 300


@app.get("/v1/admin/llm_router")
def admin_llm_router_stats():
    # Snapshot of the LLM router: stats, registered backends and tenant
    # overrides, queried in that order.
    from ..middleware.llm_router import get_llm_router

    router = get_llm_router()
    stats = router.stats()
    backends = router.list_backends()
    overrides = router.list_tenant_overrides()
    return {
        "stats": stats,
        "backends": backends,
        "tenant_overrides": overrides,
    }


@app.post("/v1/admin/llm_router/backend")
def admin_llm_router_add_backend(
    req: LLMBackendRequest,
    request: Request,
):
    # Build an LLMBackend from the request and register it with the router.
    # Construction stays inside the try so a ValueError from either the
    # backend constructor or register_backend() becomes HTTP 400.
    from ..middleware.llm_router import LLMBackend, get_llm_router

    try:
        router = get_llm_router()
        backend = LLMBackend(
            name=req.name,
            cost_per_1k_input_tokens=req.cost_per_1k_input_tokens,
            cost_per_1k_output_tokens=req.cost_per_1k_output_tokens,
            avg_latency_ms=req.avg_latency_ms,
            quality_tier=req.quality_tier,
            max_context_tokens=req.max_context_tokens,
        )
        router.register_backend(backend, replace=req.replace)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return {"ok": True}


@app.delete("/v1/admin/llm_router/backend/{name}")
def admin_llm_router_remove_backend(name: str, request: Request):
    # Unregister a backend by name; a falsy result from
    # unregister_backend() is reported as HTTP 404.
    from ..middleware.llm_router import get_llm_router

    removed = get_llm_router().unregister_backend(name)
    if removed:
        return {"ok": True}
    raise HTTPException(status_code=404, detail={"error": "not found"})


@app.post("/v1/admin/llm_router/route")
def admin_llm_router_route(req: LLMRouteRequest, request: Request):
    # Ask the router for a routing decision and return it as a dict.
    # ValueError -> HTTP 400.
    from ..middleware.llm_router import get_llm_router

    route_kwargs = {
        "query": req.query,
        "tier": req.tier,
        "budget_cents": req.budget_cents,
        "latency_sla_ms": req.latency_sla_ms,
        "tenant_id": req.tenant_id,
        "intent": req.intent,
        "n_subqueries": req.n_subqueries,
        "estimated_input_tokens": req.estimated_input_tokens,
        "estimated_output_tokens": req.estimated_output_tokens,
    }
    try:
        routing = get_llm_router().route(**route_kwargs)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return routing.to_dict()


@app.post("/v1/admin/llm_router/reset")
def admin_llm_router_reset(request: Request):
    # Call reset() on the LLM router and acknowledge.
    from ..middleware.llm_router import get_llm_router

    router = get_llm_router()
    router.reset()
    return {"ok": True}


# ---- v3.34 Citation expander ----------------------------------------
class CitationExpandRequest(BaseModel):
    # Body of POST /v1/admin/citation_expander/expand; `text` is passed
    # positionally to the expander's expand().
    text: str


@app.get("/v1/admin/citation_expander")
def admin_citation_expander_stats():
    # Return the citation expander's stats() payload unmodified.
    from ..middleware.citation_expander import get_citation_expander

    expander = get_citation_expander()
    return expander.stats()


@app.post("/v1/admin/citation_expander/expand")
def admin_citation_expander_expand(
    req: CitationExpandRequest,
    request: Request,
):
    # Expand citations in req.text and return the result as a dict.
    # ValueError -> HTTP 400.
    from ..middleware.citation_expander import get_citation_expander

    try:
        expander = get_citation_expander()
        expansion = expander.expand(req.text)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return expansion.to_dict()


@app.post("/v1/admin/citation_expander/clear_cache")
def admin_citation_expander_clear(request: Request):
    # Call clear_cache() on the citation expander and acknowledge.
    from ..middleware.citation_expander import get_citation_expander

    expander = get_citation_expander()
    expander.clear_cache()
    return {"ok": True}


@app.post("/v1/admin/citation_expander/reset")
def admin_citation_expander_reset(request: Request):
    # Call reset() on the citation expander and acknowledge.
    from ..middleware.citation_expander import get_citation_expander

    expander = get_citation_expander()
    expander.reset()
    return {"ok": True}


# ---- v3.35 Answer quality gate --------------------------------------
class QualityCheckRequest(BaseModel):
    # Body of POST /v1/admin/quality_gate/check; every field is forwarded
    # as the same-named keyword argument of the gate's check().
    answer: str
    confidence: Optional[float] = None
    grounded_ratio: Optional[float] = None
    template_report: Optional[Dict[str, Any]] = None


class UnsafePatternRequest(BaseModel):
    # Body of POST /v1/admin/quality_gate/unsafe_pattern; passed to
    # add_unsafe_pattern().
    pattern: str  # presumably a regex — TODO confirm against the gate


@app.get("/v1/admin/quality_gate")
def admin_quality_gate_stats():
    # Return the answer quality gate's stats() payload unmodified.
    from ..middleware.answer_quality_gate import get_answer_quality_gate

    gate = get_answer_quality_gate()
    return gate.stats()


@app.post("/v1/admin/quality_gate/check")
def admin_quality_gate_check(
    req: QualityCheckRequest,
    request: Request,
):
    # Run the quality gate on an answer and return the verdict as a dict.
    # ValueError -> HTTP 400.
    from ..middleware.answer_quality_gate import get_answer_quality_gate

    check_kwargs = {
        "answer": req.answer,
        "confidence": req.confidence,
        "grounded_ratio": req.grounded_ratio,
        "template_report": req.template_report,
    }
    try:
        gate_verdict = get_answer_quality_gate().check(**check_kwargs)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return gate_verdict.to_dict()


@app.post("/v1/admin/quality_gate/unsafe_pattern")
def admin_quality_gate_add_pattern(
    req: UnsafePatternRequest,
    request: Request,
):
    # Register an additional unsafe pattern with the quality gate.
    # ValueError -> HTTP 400.
    from ..middleware.answer_quality_gate import get_answer_quality_gate

    try:
        gate = get_answer_quality_gate()
        gate.add_unsafe_pattern(req.pattern)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return {"ok": True}


@app.post("/v1/admin/quality_gate/reset")
def admin_quality_gate_reset(request: Request):
    # Call reset() on the answer quality gate and acknowledge.
    from ..middleware.answer_quality_gate import get_answer_quality_gate

    gate = get_answer_quality_gate()
    gate.reset()
    return {"ok": True}


# ---- v3.36 Coverage monitor -----------------------------------------
class CoverageRecordRequest(BaseModel):
    # Body of POST /v1/admin/coverage/record; every field is forwarded as
    # the same-named keyword argument of the coverage monitor's record().
    query: str
    intent: Optional[str] = None
    topic: Optional[str] = None
    n_retrieved: int = 0
    max_score: Optional[float] = None
    final_confidence: Optional[float] = None
    user_feedback: Optional[str] = None


class CoverageFeedbackRequest(BaseModel):
    # Body of POST /v1/admin/coverage/feedback; forwarded to
    # record_feedback().
    query_substring: str
    feedback: str  # accepted values are defined by the coverage monitor


@app.get("/v1/admin/coverage")
def admin_coverage_stats():
    # Return the coverage monitor's stats() payload unmodified.
    from ..middleware.coverage_monitor import get_coverage_monitor

    monitor = get_coverage_monitor()
    return monitor.stats()


@app.post("/v1/admin/coverage/record")
def admin_coverage_record(
    req: CoverageRecordRequest,
    request: Request,
):
    # Record one query observation with the coverage monitor.
    # ValueError -> HTTP 400.
    from ..middleware.coverage_monitor import get_coverage_monitor

    observation = {
        "query": req.query,
        "intent": req.intent,
        "topic": req.topic,
        "n_retrieved": req.n_retrieved,
        "max_score": req.max_score,
        "final_confidence": req.final_confidence,
        "user_feedback": req.user_feedback,
    }
    try:
        get_coverage_monitor().record(**observation)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return {"ok": True}


@app.post("/v1/admin/coverage/feedback")
def admin_coverage_feedback(
    req: CoverageFeedbackRequest,
    request: Request,
):
    # Attach feedback via record_feedback(); its return value is reported
    # back under "updated". ValueError -> HTTP 400.
    from ..middleware.coverage_monitor import get_coverage_monitor

    try:
        monitor = get_coverage_monitor()
        updated = monitor.record_feedback(
            query_substring=req.query_substring,
            feedback=req.feedback,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return {"ok": True, "updated": updated}


@app.get("/v1/admin/coverage/analyze")
def admin_coverage_analyze(group_by: str = "both"):
    # Run the coverage analysis for the given grouping and return the
    # report as a dict. ValueError -> HTTP 400.
    from ..middleware.coverage_monitor import get_coverage_monitor

    try:
        monitor = get_coverage_monitor()
        coverage_report = monitor.analyze(group_by=group_by)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return coverage_report.to_dict()


@app.get("/v1/admin/coverage/gaps")
def admin_coverage_gaps(group_by: str = "both"):
    # List coverage gaps for the given grouping, each serialised with
    # to_dict(). ValueError -> HTTP 400.
    from ..middleware.coverage_monitor import get_coverage_monitor

    try:
        monitor = get_coverage_monitor()
        found_gaps = monitor.gaps(group_by=group_by)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)

    serialised = []
    for gap in found_gaps:
        serialised.append(gap.to_dict())
    return {"gaps": serialised}


@app.post("/v1/admin/coverage/reset")
def admin_coverage_reset(request: Request):
    # Call reset() on the coverage monitor and acknowledge.
    from ..middleware.coverage_monitor import get_coverage_monitor

    monitor = get_coverage_monitor()
    monitor.reset()
    return {"ok": True}


# ---- v3.37 Follow-up rewriter ---------------------------------------
class FollowupRewriteRequest(BaseModel):
    # Body of POST /v1/admin/followup/rewrite; forwarded to rewrite().
    query: str
    history: Optional[List[str]] = None  # passed through as-is, None included


@app.get("/v1/admin/followup")
def admin_followup_stats():
    # Return the follow-up rewriter's stats() payload unmodified.
    from ..middleware.followup_rewriter import get_followup_rewriter

    rewriter = get_followup_rewriter()
    return rewriter.stats()


@app.post("/v1/admin/followup/rewrite")
def admin_followup_rewrite(
    req: FollowupRewriteRequest,
    request: Request,
):
    # Rewrite a follow-up query using the supplied history and return the
    # result as a dict. ValueError -> HTTP 400.
    from ..middleware.followup_rewriter import get_followup_rewriter

    try:
        rewriter = get_followup_rewriter()
        rewritten = rewriter.rewrite(
            query=req.query,
            history=req.history,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return rewritten.to_dict()


@app.post("/v1/admin/followup/reset")
def admin_followup_reset(request: Request):
    # Call reset() on the follow-up rewriter and acknowledge.
    from ..middleware.followup_rewriter import get_followup_rewriter

    rewriter = get_followup_rewriter()
    rewriter.reset()
    return {"ok": True}


# ---- v3.38 Reasoning chain ------------------------------------------
class ReasoningStartRequest(BaseModel):
    # Body of POST /v1/admin/reasoning/start; forwarded to start_chain().
    chain_id: str
    query: str


class ReasoningStepRequest(BaseModel):
    # Body of POST /v1/admin/reasoning/step; every field is forwarded as
    # the same-named keyword argument of the recorder's add_step().
    chain_id: str
    step_id: str
    step_type: str  # accepted values are defined by the recorder
    text: str
    support: Optional[List[str]] = None
    dependencies: Optional[List[str]] = None  # presumably other step_ids — TODO confirm
    confidence: float = 1.0
    metadata: Optional[Dict[str, Any]] = None


@app.get("/v1/admin/reasoning")
def admin_reasoning_stats():
    # Snapshot of the reasoning recorder: stats first, then the chain list.
    from ..middleware.reasoning_chain import get_reasoning_recorder

    recorder = get_reasoning_recorder()
    stats = recorder.stats()
    chains = recorder.list_chains()
    return {"stats": stats, "chains": chains}


@app.post("/v1/admin/reasoning/start")
def admin_reasoning_start(
    req: ReasoningStartRequest,
    request: Request,
):
    # Start a new reasoning chain and return it as a dict.
    # ValueError -> HTTP 400.
    from ..middleware.reasoning_chain import get_reasoning_recorder

    try:
        recorder = get_reasoning_recorder()
        started = recorder.start_chain(
            chain_id=req.chain_id,
            query=req.query,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return started.to_dict()


@app.post("/v1/admin/reasoning/step")
def admin_reasoning_step(
    req: ReasoningStepRequest,
    request: Request,
):
    # Append a step to an existing chain and return the step as a dict.
    # ValueError -> HTTP 400.
    from ..middleware.reasoning_chain import get_reasoning_recorder

    step_kwargs = {
        "chain_id": req.chain_id,
        "step_id": req.step_id,
        "step_type": req.step_type,
        "text": req.text,
        "support": req.support,
        "dependencies": req.dependencies,
        "confidence": req.confidence,
        "metadata": req.metadata,
    }
    try:
        added = get_reasoning_recorder().add_step(**step_kwargs)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return added.to_dict()


@app.get("/v1/admin/reasoning/validate/{chain_id}")
def admin_reasoning_validate(chain_id: str):
    # Validate one reasoning chain and return the report as a dict.
    #
    # chain_id comes straight from the URL. A ValueError from the recorder
    # is mapped to HTTP 400, as the start/step handlers do, instead of
    # escaping as an unhandled 500.
    # NOTE(review): whether validate() raises for unknown chains is not
    # visible here; the mapping is a no-op if it never does.
    from ..middleware.reasoning_chain import get_reasoning_recorder

    try:
        report = get_reasoning_recorder().validate(chain_id)
    except ValueError as exc:
        raise HTTPException(
            status_code=400, detail={"error": str(exc)}
        ) from exc
    return report.to_dict()


@app.post("/v1/admin/reasoning/finalize/{chain_id}")
def admin_reasoning_finalize(chain_id: str, request: Request):
    # Finalize one reasoning chain and return the report as a dict.
    #
    # chain_id comes straight from the URL. A ValueError from the recorder
    # is mapped to HTTP 400, as the start/step handlers do, instead of
    # escaping as an unhandled 500.
    # NOTE(review): whether finalize() raises for unknown chains is not
    # visible here; the mapping is a no-op if it never does.
    from ..middleware.reasoning_chain import get_reasoning_recorder

    try:
        report = get_reasoning_recorder().finalize(chain_id)
    except ValueError as exc:
        raise HTTPException(
            status_code=400, detail={"error": str(exc)}
        ) from exc
    return report.to_dict()


@app.get("/v1/admin/reasoning/chain/{chain_id}")
def admin_reasoning_get(chain_id: str):
    # Fetch one chain; a None result from get_chain() is reported as 404.
    from ..middleware.reasoning_chain import get_reasoning_recorder

    recorder = get_reasoning_recorder()
    found = recorder.get_chain(chain_id)
    if found is not None:
        return found
    raise HTTPException(status_code=404, detail={"error": "not found"})


@app.get("/v1/admin/reasoning/render/{chain_id}")
def admin_reasoning_render(chain_id: str):
    # Render one reasoning chain as markdown, served as a plain-text
    # response.
    #
    # chain_id comes straight from the URL. A ValueError from the recorder
    # is mapped to HTTP 400, as the start/step handlers do, instead of
    # escaping as an unhandled 500.
    # NOTE(review): whether render_markdown() raises for unknown chains is
    # not visible here; the mapping is a no-op if it never does.
    from fastapi.responses import PlainTextResponse
    from ..middleware.reasoning_chain import get_reasoning_recorder

    try:
        md = get_reasoning_recorder().render_markdown(chain_id)
    except ValueError as exc:
        raise HTTPException(
            status_code=400, detail={"error": str(exc)}
        ) from exc
    return PlainTextResponse(md)


@app.delete("/v1/admin/reasoning/chain/{chain_id}")
def admin_reasoning_remove(chain_id: str, request: Request):
    # Remove one chain; a falsy result from remove_chain() is reported as
    # HTTP 404.
    from ..middleware.reasoning_chain import get_reasoning_recorder

    removed = get_reasoning_recorder().remove_chain(chain_id)
    if removed:
        return {"ok": True}
    raise HTTPException(status_code=404, detail={"error": "not found"})


@app.post("/v1/admin/reasoning/reset")
def admin_reasoning_reset(request: Request):
    # Call reset() on the reasoning recorder and acknowledge.
    from ..middleware.reasoning_chain import get_reasoning_recorder

    recorder = get_reasoning_recorder()
    recorder.reset()
    return {"ok": True}


# ---- v3.39 Injection detector ---------------------------------------
class InjectionScanRequest(BaseModel):
    # Body of POST /v1/admin/injection/scan; `text` is passed positionally
    # and `source` as a keyword to the detector's scan().
    text: str
    source: str = "query"  # accepted values are defined by the detector


class InjectionScanDocsRequest(BaseModel):
    # Body shared by POST /v1/admin/injection/scan_docs and
    # POST /v1/admin/injection/filter.
    docs: List[Dict[str, Any]]  # free-form dicts; schema is not validated here


class InjectionRuleRequest(BaseModel):
    # Body of POST /v1/admin/injection/rule. All fields except `replace`
    # are copied into an InjectionRule; `replace` is passed to add_rule().
    rule_id: str
    category: str
    severity: str
    pattern: str  # presumably a regex — TODO confirm against InjectionRule
    replace: bool = False


@app.get("/v1/admin/injection")
def admin_injection_stats():
    # Snapshot of the injection detector: stats first, then its rule list.
    from ..middleware.injection_detector import get_injection_detector

    detector = get_injection_detector()
    stats = detector.stats()
    rules = detector.list_rules()
    return {"stats": stats, "rules": rules}


@app.post("/v1/admin/injection/scan")
def admin_injection_scan(
    req: InjectionScanRequest,
    request: Request,
):
    # Scan one piece of text and return the scan result as a dict.
    # ValueError -> HTTP 400.
    from ..middleware.injection_detector import get_injection_detector

    try:
        detector = get_injection_detector()
        scan_result = detector.scan(req.text, source=req.source)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return scan_result.to_dict()


@app.post("/v1/admin/injection/scan_docs")
def admin_injection_scan_docs(
    req: InjectionScanDocsRequest,
    request: Request,
):
    # Scan every submitted doc and return the per-doc scans as dicts.
    from ..middleware.injection_detector import get_injection_detector

    detector = get_injection_detector()
    serialised = []
    for scan in detector.scan_docs(req.docs):
        serialised.append(scan.to_dict())
    return {"scans": serialised}


@app.post("/v1/admin/injection/filter")
def admin_injection_filter(
    req: InjectionScanDocsRequest,
    request: Request,
):
    # Partition the submitted docs with filter_safe_docs() and report the
    # counts and ids of each side (not the docs themselves).
    from ..middleware.injection_detector import get_injection_detector

    def _id_of(doc):
        # Prefer a truthy "doc_id"; otherwise use "id", defaulting to "".
        return doc.get("doc_id") or doc.get("id", "")

    detector = get_injection_detector()
    safe_docs, unsafe_docs = detector.filter_safe_docs(req.docs)
    safe_ids = [_id_of(doc) for doc in safe_docs]
    unsafe_ids = [_id_of(doc) for doc in unsafe_docs]
    return {
        "n_safe": len(safe_docs),
        "n_unsafe": len(unsafe_docs),
        "safe_doc_ids": safe_ids,
        "unsafe_doc_ids": unsafe_ids,
    }


@app.post("/v1/admin/injection/rule")
def admin_injection_add_rule(
    req: InjectionRuleRequest,
    request: Request,
):
    # Build an InjectionRule from the request and add it to the detector.
    # Construction stays inside the try so a ValueError from either the
    # rule constructor or add_rule() becomes HTTP 400.
    from ..middleware.injection_detector import (
        InjectionRule,
        get_injection_detector,
    )

    try:
        detector = get_injection_detector()
        rule = InjectionRule(
            rule_id=req.rule_id,
            category=req.category,
            severity=req.severity,
            pattern=req.pattern,
        )
        detector.add_rule(rule, replace=req.replace)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return {"ok": True}


@app.delete("/v1/admin/injection/rule/{rule_id}")
def admin_injection_remove_rule(rule_id: str, request: Request):
    # Remove one rule; a falsy result from remove_rule() is reported as
    # HTTP 404.
    from ..middleware.injection_detector import get_injection_detector

    removed = get_injection_detector().remove_rule(rule_id)
    if removed:
        return {"ok": True}
    raise HTTPException(status_code=404, detail={"error": "not found"})


@app.post("/v1/admin/injection/reset")
def admin_injection_reset(request: Request):
    # Unlike the other reset endpoints here, this one installs a freshly
    # constructed InjectionDetector rather than calling reset() on the
    # existing one.
    from ..middleware.injection_detector import (
        InjectionDetector,
        set_injection_detector,
    )

    fresh_detector = InjectionDetector()
    set_injection_detector(fresh_detector)
    return {"ok": True}


# ---- v3.40 Cost forecaster ------------------------------------------
class StageProfileRequest(BaseModel):
    # Body of POST /v1/admin/cost_forecast/stage. All fields except
    # `replace` are copied into a StageProfile; `replace` is passed to
    # register_stage().
    name: str
    base_cost_cents: float = 0.0
    base_latency_ms: float = 0.0
    per_token_cost_cents: float = 0.0
    per_token_latency_ms: float = 0.0
    per_doc_cost_cents: float = 0.0
    replace: bool = False


class ForecastRequest(BaseModel):
    # Body of POST /v1/admin/cost_forecast/forecast; every field is
    # forwarded as the same-named keyword argument of forecast().
    query: str
    intent: Optional[str] = None
    n_subqueries: Optional[int] = None
    top_k: int = 10
    estimated_output_tokens: int = 300
    stages: Optional[List[str]] = None  # presumably registered stage names — TODO confirm
    budget_cents: Optional[float] = None
    sla_ms: Optional[float] = None


class RecordActualRequest(BaseModel):
    # Body of POST /v1/admin/cost_forecast/actual; forwarded to
    # record_actual().
    intent: str
    complexity: str  # accepted values are defined by the forecaster
    actual_cost_cents: float
    actual_latency_ms: float


@app.get("/v1/admin/cost_forecast")
def admin_cost_forecast_stats():
    # Snapshot of the cost forecaster: stats first, then its stage list.
    from ..middleware.cost_forecaster import get_cost_forecaster

    forecaster = get_cost_forecaster()
    stats = forecaster.stats()
    stages = forecaster.list_stages()
    return {"stats": stats, "stages": stages}


@app.post("/v1/admin/cost_forecast/stage")
def admin_cost_forecast_add_stage(
    req: StageProfileRequest,
    request: Request,
):
    # Build a StageProfile from the request and register it. Construction
    # stays inside the try so a ValueError from either the profile
    # constructor or register_stage() becomes HTTP 400.
    from ..middleware.cost_forecaster import (
        StageProfile,
        get_cost_forecaster,
    )

    try:
        forecaster = get_cost_forecaster()
        profile = StageProfile(
            name=req.name,
            base_cost_cents=req.base_cost_cents,
            base_latency_ms=req.base_latency_ms,
            per_token_cost_cents=req.per_token_cost_cents,
            per_token_latency_ms=req.per_token_latency_ms,
            per_doc_cost_cents=req.per_doc_cost_cents,
        )
        forecaster.register_stage(profile, replace=req.replace)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return {"ok": True}


@app.post("/v1/admin/cost_forecast/forecast")
def admin_cost_forecast_run(req: ForecastRequest, request: Request):
    # Produce a forecast for the query and return it as a dict.
    # ValueError -> HTTP 400.
    from ..middleware.cost_forecaster import get_cost_forecaster

    forecast_kwargs = {
        "query": req.query,
        "intent": req.intent,
        "n_subqueries": req.n_subqueries,
        "top_k": req.top_k,
        "estimated_output_tokens": req.estimated_output_tokens,
        "stages": req.stages,
        "budget_cents": req.budget_cents,
        "sla_ms": req.sla_ms,
    }
    try:
        prediction = get_cost_forecaster().forecast(**forecast_kwargs)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return prediction.to_dict()


@app.post("/v1/admin/cost_forecast/actual")
def admin_cost_forecast_record(
    req: RecordActualRequest,
    request: Request,
):
    # Record an observed cost/latency pair with the forecaster.
    # ValueError -> HTTP 400.
    from ..middleware.cost_forecaster import get_cost_forecaster

    try:
        forecaster = get_cost_forecaster()
        forecaster.record_actual(
            intent=req.intent,
            complexity=req.complexity,
            actual_cost_cents=req.actual_cost_cents,
            actual_latency_ms=req.actual_latency_ms,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return {"ok": True}


@app.post("/v1/admin/cost_forecast/reset")
def admin_cost_forecast_reset(request: Request):
    # Call reset() on the cost forecaster and acknowledge.
    from ..middleware.cost_forecaster import get_cost_forecaster

    forecaster = get_cost_forecaster()
    forecaster.reset()
    return {"ok": True}


# ---- v3.41 Cross-lingual bridge -------------------------------------
class BridgeRequest(BaseModel):
    # Body of POST /v1/admin/crosslingual/bridge; forwarded to bridge().
    query: str
    target_lang: Optional[str] = None
    source_lang: Optional[str] = None


class AddTermRequest(BaseModel):
    # Body of POST /v1/admin/crosslingual/term; forwarded to add_term().
    en: str  # presumably the English form of the term — TODO confirm
    he: str  # presumably the Hebrew form of the term — TODO confirm
    domain: str = "general"
    confidence: float = 1.0


class TranslateTermRequest(BaseModel):
    # Body of POST /v1/admin/crosslingual/translate; both fields are passed
    # positionally to translate_term().
    term: str
    source_lang: str


@app.get("/v1/admin/crosslingual")
def admin_crosslingual_stats():
    # Return the cross-lingual bridge's stats() payload unmodified.
    from ..middleware.crosslingual_bridge import get_crosslingual_bridge

    bridge = get_crosslingual_bridge()
    return bridge.stats()


@app.post("/v1/admin/crosslingual/bridge")
def admin_crosslingual_bridge(req: BridgeRequest, request: Request):
    # Bridge a query across languages and return the result as a dict.
    # ValueError -> HTTP 400.
    from ..middleware.crosslingual_bridge import get_crosslingual_bridge

    try:
        bridge = get_crosslingual_bridge()
        bridged = bridge.bridge(
            query=req.query,
            target_lang=req.target_lang,
            source_lang=req.source_lang,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return bridged.to_dict()


@app.post("/v1/admin/crosslingual/term")
def admin_crosslingual_add_term(
    req: AddTermRequest,
    request: Request,
):
    # Add one en/he term pair to the bridge. ValueError -> HTTP 400.
    from ..middleware.crosslingual_bridge import get_crosslingual_bridge

    try:
        bridge = get_crosslingual_bridge()
        bridge.add_term(
            en=req.en,
            he=req.he,
            domain=req.domain,
            confidence=req.confidence,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return {"ok": True}


@app.post("/v1/admin/crosslingual/translate")
def admin_crosslingual_translate(
    req: TranslateTermRequest,
    request: Request,
):
    # Translate a single term. ValueError -> HTTP 400; a None result from
    # translate_term() -> HTTP 404.
    from ..middleware.crosslingual_bridge import get_crosslingual_bridge

    try:
        bridge = get_crosslingual_bridge()
        translation = bridge.translate_term(req.term, req.source_lang)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)

    if translation is not None:
        return {"term": req.term, "translation": translation}
    raise HTTPException(
        status_code=404,
        detail={"error": "term not in dictionary"},
    )


@app.get("/v1/admin/crosslingual/terms")
def admin_crosslingual_list_terms(
    domain: Optional[str] = None,
    limit: int = 100,
):
    # List terms from the bridge using the optional domain and the limit.
    # ValueError -> HTTP 400.
    from ..middleware.crosslingual_bridge import get_crosslingual_bridge

    try:
        bridge = get_crosslingual_bridge()
        listed = bridge.list_terms(domain=domain, limit=limit)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return {"terms": listed}


@app.post("/v1/admin/crosslingual/reset")
def admin_crosslingual_reset(request: Request):
    # Call reset() on the cross-lingual bridge and acknowledge.
    from ..middleware.crosslingual_bridge import get_crosslingual_bridge

    bridge = get_crosslingual_bridge()
    bridge.reset()
    return {"ok": True}


# ---- v3.42 Diversity enforcer ---------------------------------------
class DiversityEnforceRequest(BaseModel):
    # Body of POST /v1/admin/diversity/enforce; forwarded to enforce().
    docs: List[Dict[str, Any]]  # free-form dicts; schema is not validated here
    top_k: int = 10
    lambda_diversity: Optional[float] = None  # passed through as-is, None included


@app.get("/v1/admin/diversity")
def admin_diversity_stats():
    # Snapshot of the diversity enforcer: stats first, then its dimensions.
    from ..middleware.diversity_enforcer import get_diversity_enforcer

    enforcer = get_diversity_enforcer()
    stats = enforcer.stats()
    dimensions = enforcer.list_dimensions()
    return {"stats": stats, "dimensions": dimensions}


@app.post("/v1/admin/diversity/enforce")
def admin_diversity_enforce(
    req: DiversityEnforceRequest,
    request: Request,
):
    # Run the diversity enforcer over the submitted docs and return the
    # report as a dict. ValueError -> HTTP 400.
    from ..middleware.diversity_enforcer import get_diversity_enforcer

    try:
        enforcer = get_diversity_enforcer()
        diversity_report = enforcer.enforce(
            docs=req.docs,
            top_k=req.top_k,
            lambda_diversity=req.lambda_diversity,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return diversity_report.to_dict()


class DiversityEnforcerLambdaRequest(BaseModel):
    # Body of POST /v1/admin/diversity_enforcer/lambda; `value` is passed
    # to the enforcer's set_lambda().
    value: float  # valid range is enforced by the enforcer, not here


@app.post("/v1/admin/diversity_enforcer/lambda")
def admin_diversity_enforcer_set_lambda(
    req: DiversityEnforcerLambdaRequest,
    request: Request,
):
    # Set the enforcer's lambda. The response echoes the requested value.
    # ValueError -> HTTP 400.
    # NOTE(review): this route lives under /diversity_enforcer/ while the
    # other diversity routes use /diversity/; left as-is for clients.
    from ..middleware.diversity_enforcer import get_diversity_enforcer

    requested_lambda = req.value
    try:
        enforcer = get_diversity_enforcer()
        enforcer.set_lambda(requested_lambda)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return {"ok": True, "lambda_diversity": requested_lambda}


@app.post("/v1/admin/diversity/reset")
def admin_diversity_reset(request: Request):
    # Call reset() on the diversity enforcer and acknowledge.
    from ..middleware.diversity_enforcer import get_diversity_enforcer

    enforcer = get_diversity_enforcer()
    enforcer.reset()
    return {"ok": True}


# ---- v3.43 Session exporter -----------------------------------------
class SessionExportRequest(BaseModel):
    """Request body for ``POST /v1/admin/session_export/export``."""

    # Raw session payload; converted via ``SessionExporter.session_from_dict``.
    session: Dict[str, Any]
    # Forwarded as ``export(format=...)``; ``None`` is passed through as-is.
    format: Optional[str] = None


@app.get("/v1/admin/session_export")
def admin_session_export_stats():
    """Return ``stats()`` of the session exporter."""
    from ..middleware.session_exporter import get_session_exporter

    exporter = get_session_exporter()
    return exporter.stats()


@app.post("/v1/admin/session_export/export")
def admin_session_export(req: SessionExportRequest, request: Request):
    """Build a session from ``req.session`` and export it.

    A ``ValueError`` raised while building or exporting the session becomes
    HTTP 400; otherwise the export result's ``to_dict()`` is returned.
    """
    from ..middleware.session_exporter import (
        SessionExporter,
        get_session_exporter,
    )

    try:
        parsed = SessionExporter.session_from_dict(req.session)
        outcome = get_session_exporter().export(parsed, format=req.format)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return outcome.to_dict()


class ExportFormatRequest(BaseModel):
    """Request body for ``POST /v1/admin/session_export/format``."""

    # Passed to ``set_default_format`` and echoed back as ``default_format``.
    value: str


class ExportAnonymizeRequest(BaseModel):
    """Request body for ``POST /v1/admin/session_export/anonymize``."""

    # Passed to ``set_redactor_enabled`` and echoed back as ``redactor_enabled``.
    value: bool


@app.post("/v1/admin/session_export/format")
def admin_session_export_format(req: ExportFormatRequest, request: Request):
    """Set the session exporter's default format to ``req.value``.

    A ``ValueError`` from the exporter becomes HTTP 400; on success the
    requested value is echoed back under ``default_format``.
    """
    from ..middleware.session_exporter import get_session_exporter

    try:
        get_session_exporter().set_default_format(req.value)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return {"ok": True, "default_format": req.value}


@app.post("/v1/admin/session_export/anonymize")
def admin_session_export_anonymize(
    req: ExportAnonymizeRequest,
    request: Request,
):
    """Toggle the session exporter's redactor according to ``req.value``."""
    from ..middleware.session_exporter import get_session_exporter

    exporter = get_session_exporter()
    exporter.set_redactor_enabled(req.value)
    return {"ok": True, "redactor_enabled": req.value}


@app.post("/v1/admin/session_export/reset")
def admin_session_export_reset(request: Request):
    """Call ``reset()`` on the session exporter and acknowledge."""
    from ..middleware.session_exporter import get_session_exporter

    exporter = get_session_exporter()
    exporter.reset()
    return {"ok": True}


# ---- v3.44 Fact consistency -----------------------------------------
class FactCheckRequest(BaseModel):
    """Request body for ``POST /v1/admin/fact_check/check``."""

    # Answer text handed to the checker's ``check``.
    answer: str


class FactLawRequest(BaseModel):
    """Request body for ``POST /v1/admin/fact_check/law``.

    All three fields are forwarded as same-named keywords of ``add_law``.
    """

    name: str
    year: int
    # NOTE(review): presumably the Hebrew-calendar year string — confirm
    # against ``add_law``.
    he_year: Optional[str] = None


class FactCaseRequest(BaseModel):
    """Request body for ``POST /v1/admin/fact_check/case``."""

    # Forwarded as ``add_case(canonical_id=...)``.
    canonical_id: str
    # Expanded into extra keyword arguments of ``add_case`` (``**metadata``),
    # so its keys must be valid keyword names for that call.
    # NOTE(review): mutable ``{}`` default — pydantic is documented to copy
    # field defaults per instance; confirm for the pinned pydantic version.
    metadata: Dict[str, Any] = {}


class FactSectionRequest(BaseModel):
    """Request body for ``POST /v1/admin/fact_check/section``.

    All three fields are forwarded as same-named keywords of ``add_section``.
    """

    key: str
    text: str
    law: Optional[str] = None


@app.get("/v1/admin/fact_check")
def admin_fact_check_stats():
    """Return ``stats()`` of the fact-consistency checker."""
    from ..middleware.fact_consistency import get_fact_consistency_checker

    checker = get_fact_consistency_checker()
    return checker.stats()


@app.post("/v1/admin/fact_check/check")
def admin_fact_check(req: FactCheckRequest, request: Request):
    """Check ``req.answer`` with the fact-consistency checker.

    A ``ValueError`` from the check becomes HTTP 400; otherwise the report's
    ``to_dict()`` is returned.
    """
    from ..middleware.fact_consistency import get_fact_consistency_checker

    try:
        report = get_fact_consistency_checker().check(req.answer)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return report.to_dict()


@app.post("/v1/admin/fact_check/law")
def admin_fact_check_add_law(req: FactLawRequest, request: Request):
    """Register a law with the fact-consistency checker.

    A ``ValueError`` from ``add_law`` becomes HTTP 400.
    """
    from ..middleware.fact_consistency import get_fact_consistency_checker

    try:
        get_fact_consistency_checker().add_law(
            name=req.name,
            year=req.year,
            he_year=req.he_year,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return {"ok": True}


@app.post("/v1/admin/fact_check/case")
def admin_fact_check_add_case(req: FactCaseRequest, request: Request):
    """Register a case with the fact-consistency checker.

    ``req.metadata`` is expanded into keyword arguments of ``add_case``
    alongside the explicit ``canonical_id`` keyword.

    Raises:
        HTTPException: 400 when ``metadata`` carries its own
            ``canonical_id`` key, or when ``add_case`` raises ``ValueError``.
    """
    from ..middleware.fact_consistency import get_fact_consistency_checker

    # A "canonical_id" key inside metadata would collide with the explicit
    # keyword below and make the call raise TypeError ("got multiple values
    # for keyword argument"), i.e. an HTTP 500 for what is a client error.
    if "canonical_id" in req.metadata:
        raise HTTPException(
            status_code=400,
            detail={"error": "metadata must not contain 'canonical_id'"},
        )
    try:
        get_fact_consistency_checker().add_case(
            canonical_id=req.canonical_id, **req.metadata)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)}) from e
    return {"ok": True}


@app.post("/v1/admin/fact_check/section")
def admin_fact_check_add_section(req: FactSectionRequest, request: Request):
    """Register a section with the fact-consistency checker.

    A ``ValueError`` from ``add_section`` becomes HTTP 400.
    """
    from ..middleware.fact_consistency import get_fact_consistency_checker

    try:
        get_fact_consistency_checker().add_section(
            key=req.key,
            text=req.text,
            law=req.law,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return {"ok": True}


@app.post("/v1/admin/fact_check/reset")
def admin_fact_check_reset(request: Request):
    """Call ``reset()`` on the fact-consistency checker and acknowledge."""
    from ..middleware.fact_consistency import get_fact_consistency_checker

    checker = get_fact_consistency_checker()
    checker.reset()
    return {"ok": True}


# ---- v3.45 Issue spotter --------------------------------------------
class IssueScanRequest(BaseModel):
    """Request body for ``POST /v1/admin/issue_spotter/scan``.

    Both fields are passed positionally to the spotter's ``scan``.
    """

    doc_id: str
    text: str


class IssueScanDocsRequest(BaseModel):
    """Request body for ``POST /v1/admin/issue_spotter/scan_docs``."""

    # Passed unchanged to ``scan_docs``; the per-dict schema is defined there.
    docs: List[Dict[str, Any]]


@app.get("/v1/admin/issue_spotter")
def admin_issue_spotter_stats():
    """Return the issue spotter's stats together with its rule list."""
    from ..middleware.issue_spotter import get_issue_spotter

    spotter = get_issue_spotter()
    stats = spotter.stats()
    rules = spotter.list_rules()
    return {"stats": stats, "rules": rules}


@app.post("/v1/admin/issue_spotter/scan")
def admin_issue_spotter_scan(req: IssueScanRequest, request: Request):
    """Scan one document with the issue spotter and return its report.

    A ``ValueError`` from the scan becomes HTTP 400.
    """
    from ..middleware.issue_spotter import get_issue_spotter

    try:
        report = get_issue_spotter().scan(req.doc_id, req.text)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return report.to_dict()


@app.post("/v1/admin/issue_spotter/scan_docs")
def admin_issue_spotter_scan_docs(req: IssueScanDocsRequest,
                                       request: Request):
    """Scan several documents with the issue spotter.

    Returns ``{"reports": [...]}`` with one ``to_dict()`` per report.

    Raises:
        HTTPException: 400 when ``scan_docs`` raises ``ValueError`` — the
            same mapping the single-document ``scan`` endpoint applies, so
            bad input is not reported as a server error.
    """
    from ..middleware.issue_spotter import get_issue_spotter
    try:
        reports = get_issue_spotter().scan_docs(req.docs)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)}) from e
    return {"reports": [r.to_dict() for r in reports]}


@app.post("/v1/admin/issue_spotter/reset")
def admin_issue_spotter_reset(request: Request):
    """Install a freshly constructed ``IssueSpotter`` via ``set_issue_spotter``."""
    from ..middleware.issue_spotter import IssueSpotter, set_issue_spotter

    fresh_spotter = IssueSpotter()
    set_issue_spotter(fresh_spotter)
    return {"ok": True}


# ---- v3.46 Document classifier --------------------------------------
class ClassifyRequest(BaseModel):
    """Request body for ``POST /v1/admin/doc_classifier/classify``.

    Both fields are passed positionally to the classifier's ``classify``.
    """

    doc_id: str
    text: str


class ClassifyManyRequest(BaseModel):
    """Request body for ``POST /v1/admin/doc_classifier/classify_many``."""

    # Passed unchanged to ``classify_many``; per-dict schema is defined there.
    docs: List[Dict[str, Any]]


@app.get("/v1/admin/doc_classifier")
def admin_doc_classifier_stats():
    """Return the document classifier's stats and supported types."""
    from ..middleware.doc_classifier import get_doc_classifier

    classifier = get_doc_classifier()
    stats = classifier.stats()
    supported = classifier.supported_types()
    return {"stats": stats, "supported_types": supported}


@app.post("/v1/admin/doc_classifier/classify")
def admin_doc_classifier_classify(req: ClassifyRequest, request: Request):
    """Classify one document and return the result's ``to_dict()``.

    A ``ValueError`` from the classifier becomes HTTP 400.
    """
    from ..middleware.doc_classifier import get_doc_classifier

    try:
        result = get_doc_classifier().classify(req.doc_id, req.text)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return result.to_dict()


@app.post("/v1/admin/doc_classifier/classify_many")
def admin_doc_classifier_classify_many(req: ClassifyManyRequest,
                                             request: Request):
    """Classify several documents.

    Returns ``{"results": [...]}`` with one ``to_dict()`` per result.

    Raises:
        HTTPException: 400 when ``classify_many`` raises ``ValueError`` —
            the same mapping the single-document ``classify`` endpoint
            applies, so bad input is not reported as a server error.
    """
    from ..middleware.doc_classifier import get_doc_classifier
    try:
        results = get_doc_classifier().classify_many(req.docs)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)}) from e
    return {"results": [r.to_dict() for r in results]}


@app.post("/v1/admin/doc_classifier/reset")
def admin_doc_classifier_reset(request: Request):
    """Call ``reset()`` on the document classifier and acknowledge."""
    from ..middleware.doc_classifier import get_doc_classifier

    classifier = get_doc_classifier()
    classifier.reset()
    return {"ok": True}


# ---- v3.47 Cache invalidator ----------------------------------------
class CacheRegisterRequest(BaseModel):
    """Request body for ``POST /v1/admin/cache_invalidator/register``.

    Every field is forwarded as the same-named keyword of ``register``.
    """

    cache_key: str
    doc_ids: List[str]
    tenant_id: Optional[str] = None
    source_types: Optional[List[str]] = None
    ttl_seconds: Optional[float] = None


class CacheInvalidateDocRequest(BaseModel):
    """Request body for ``POST /v1/admin/cache_invalidator/invalidate_doc``.

    Every field is forwarded as the same-named keyword of ``invalidate_doc``.
    """

    doc_id: str
    reason: str = "doc_updated"
    actor: Optional[str] = None


class CacheInvalidateBulkRequest(BaseModel):
    """Request body for ``POST /v1/admin/cache_invalidator/invalidate_bulk``.

    Every field is forwarded as the same-named keyword of ``invalidate_docs``.
    """

    doc_ids: List[str]
    reason: str = "bulk"
    actor: Optional[str] = None


class CacheInvalidatePatternRequest(BaseModel):
    """Request body for ``POST /v1/admin/cache_invalidator/invalidate_pattern``.

    Every field is forwarded as the same-named keyword of
    ``invalidate_pattern``.
    """

    # NOTE(review): pattern syntax (glob vs. regex) is defined by
    # ``invalidate_pattern`` — confirm there.
    pattern: str
    reason: str = "bulk"
    actor: Optional[str] = None


@app.get("/v1/admin/cache_invalidator")
def admin_cache_invalidator_stats():
    """Return ``stats()`` of the cache invalidator."""
    from ..middleware.cache_invalidator import get_cache_invalidator

    invalidator = get_cache_invalidator()
    return invalidator.stats()


@app.post("/v1/admin/cache_invalidator/register")
def admin_cache_invalidator_register(
    req: CacheRegisterRequest,
    request: Request,
):
    """Register a cache key with the cache invalidator.

    A ``ValueError`` from ``register`` becomes HTTP 400.
    """
    from ..middleware.cache_invalidator import get_cache_invalidator

    try:
        get_cache_invalidator().register(
            cache_key=req.cache_key,
            doc_ids=req.doc_ids,
            tenant_id=req.tenant_id,
            source_types=req.source_types,
            ttl_seconds=req.ttl_seconds,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return {"ok": True}


@app.delete("/v1/admin/cache_invalidator/register/{cache_key}")
def admin_cache_invalidator_unregister(cache_key: str, request: Request):
    """Unregister ``cache_key``; respond 404 when ``unregister`` is falsy."""
    from ..middleware.cache_invalidator import get_cache_invalidator

    removed = get_cache_invalidator().unregister(cache_key)
    if removed:
        return {"ok": True}
    raise HTTPException(status_code=404, detail={"error": "not found"})


@app.post("/v1/admin/cache_invalidator/invalidate_doc")
def admin_cache_invalidator_doc(
    req: CacheInvalidateDocRequest,
    request: Request,
):
    """Invalidate one document and return the resulting event as a dict.

    A ``ValueError`` from ``invalidate_doc`` becomes HTTP 400.
    """
    from ..middleware.cache_invalidator import get_cache_invalidator

    try:
        event = get_cache_invalidator().invalidate_doc(
            doc_id=req.doc_id,
            reason=req.reason,
            actor=req.actor,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return event.to_dict()


@app.post("/v1/admin/cache_invalidator/invalidate_bulk")
def admin_cache_invalidator_bulk(req: CacheInvalidateBulkRequest,
                                       request: Request):
    """Invalidate several documents and return the resulting event as a dict.

    Raises:
        HTTPException: 400 when ``invalidate_docs`` raises ``ValueError`` —
            the same mapping the ``invalidate_doc`` and
            ``invalidate_pattern`` endpoints apply, so bad input is not
            reported as a server error.
    """
    from ..middleware.cache_invalidator import get_cache_invalidator
    try:
        event = get_cache_invalidator().invalidate_docs(
            doc_ids=req.doc_ids, reason=req.reason,
            actor=req.actor,
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)}) from e
    return event.to_dict()


@app.post("/v1/admin/cache_invalidator/invalidate_pattern")
def admin_cache_invalidator_pattern(
    req: CacheInvalidatePatternRequest,
    request: Request,
):
    """Invalidate by pattern and return the resulting event as a dict.

    A ``ValueError`` from ``invalidate_pattern`` becomes HTTP 400.
    """
    from ..middleware.cache_invalidator import get_cache_invalidator

    try:
        event = get_cache_invalidator().invalidate_pattern(
            pattern=req.pattern,
            reason=req.reason,
            actor=req.actor,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return event.to_dict()


@app.get("/v1/admin/cache_invalidator/history")
def admin_cache_invalidator_history(
    limit: int = 50,
    reason: Optional[str] = None,
):
    """Return invalidation history under the ``events`` key.

    ``limit`` and ``reason`` are forwarded to ``history``; a ``ValueError``
    from it becomes HTTP 400.
    """
    from ..middleware.cache_invalidator import get_cache_invalidator

    try:
        events = get_cache_invalidator().history(limit=limit, reason=reason)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return {"events": events}


@app.post("/v1/admin/cache_invalidator/reset")
def admin_cache_invalidator_reset(request: Request):
    """Call ``reset()`` on the cache invalidator and acknowledge."""
    from ..middleware.cache_invalidator import get_cache_invalidator

    invalidator = get_cache_invalidator()
    invalidator.reset()
    return {"ok": True}


# ---- v3.48 Query analytics ------------------------------------------
class AnalyticsRecordRequest(BaseModel):
    """Request body for ``POST /v1/admin/query_analytics/record``.

    Every field is forwarded as the same-named keyword of the analytics
    ``record`` call.
    """

    query: str
    tenant_id: Optional[str] = None
    user_id: Optional[str] = None
    intent: Optional[str] = None
    latency_ms: Optional[float] = None
    cost_cents: Optional[float] = None
    ok: bool = True
    failed_stage: Optional[str] = None
    n_retrieved: int = 0
    confidence: Optional[float] = None


@app.get("/v1/admin/query_analytics")
def admin_query_analytics_stats():
    """Return ``stats()`` of the query analytics component."""
    from ..middleware.query_analytics import get_query_analytics

    analytics = get_query_analytics()
    return analytics.stats()


@app.post("/v1/admin/query_analytics/record")
def admin_query_analytics_record(
    req: AnalyticsRecordRequest,
    request: Request,
):
    """Record one query observation with the analytics component.

    A ``ValueError`` from ``record`` becomes HTTP 400.
    """
    from ..middleware.query_analytics import get_query_analytics

    try:
        get_query_analytics().record(
            query=req.query,
            tenant_id=req.tenant_id,
            user_id=req.user_id,
            intent=req.intent,
            latency_ms=req.latency_ms,
            cost_cents=req.cost_cents,
            ok=req.ok,
            failed_stage=req.failed_stage,
            n_retrieved=req.n_retrieved,
            confidence=req.confidence,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return {"ok": True}


@app.get("/v1/admin/query_analytics/summary")
def admin_query_analytics_summary(since_ts: Optional[float] = None):
    """Return ``overall_summary(since_ts=...)`` from query analytics."""
    from ..middleware.query_analytics import get_query_analytics

    analytics = get_query_analytics()
    summary = analytics.overall_summary(since_ts=since_ts)
    return summary


@app.get("/v1/admin/query_analytics/bucketed")
def admin_query_analytics_bucketed(
    granularity: str = "hour",
    since_ts: Optional[float] = None,
    until_ts: Optional[float] = None,
    tenant_id: Optional[str] = None,
    intent: Optional[str] = None,
):
    """Return bucketed analytics under the ``buckets`` key.

    All query parameters are forwarded to ``bucketed``; a ``ValueError``
    from it becomes HTTP 400.
    """
    from ..middleware.query_analytics import get_query_analytics

    try:
        buckets = get_query_analytics().bucketed(
            granularity=granularity,
            since_ts=since_ts,
            until_ts=until_ts,
            tenant_id=tenant_id,
            intent=intent,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return {"buckets": [bucket.to_dict() for bucket in buckets]}


@app.get("/v1/admin/query_analytics/top")
def admin_query_analytics_top(
    by: str = "volume",
    limit: int = 10,
    since_ts: Optional[float] = None,
):
    """Return the top queries under the ``top`` key.

    Parameters are forwarded to ``top_queries``; a ``ValueError`` from it
    becomes HTTP 400.
    """
    from ..middleware.query_analytics import get_query_analytics

    try:
        ranked = get_query_analytics().top_queries(
            by=by,
            limit=limit,
            since_ts=since_ts,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return {"top": [entry.to_dict() for entry in ranked]}


@app.get("/v1/admin/query_analytics/tenants")
def admin_query_analytics_tenants(since_ts: Optional[float] = None):
    """Return per-tenant summaries under the ``tenants`` key."""
    from ..middleware.query_analytics import get_query_analytics

    analytics = get_query_analytics()
    per_tenant = analytics.tenant_summaries(since_ts=since_ts)
    return {"tenants": [summary.to_dict() for summary in per_tenant]}


@app.post("/v1/admin/query_analytics/reset")
def admin_query_analytics_reset(request: Request):
    """Call ``reset()`` on the query analytics component and acknowledge."""
    from ..middleware.query_analytics import get_query_analytics

    analytics = get_query_analytics()
    analytics.reset()
    return {"ok": True}


# ---- v3.49 Corpus router --------------------------------------------
class CorpusProfileRequest(BaseModel):
    """Request body for ``POST /v1/admin/corpus_router/corpus``.

    The handler builds a ``CorpusProfile`` from every field except
    ``replace``, which is passed to ``register_corpus`` directly.
    """

    name: str
    # The three lists below are converted to ``set`` by the handler, so
    # duplicates and ordering are not preserved.
    # NOTE(review): mutable ``[]`` defaults — pydantic is documented to copy
    # field defaults per instance; confirm for the pinned pydantic version.
    keywords: List[str] = []
    intents: List[str] = []
    topics: List[str] = []
    cost_cents_per_query: float = 0.0
    quality: float = 0.8
    description: str = ""
    # Forwarded as ``register_corpus(..., replace=...)``.
    replace: bool = False


class CorpusRouteRequest(BaseModel):
    """Request body for ``POST /v1/admin/corpus_router/route``.

    Every field is forwarded as the same-named keyword of ``route``.
    """

    query: str
    intent: Optional[str] = None
    topic: Optional[str] = None
    tenant_id: Optional[str] = None


class CorpusTenantOverrideRequest(BaseModel):
    """Request body for ``POST /v1/admin/corpus_router/tenant_override``.

    Both fields are passed positionally to ``set_tenant_override``.
    """

    tenant_id: str
    corpus_name: str


@app.get("/v1/admin/corpus_router")
def admin_corpus_router_stats():
    """Return the corpus router's stats, corpora and tenant overrides."""
    from ..middleware.corpus_router import get_corpus_router

    router = get_corpus_router()
    stats = router.stats()
    corpora = router.list_corpora()
    overrides = router.list_tenant_overrides()
    return {
        "stats": stats,
        "corpora": corpora,
        "tenant_overrides": overrides,
    }


@app.post("/v1/admin/corpus_router/corpus")
def admin_corpus_router_register(req: CorpusProfileRequest, request: Request):
    """Build a ``CorpusProfile`` from the request and register it.

    The keyword, intent and topic lists are converted to sets. A
    ``ValueError`` raised while building or registering the profile
    becomes HTTP 400.
    """
    from ..middleware.corpus_router import CorpusProfile, get_corpus_router

    try:
        router = get_corpus_router()
        profile = CorpusProfile(
            name=req.name,
            keywords=set(req.keywords),
            intents=set(req.intents),
            topics=set(req.topics),
            cost_cents_per_query=req.cost_cents_per_query,
            quality=req.quality,
            description=req.description,
        )
        router.register_corpus(profile, replace=req.replace)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return {"ok": True}


@app.delete("/v1/admin/corpus_router/corpus/{name}")
def admin_corpus_router_unregister(name: str, request: Request):
    """Unregister corpus ``name``; respond 404 when the router returns falsy."""
    from ..middleware.corpus_router import get_corpus_router

    removed = get_corpus_router().unregister_corpus(name)
    if removed:
        return {"ok": True}
    raise HTTPException(status_code=404, detail={"error": "not found"})


@app.post("/v1/admin/corpus_router/route")
def admin_corpus_router_route(req: CorpusRouteRequest, request: Request):
    """Route a query through the corpus router and return the decision.

    A ``ValueError`` from ``route`` becomes HTTP 400.
    """
    from ..middleware.corpus_router import get_corpus_router

    try:
        decision = get_corpus_router().route(
            query=req.query,
            intent=req.intent,
            topic=req.topic,
            tenant_id=req.tenant_id,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return decision.to_dict()


@app.post("/v1/admin/corpus_router/tenant_override")
def admin_corpus_router_tenant_override(
    req: CorpusTenantOverrideRequest,
    request: Request,
):
    """Set a tenant-to-corpus override on the corpus router.

    A ``ValueError`` from ``set_tenant_override`` becomes HTTP 400.
    """
    from ..middleware.corpus_router import get_corpus_router

    try:
        get_corpus_router().set_tenant_override(req.tenant_id, req.corpus_name)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return {"ok": True}


@app.post("/v1/admin/corpus_router/reset")
def admin_corpus_router_reset(request: Request):
    """Call ``reset()`` on the corpus router and acknowledge."""
    from ..middleware.corpus_router import get_corpus_router

    router = get_corpus_router()
    router.reset()
    return {"ok": True}


# ---- v3.50 Stream chunker -------------------------------------------
class StreamPushRequest(BaseModel):
    """Request body for ``POST /v1/admin/stream_chunker/push``."""

    # Passed to the chunker's ``push``.
    token: str


class StreamChunkTextRequest(BaseModel):
    """Request body for ``POST /v1/admin/stream_chunker/chunk_text``."""

    # Passed to the chunker's ``chunk_text``.
    text: str
    # When truthy, applied via ``set_boundary`` before chunking.
    boundary: Optional[str] = None


class StreamBoundaryRequest(BaseModel):
    """Request body for ``POST /v1/admin/stream_chunker/boundary``."""

    # Passed to ``set_boundary`` and echoed back in the response.
    boundary: str


@app.get("/v1/admin/stream_chunker")
def admin_stream_chunker_stats():
    """Return the stream chunker's stats as a dict."""
    from ..middleware.stream_chunker import get_stream_chunker

    chunker = get_stream_chunker()
    stats = chunker.stats()
    return stats.to_dict()


@app.post("/v1/admin/stream_chunker/push")
def admin_stream_chunker_push(req: StreamPushRequest, request: Request):
    """Push one token into the stream chunker and return emitted chunks.

    A ``ValueError`` from ``push`` becomes HTTP 400.
    """
    from ..middleware.stream_chunker import get_stream_chunker

    try:
        emitted = get_stream_chunker().push(req.token)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return {"chunks": [chunk.to_dict() for chunk in emitted]}


@app.post("/v1/admin/stream_chunker/flush")
def admin_stream_chunker_flush(request: Request):
    """Flush the stream chunker.

    Returns ``{"chunk": ...}`` holding the flushed chunk's ``to_dict()``,
    or ``None`` when ``flush()`` returned a falsy value.
    """
    from ..middleware.stream_chunker import get_stream_chunker

    flushed = get_stream_chunker().flush()
    if flushed:
        payload = flushed.to_dict()
    else:
        payload = None
    return {"chunk": payload}


@app.post("/v1/admin/stream_chunker/chunk_text")
def admin_stream_chunker_chunk_text(req: StreamChunkTextRequest,
                                          request: Request):
    """Chunk ``req.text`` with the stream chunker.

    When ``req.boundary`` is truthy it is applied with ``set_boundary``
    before chunking. Returns ``{"chunks": [...]}`` with one ``to_dict()``
    per chunk.

    Raises:
        HTTPException: 400 when ``set_boundary`` or ``chunk_text`` raises
            ``ValueError``. Previously only ``set_boundary`` was guarded,
            so a ``ValueError`` from ``chunk_text`` surfaced as HTTP 500,
            unlike the sibling ``push`` endpoint.
    """
    from ..middleware.stream_chunker import get_stream_chunker
    chunker = get_stream_chunker()
    try:
        if req.boundary:
            # NOTE(review): the boundary is set on the chunker object and
            # not restored afterwards; if that object is shared between
            # requests the override outlives this call — confirm intended.
            chunker.set_boundary(req.boundary)
        chunks = chunker.chunk_text(req.text)
    except ValueError as e:
        raise HTTPException(status_code=400,
                             detail={"error": str(e)}) from e
    return {"chunks": [c.to_dict() for c in chunks]}


@app.post("/v1/admin/stream_chunker/boundary")
def admin_stream_chunker_boundary(req: StreamBoundaryRequest, request: Request):
    """Set the stream chunker's boundary to ``req.boundary``.

    A ``ValueError`` from ``set_boundary`` becomes HTTP 400; on success the
    requested boundary is echoed back.
    """
    from ..middleware.stream_chunker import get_stream_chunker

    try:
        get_stream_chunker().set_boundary(req.boundary)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return {"ok": True, "boundary": req.boundary}


@app.post("/v1/admin/stream_chunker/reset")
def admin_stream_chunker_reset(request: Request):
    """Call ``reset()`` on the stream chunker and acknowledge."""
    from ..middleware.stream_chunker import get_stream_chunker

    chunker = get_stream_chunker()
    chunker.reset()
    return {"ok": True}


# ---- v3.51 Knowledge graph extractor --------------------------------
class KGExtractRequest(BaseModel):
    """Request body for ``POST /v1/admin/kg_extractor/extract``."""

    # ``doc_id`` and ``text`` are passed positionally to ``extract``.
    doc_id: str
    text: str
    # Forwarded as ``extract(..., store=...)``.
    store: bool = True


class KGExtractManyRequest(BaseModel):
    """Request body for ``POST /v1/admin/kg_extractor/extract_many``."""

    # Passed unchanged to ``extract_many``; per-dict schema is defined there.
    docs: List[Dict[str, Any]]
    # Forwarded as ``extract_many(..., store=...)``.
    store: bool = True


class KGNeighborsRequest(BaseModel):
    """Request body for ``POST /v1/admin/kg_extractor/neighbors``."""

    # Passed positionally to ``neighbors``.
    entity: str
    # Forwarded as ``neighbors(..., direction=...)``; the accepted values are
    # defined by the extractor.
    direction: str = "both"


@app.get("/v1/admin/kg_extractor")
def admin_kg_extractor_stats():
    """Return ``stats()`` of the knowledge-graph extractor."""
    from ..middleware.kg_extractor import get_kg_extractor

    extractor = get_kg_extractor()
    return extractor.stats()


@app.post("/v1/admin/kg_extractor/extract")
def admin_kg_extractor_extract(req: KGExtractRequest, request: Request):
    """Run knowledge-graph extraction on one document.

    A ``ValueError`` from ``extract`` becomes HTTP 400; otherwise the
    report's ``to_dict()`` is returned.
    """
    from ..middleware.kg_extractor import get_kg_extractor

    try:
        report = get_kg_extractor().extract(
            req.doc_id,
            req.text,
            store=req.store,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return report.to_dict()


@app.post("/v1/admin/kg_extractor/extract_many")
def admin_kg_extractor_extract_many(req: KGExtractManyRequest,
                                          request: Request):
    """Run knowledge-graph extraction on several documents.

    Returns ``{"reports": [...]}`` with one ``to_dict()`` per report.

    Raises:
        HTTPException: 400 when ``extract_many`` raises ``ValueError`` —
            the same mapping the single-document ``extract`` endpoint
            applies, so bad input is not reported as a server error.
    """
    from ..middleware.kg_extractor import get_kg_extractor
    try:
        reports = get_kg_extractor().extract_many(
            req.docs, store=req.store)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)}) from e
    return {"reports": [r.to_dict() for r in reports]}


@app.get("/v1/admin/kg_extractor/snapshot")
def admin_kg_extractor_snapshot(top_n: int = 10):
    """Return a snapshot of the extractor's graph as a dict.

    ``top_n`` is forwarded to ``snapshot``; a ``ValueError`` from it
    becomes HTTP 400.
    """
    from ..middleware.kg_extractor import get_kg_extractor

    try:
        snapshot = get_kg_extractor().snapshot(top_n=top_n)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return snapshot.to_dict()


@app.post("/v1/admin/kg_extractor/neighbors")
def admin_kg_extractor_neighbors(req: KGNeighborsRequest, request: Request):
    """Return the triples neighbouring ``req.entity`` under ``triples``.

    A ``ValueError`` from ``neighbors`` becomes HTTP 400.
    """
    from ..middleware.kg_extractor import get_kg_extractor

    try:
        found = get_kg_extractor().neighbors(
            req.entity,
            direction=req.direction,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return {"triples": [triple.to_dict() for triple in found]}


@app.post("/v1/admin/kg_extractor/reset")
def admin_kg_extractor_reset(request: Request):
    """Call ``reset()`` on the knowledge-graph extractor and acknowledge."""
    from ..middleware.kg_extractor import get_kg_extractor

    extractor = get_kg_extractor()
    extractor.reset()
    return {"ok": True}


# ---- v3.52 Citation network analyzer --------------------------------
class CitationEdgeRequest(BaseModel):
    """Request body for ``POST /v1/admin/citation_network/edge``.

    Every field is forwarded as the same-named keyword of ``add_edge``.
    """

    citing: str
    cited: str
    weight: float = 1.0
    edge_type: str = "cites"


class CitationBulkRequest(BaseModel):
    """Request body for ``POST /v1/admin/citation_network/edges_bulk``."""

    # Passed unchanged to ``add_edges_bulk``; per-dict schema is defined there.
    edges: List[Dict[str, Any]]


@app.get("/v1/admin/citation_network")
def admin_citation_network_stats():
    """Return ``stats()`` of the citation network."""
    from ..middleware.citation_network import get_citation_network

    network = get_citation_network()
    return network.stats()


@app.post("/v1/admin/citation_network/edge")
def admin_citation_network_add_edge(
    req: CitationEdgeRequest,
    request: Request,
):
    """Add one citation edge to the citation network.

    A ``ValueError`` from ``add_edge`` becomes HTTP 400.
    """
    from ..middleware.citation_network import get_citation_network

    try:
        get_citation_network().add_edge(
            citing=req.citing,
            cited=req.cited,
            weight=req.weight,
            edge_type=req.edge_type,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return {"ok": True}


@app.post("/v1/admin/citation_network/edges_bulk")
def admin_citation_network_bulk(req: CitationBulkRequest,
                                      request: Request):
    """Add several citation edges in one call.

    Returns ``{"ok": True, "added": n}`` where ``n`` is whatever
    ``add_edges_bulk`` returned.

    Raises:
        HTTPException: 400 when ``add_edges_bulk`` raises ``ValueError`` —
            the same mapping the single-edge endpoint applies, so bad input
            is not reported as a server error.
    """
    from ..middleware.citation_network import get_citation_network
    try:
        n = get_citation_network().add_edges_bulk(req.edges)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)}) from e
    return {"ok": True, "added": n}


@app.delete("/v1/admin/citation_network/doc/{doc_id}")
def admin_citation_network_remove_doc(doc_id: str, request: Request):
    """Remove ``doc_id`` from the citation network.

    The value returned by ``remove_doc`` is reported as ``removed_edges``.
    """
    from ..middleware.citation_network import get_citation_network

    network = get_citation_network()
    removed = network.remove_doc(doc_id)
    return {"ok": True, "removed_edges": removed}


@app.get("/v1/admin/citation_network/pagerank")
def admin_citation_network_pagerank(top_n: int = 20):
    """Return PageRank results under the ``rankings`` key.

    ``top_n`` is forwarded to ``pagerank``; a ``ValueError`` from it
    becomes HTTP 400.
    """
    from ..middleware.citation_network import get_citation_network

    try:
        ranked = get_citation_network().pagerank(top_n=top_n)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return {"rankings": [entry.to_dict() for entry in ranked]}


@app.get("/v1/admin/citation_network/hits")
def admin_citation_network_hits(top_n: int = 20):
    """Return the network's ``hits`` results under the ``scores`` key.

    ``top_n`` is forwarded to ``hits``; a ``ValueError`` from it becomes
    HTTP 400.
    """
    from ..middleware.citation_network import get_citation_network

    try:
        hit_scores = get_citation_network().hits(top_n=top_n)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return {"scores": [score.to_dict() for score in hit_scores]}


@app.get("/v1/admin/citation_network/top_cited")
def admin_citation_network_top_cited(top_n: int = 10):
    """Return the most-cited documents under the ``top_cited`` key.

    Each pair yielded by ``top_cited`` is rendered as
    ``{"doc_id": ..., "in_degree": ...}``. A ``ValueError`` from the call
    becomes HTTP 400.
    """
    from ..middleware.citation_network import get_citation_network

    try:
        pairs = get_citation_network().top_cited(top_n=top_n)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        rows = [
            {"doc_id": doc_id, "in_degree": in_degree}
            for doc_id, in_degree in pairs
        ]
        return {"top_cited": rows}


@app.get("/v1/admin/citation_network/components")
def admin_citation_network_components():
    """Return the network's connected components under ``components``."""
    from ..middleware.citation_network import get_citation_network

    network = get_citation_network()
    found = network.connected_components()
    return {"components": [component.to_dict() for component in found]}


@app.post("/v1/admin/citation_network/reset")
def admin_citation_network_reset(request: Request):
    """Call ``reset()`` on the citation network and acknowledge."""
    from ..middleware.citation_network import get_citation_network

    network = get_citation_network()
    network.reset()
    return {"ok": True}


# ---- v3.53 Query template extractor ---------------------------------
class TemplateRecordQueryRequest(BaseModel):
    """Request body for ``POST /v1/admin/template_extractor/record``."""

    # Passed to the template extractor's ``record``.
    query: str


class TemplateRecordManyRequest(BaseModel):
    """Request body for ``POST /v1/admin/template_extractor/record_many``."""

    # Passed unchanged to ``record_many``.
    queries: List[str]


class TemplateExtractRequest(BaseModel):
    """Request body for ``POST /v1/admin/template_extractor/extract``."""

    # Forwarded as ``extract(min_frequency=...)``; ``None`` is passed through.
    min_frequency: Optional[int] = None


class TemplateNormalizeRequest(BaseModel):
    """Request body for ``POST /v1/admin/template_extractor/normalize``."""

    # Passed to the template extractor's ``normalize``.
    query: str


@app.get("/v1/admin/template_extractor")
def admin_template_extractor_stats():
    """Return ``stats()`` of the template extractor."""
    from ..middleware.template_extractor import get_template_extractor

    extractor = get_template_extractor()
    return extractor.stats()


@app.post("/v1/admin/template_extractor/record")
def admin_template_extractor_record(
    req: TemplateRecordQueryRequest,
    request: Request,
):
    """Record one query with the template extractor.

    A ``ValueError`` from ``record`` becomes HTTP 400.
    """
    from ..middleware.template_extractor import get_template_extractor

    try:
        get_template_extractor().record(req.query)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return {"ok": True}


@app.post("/v1/admin/template_extractor/record_many")
def admin_template_extractor_record_many(
    req: TemplateRecordManyRequest,
    request: Request,
):
    """Record several queries with the template extractor.

    The value returned by ``record_many`` is reported as ``added``.
    """
    from ..middleware.template_extractor import get_template_extractor

    extractor = get_template_extractor()
    added = extractor.record_many(req.queries)
    return {"ok": True, "added": added}


@app.post("/v1/admin/template_extractor/extract")
def admin_template_extractor_extract(
    req: TemplateExtractRequest,
    request: Request,
):
    """Run template extraction and return the report as a dict.

    ``req.min_frequency`` is forwarded as-is; a ``ValueError`` from
    ``extract`` becomes HTTP 400.
    """
    from ..middleware.template_extractor import get_template_extractor

    try:
        report = get_template_extractor().extract(
            min_frequency=req.min_frequency,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return report.to_dict()


@app.post("/v1/admin/template_extractor/normalize")
def admin_template_extractor_normalize(
    req: TemplateNormalizeRequest,
    request: Request,
):
    """Return whatever the template extractor's ``normalize`` yields.

    A ``ValueError`` from ``normalize`` becomes HTTP 400.
    """
    from ..middleware.template_extractor import get_template_extractor

    try:
        normalized = get_template_extractor().normalize(req.query)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    return normalized


@app.post("/v1/admin/template_extractor/reset")
def admin_template_extractor_reset(request: Request):
    """Call ``reset()`` on the template extractor and acknowledge."""
    from ..middleware.template_extractor import get_template_extractor

    extractor = get_template_extractor()
    extractor.reset()
    return {"ok": True}


# ---- v3.54 Slow query analyzer --------------------------------------
class SlowRecordRequest(BaseModel):
    """Request body for ``POST /v1/admin/slow_query_analyzer/record``.

    Every field is forwarded as the same-named keyword of the analyzer's
    ``record`` call.
    """

    query: str
    total_latency_ms: float
    # NOTE(review): presumably stage name -> latency in ms — confirm against
    # the analyzer.
    stages: Optional[Dict[str, float]] = None
    n_retrieved: int = 0
    metadata: Optional[Dict[str, Any]] = None


@app.get("/v1/admin/slow_query_analyzer")
def admin_slow_query_analyzer_stats():
    """Return ``stats()`` of the slow query analyzer."""
    from ..middleware.slow_query_analyzer import get_slow_query_analyzer

    analyzer = get_slow_query_analyzer()
    return analyzer.stats()


@app.post("/v1/admin/slow_query_analyzer/record")
def admin_slow_query_analyzer_record(req: SlowRecordRequest, request: Request):
    """Record one query timing with the slow query analyzer.

    A ``ValueError`` from ``record`` becomes HTTP 400.
    """
    from ..middleware.slow_query_analyzer import get_slow_query_analyzer

    try:
        get_slow_query_analyzer().record(
            query=req.query,
            total_latency_ms=req.total_latency_ms,
            stages=req.stages,
            n_retrieved=req.n_retrieved,
            metadata=req.metadata,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return {"ok": True}


@app.get("/v1/admin/slow_query_analyzer/analyze")
def admin_slow_query_analyzer_analyze():
    """Run the slow query analyzer's ``analyze`` and return it as a dict."""
    from ..middleware.slow_query_analyzer import get_slow_query_analyzer

    analyzer = get_slow_query_analyzer()
    return analyzer.analyze().to_dict()


@app.post("/v1/admin/slow_query_analyzer/reset")
def admin_slow_query_analyzer_reset(request: Request):
    """Call ``reset()`` on the slow query analyzer and acknowledge."""
    from ..middleware.slow_query_analyzer import get_slow_query_analyzer

    analyzer = get_slow_query_analyzer()
    analyzer.reset()
    return {"ok": True}


# ---- v3.55 Confidence calibrator ------------------------------------
class CalibrationRecordRequest(BaseModel):
    """Request body for ``POST /v1/admin/confidence_calibrator/record``.

    Every field is forwarded as the same-named keyword of the calibrator's
    ``record`` call.
    """

    predicted: float
    actual: bool
    metadata: Optional[Dict[str, Any]] = None


class CalibrationCalibrateRequest(BaseModel):
    """Request body for ``POST /v1/admin/confidence_calibrator/calibrate``."""

    # Passed to ``calibrated_confidence`` and echoed back as ``raw``.
    raw: float


@app.get("/v1/admin/confidence_calibrator")
def admin_confidence_calibrator_stats():
    """Return ``stats()`` of the confidence calibrator."""
    from ..middleware.confidence_calibrator import get_confidence_calibrator

    calibrator = get_confidence_calibrator()
    return calibrator.stats()


@app.post("/v1/admin/confidence_calibrator/record")
def admin_confidence_calibrator_record(
    req: CalibrationRecordRequest,
    request: Request,
):
    """Record one predicted/actual pair with the confidence calibrator.

    A ``ValueError`` from ``record`` becomes HTTP 400.
    """
    from ..middleware.confidence_calibrator import get_confidence_calibrator

    try:
        get_confidence_calibrator().record(
            predicted=req.predicted,
            actual=req.actual,
            metadata=req.metadata,
        )
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return {"ok": True}


@app.get("/v1/admin/confidence_calibrator/analyze")
def admin_confidence_calibrator_analyze():
    """Run the calibrator's ``analyze`` and return the result as a dict."""
    from ..middleware.confidence_calibrator import get_confidence_calibrator

    calibrator = get_confidence_calibrator()
    analysis = calibrator.analyze()
    return analysis.to_dict()


@app.post("/v1/admin/confidence_calibrator/calibrate")
def admin_confidence_calibrator_calibrate(
    req: CalibrationCalibrateRequest,
    request: Request,
):
    """Return the raw confidence together with its calibrated value.

    A ``ValueError`` from ``calibrated_confidence`` becomes HTTP 400.
    """
    from ..middleware.confidence_calibrator import get_confidence_calibrator

    try:
        calibrated = get_confidence_calibrator().calibrated_confidence(req.raw)
    except ValueError as exc:
        detail = {"error": str(exc)}
        raise HTTPException(status_code=400, detail=detail)
    else:
        return {"raw": req.raw, "calibrated": calibrated}


@app.get("/v1/admin/confidence_calibrator/curve")
def admin_confidence_calibrator_curve():
    """Return the calibrator's ``calibration_curve()`` under ``points``."""
    from ..middleware.confidence_calibrator import get_confidence_calibrator

    calibrator = get_confidence_calibrator()
    points = calibrator.calibration_curve()
    return {"points": points}


@app.post("/v1/admin/confidence_calibrator/reset")
def admin_confidence_calibrator_reset(request: Request):
    """Call ``reset()`` on the confidence calibrator and acknowledge."""
    from ..middleware.confidence_calibrator import get_confidence_calibrator

    calibrator = get_confidence_calibrator()
    calibrator.reset()
    return {"ok": True}


# ---- v3.56 Preview generator ----------------------------------------
class PreviewGenerateRequest(BaseModel):
    """Body of ``POST /v1/admin/preview_generator/generate``.

    Every field is forwarded by keyword to the preview generator's
    ``generate()``; the optional ones are passed even when ``None``.
    """
    doc_id: str
    text: str
    query: str
    # Optional tuning knobs; units and defaults are defined by the
    # generator, not visible here.
    max_length: Optional[int] = None
    window: Optional[int] = None
    n_snippets: Optional[int] = None
    highlight_mode: Optional[str] = None


class PreviewGenerateManyRequest(BaseModel):
    """Body of ``POST /v1/admin/preview_generator/generate_many``.

    ``docs`` and ``query`` are passed positionally to ``generate_many()``;
    the optional fields are forwarded only when they are not ``None``.
    """
    # Per-document dict schema is not validated here — presumably mirrors
    # the single-document request (doc_id/text); confirm against the generator.
    docs: List[Dict[str, Any]]
    query: str
    max_length: Optional[int] = None
    window: Optional[int] = None
    n_snippets: Optional[int] = None
    highlight_mode: Optional[str] = None


@app.get("/v1/admin/preview_generator")
def admin_preview_generator_stats():
    """Return ``stats()`` of the generator from ``get_preview_generator()``."""
    from ..middleware.preview_generator import (
        get_preview_generator,
    )

    generator = get_preview_generator()
    return generator.stats()


@app.post("/v1/admin/preview_generator/generate")
def admin_preview_generator_generate(
    req: PreviewGenerateRequest,
    request: Request,
):
    """Generate a preview for a single document and return it as a dict.

    All request fields are forwarded by keyword, including optional ones
    that are ``None``. ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 with ``{"error": ...}`` when the generator
            raises ``ValueError``.
    """
    from ..middleware.preview_generator import (
        get_preview_generator,
    )

    try:
        generator = get_preview_generator()
        result = generator.generate(
            doc_id=req.doc_id,
            text=req.text,
            query=req.query,
            max_length=req.max_length,
            window=req.window,
            n_snippets=req.n_snippets,
            highlight_mode=req.highlight_mode,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return result.to_dict()


@app.post("/v1/admin/preview_generator/generate_many")
def admin_preview_generator_generate_many(
    req: PreviewGenerateManyRequest,
    request: Request,
):
    """Generate previews for several documents against one query.

    Only the optional settings the caller actually supplied (non-``None``)
    are forwarded as keyword arguments. ``request`` is not read here.

    Raises:
        HTTPException: 400 with ``{"error": ...}`` when the generator
            raises ``ValueError``.
    """
    from ..middleware.preview_generator import (
        get_preview_generator,
    )

    option_names = ("max_length", "window", "n_snippets", "highlight_mode")
    supplied = ((name, getattr(req, name)) for name in option_names)
    overrides = {name: value for name, value in supplied if value is not None}

    try:
        generator = get_preview_generator()
        results = generator.generate_many(req.docs, req.query, **overrides)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"previews": [item.to_dict() for item in results]}


@app.post("/v1/admin/preview_generator/reset")
def admin_preview_generator_reset(request: Request):
    """Call ``reset()`` on the preview generator; ``request`` is not read here."""
    from ..middleware.preview_generator import (
        get_preview_generator,
    )

    generator = get_preview_generator()
    generator.reset()
    return {"ok": True}


# =========================== v3.57 Retrieval Agreement Scorer ===
class AgreementScoreRequest(BaseModel):
    """Body of ``POST /v1/admin/retrieval_agreement/score``."""
    # Maps a string key to a list of strings — presumably retriever name
    # to ranked doc ids; confirm against the scorer.
    retriever_results: Dict[str, List[str]]


@app.get("/v1/admin/retrieval_agreement")
def admin_retrieval_agreement_stats():
    """Return ``stats()`` of the retrieval agreement scorer."""
    from ..middleware.retrieval_agreement import (
        get_retrieval_agreement_scorer,
    )

    scorer = get_retrieval_agreement_scorer()
    return scorer.stats()


@app.post("/v1/admin/retrieval_agreement/score")
def admin_retrieval_agreement_score(
    req: AgreementScoreRequest,
    request: Request,
):
    """Score ``req.retriever_results`` and return the report as a dict.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the scorer raises ``ValueError``.
    """
    from ..middleware.retrieval_agreement import (
        get_retrieval_agreement_scorer,
    )

    try:
        scorer = get_retrieval_agreement_scorer()
        agreement = scorer.score(req.retriever_results)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return agreement.to_dict()


@app.post("/v1/admin/retrieval_agreement/reset")
def admin_retrieval_agreement_reset(request: Request):
    """Call ``reset()`` on the agreement scorer; ``request`` is not read here."""
    from ..middleware.retrieval_agreement import (
        get_retrieval_agreement_scorer,
    )

    scorer = get_retrieval_agreement_scorer()
    scorer.reset()
    return {"ok": True}


# =========================== v3.58 Answer Source Balance ========
class SourceBalanceRequest(BaseModel):
    """Body of ``POST /v1/admin/answer_source_balance/analyze``."""
    # Passed as-is to the balancer's ``analyze()``.
    citations: List[str]


@app.get("/v1/admin/answer_source_balance")
def admin_answer_source_balance_stats():
    """Return ``stats()`` of the answer source balancer."""
    from ..middleware.answer_source_balance import (
        get_answer_source_balancer,
    )

    balancer = get_answer_source_balancer()
    return balancer.stats()


@app.post("/v1/admin/answer_source_balance/analyze")
def admin_answer_source_balance_analyze(
    req: SourceBalanceRequest,
    request: Request,
):
    """Analyze ``req.citations`` and return the report as a dict.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the balancer raises ``ValueError``.
    """
    from ..middleware.answer_source_balance import (
        get_answer_source_balancer,
    )

    try:
        balancer = get_answer_source_balancer()
        balance = balancer.analyze(req.citations)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return balance.to_dict()


@app.post("/v1/admin/answer_source_balance/reset")
def admin_answer_source_balance_reset(request: Request):
    """Call ``reset()`` on the source balancer; ``request`` is not read here."""
    from ..middleware.answer_source_balance import (
        get_answer_source_balancer,
    )

    balancer = get_answer_source_balancer()
    balancer.reset()
    return {"ok": True}


# =========================== v3.59 Doc Staleness Scorer =========
class StalenessScoreRequest(BaseModel):
    """Body of ``POST /v1/admin/doc_staleness/score``.

    All four fields are forwarded by keyword to the scorer's ``score_doc()``.
    """
    doc_id: str
    age_days: float
    # Defaults apply when the caller omits the field.
    citation_count: int = 0
    superseded: bool = False


class StalenessScoreManyRequest(BaseModel):
    """Body of ``POST /v1/admin/doc_staleness/score_many``."""
    # Per-document dict schema is not validated here — presumably the same
    # keys as the single-document request; confirm against the scorer.
    docs: List[Dict[str, Any]]


@app.get("/v1/admin/doc_staleness")
def admin_doc_staleness_stats():
    """Return ``stats()`` of the document staleness scorer."""
    from ..middleware.doc_staleness import (
        get_doc_staleness_scorer,
    )

    scorer = get_doc_staleness_scorer()
    return scorer.stats()


@app.post("/v1/admin/doc_staleness/score")
def admin_doc_staleness_score(
    req: StalenessScoreRequest,
    request: Request,
):
    """Score one document's staleness and return the result as a dict.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the scorer raises ``ValueError``.
    """
    from ..middleware.doc_staleness import (
        get_doc_staleness_scorer,
    )

    try:
        scorer = get_doc_staleness_scorer()
        staleness = scorer.score_doc(
            doc_id=req.doc_id,
            age_days=req.age_days,
            citation_count=req.citation_count,
            superseded=req.superseded,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return staleness.to_dict()


@app.post("/v1/admin/doc_staleness/score_many")
def admin_doc_staleness_score_many(
        req: StalenessScoreManyRequest, request: Request):
    """Score several documents and return per-doc scores plus a summary.

    ``request`` is not read by this handler.

    Returns:
        ``{"scores": [...], "summary": ...}`` where each score is the
        ``to_dict()`` form and the summary comes from ``summarize()``.

    Raises:
        HTTPException: 400 when ``score_many()`` raises ``ValueError``.
    """
    from ..middleware.doc_staleness import get_doc_staleness_scorer
    scorer = get_doc_staleness_scorer()
    try:
        scores = scorer.score_many(req.docs)
    except ValueError as e:
        # Match the single-document endpoint: input the scorer rejects is a
        # client error (400), not an unhandled 500.
        raise HTTPException(status_code=400, detail=str(e)) from e
    return {
        "scores": [s.to_dict() for s in scores],
        "summary": scorer.summarize(scores),
    }


@app.post("/v1/admin/doc_staleness/reset")
def admin_doc_staleness_reset(request: Request):
    """Call ``reset()`` on the staleness scorer; ``request`` is not read here."""
    from ..middleware.doc_staleness import (
        get_doc_staleness_scorer,
    )

    scorer = get_doc_staleness_scorer()
    scorer.reset()
    return {"ok": True}


# =========================== v3.60 Retrieval Coverage ===========
class CoverageAnalyzeRequest(BaseModel):
    """Body of ``POST /v1/admin/retrieval_coverage/analyze``."""
    query: str
    # Per-document dict schema is defined by the analyzer, not validated here.
    docs: List[Dict[str, Any]]


@app.get("/v1/admin/retrieval_coverage")
def admin_retrieval_coverage_stats():
    """Return ``stats()`` of the retrieval coverage analyzer."""
    from ..middleware.retrieval_coverage import (
        get_retrieval_coverage_analyzer,
    )

    analyzer = get_retrieval_coverage_analyzer()
    return analyzer.stats()


@app.post("/v1/admin/retrieval_coverage/analyze")
def admin_retrieval_coverage_analyze(
    req: CoverageAnalyzeRequest,
    request: Request,
):
    """Analyze coverage of ``req.docs`` for ``req.query``; return a dict.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the analyzer raises ``ValueError``.
    """
    from ..middleware.retrieval_coverage import (
        get_retrieval_coverage_analyzer,
    )

    try:
        analyzer = get_retrieval_coverage_analyzer()
        coverage = analyzer.analyze(req.query, req.docs)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return coverage.to_dict()


@app.post("/v1/admin/retrieval_coverage/reset")
def admin_retrieval_coverage_reset(request: Request):
    """Call ``reset()`` on the coverage analyzer; ``request`` is not read here."""
    from ..middleware.retrieval_coverage import (
        get_retrieval_coverage_analyzer,
    )

    analyzer = get_retrieval_coverage_analyzer()
    analyzer.reset()
    return {"ok": True}


# =========================== v3.61 Answer Hedging ===============
class HedgingDetectRequest(BaseModel):
    """Body of ``POST /v1/admin/answer_hedging/detect``."""
    # Text handed to the detector's ``detect()``.
    text: str


@app.get("/v1/admin/answer_hedging")
def admin_answer_hedging_stats():
    """Return ``stats()`` of the answer hedging detector."""
    from ..middleware.answer_hedging import (
        get_answer_hedging_detector,
    )

    detector = get_answer_hedging_detector()
    return detector.stats()


@app.post("/v1/admin/answer_hedging/detect")
def admin_answer_hedging_detect(
    req: HedgingDetectRequest,
    request: Request,
):
    """Run the hedging detector on ``req.text`` and return its report dict.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the detector raises ``ValueError``.
    """
    from ..middleware.answer_hedging import (
        get_answer_hedging_detector,
    )

    try:
        detector = get_answer_hedging_detector()
        hedging = detector.detect(req.text)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return hedging.to_dict()


@app.post("/v1/admin/answer_hedging/reset")
def admin_answer_hedging_reset(request: Request):
    """Call ``reset()`` on the hedging detector; ``request`` is not read here."""
    from ..middleware.answer_hedging import (
        get_answer_hedging_detector,
    )

    detector = get_answer_hedging_detector()
    detector.reset()
    return {"ok": True}


# =========================== v3.62 Query Routing Optimizer ======
class RouterRecordRequest(BaseModel):
    """Body of ``POST /v1/admin/query_routing_optimizer/record``.

    The three fields are passed positionally, in this order, to the
    optimizer's ``record()``.
    """
    query: str
    retriever: str
    # Valid range is not enforced by this model.
    outcome_score: float


class RouterRecommendRequest(BaseModel):
    """Body of ``POST /v1/admin/query_routing_optimizer/recommend``."""
    # Query handed to the optimizer's ``recommend()``.
    query: str


@app.get("/v1/admin/query_routing_optimizer")
def admin_query_routing_stats():
    """Return ``stats()`` of the query routing optimizer."""
    from ..middleware.query_routing_optimizer import (
        get_query_routing_optimizer,
    )

    optimizer = get_query_routing_optimizer()
    return optimizer.stats()


@app.post("/v1/admin/query_routing_optimizer/record")
def admin_query_routing_record(
    req: RouterRecordRequest,
    request: Request,
):
    """Record an outcome score for a (query, retriever) pair.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the optimizer raises ``ValueError``.
    """
    from ..middleware.query_routing_optimizer import (
        get_query_routing_optimizer,
    )

    try:
        optimizer = get_query_routing_optimizer()
        optimizer.record(req.query, req.retriever, req.outcome_score)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return {"ok": True}


@app.post("/v1/admin/query_routing_optimizer/recommend")
def admin_query_routing_recommend(
    req: RouterRecommendRequest,
    request: Request,
):
    """Return the optimizer's recommendation for ``req.query`` as a dict.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the optimizer raises ``ValueError``.
    """
    from ..middleware.query_routing_optimizer import (
        get_query_routing_optimizer,
    )

    try:
        optimizer = get_query_routing_optimizer()
        recommendation = optimizer.recommend(req.query)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return recommendation.to_dict()


@app.get("/v1/admin/query_routing_optimizer/buckets")
def admin_query_routing_buckets():
    """Return the optimizer's ``buckets()`` result unchanged."""
    from ..middleware.query_routing_optimizer import (
        get_query_routing_optimizer,
    )

    optimizer = get_query_routing_optimizer()
    return optimizer.buckets()


@app.post("/v1/admin/query_routing_optimizer/reset")
def admin_query_routing_reset(request: Request):
    """Call ``reset()`` on the routing optimizer; ``request`` is not read here."""
    from ..middleware.query_routing_optimizer import (
        get_query_routing_optimizer,
    )

    optimizer = get_query_routing_optimizer()
    optimizer.reset()
    return {"ok": True}


# =========================== v3.63 Answer Specificity ===========
class SpecificityScoreRequest(BaseModel):
    """Body of ``POST /v1/admin/answer_specificity/score``."""
    # Text handed to the scorer's ``score()``.
    text: str


@app.get("/v1/admin/answer_specificity")
def admin_answer_specificity_stats():
    """Return ``stats()`` of the answer specificity scorer."""
    from ..middleware.answer_specificity import (
        get_answer_specificity_scorer,
    )

    scorer = get_answer_specificity_scorer()
    return scorer.stats()


@app.post("/v1/admin/answer_specificity/score")
def admin_answer_specificity_score(
    req: SpecificityScoreRequest,
    request: Request,
):
    """Score ``req.text`` for specificity and return the report dict.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the scorer raises ``ValueError``.
    """
    from ..middleware.answer_specificity import (
        get_answer_specificity_scorer,
    )

    try:
        scorer = get_answer_specificity_scorer()
        specificity = scorer.score(req.text)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return specificity.to_dict()


@app.post("/v1/admin/answer_specificity/reset")
def admin_answer_specificity_reset(request: Request):
    """Call ``reset()`` on the specificity scorer; ``request`` is not read here."""
    from ..middleware.answer_specificity import (
        get_answer_specificity_scorer,
    )

    scorer = get_answer_specificity_scorer()
    scorer.reset()
    return {"ok": True}


# =========================== v3.64 Rank Stability ===============
class StabilityRecordRequest(BaseModel):
    """Body of ``POST /v1/admin/rank_stability/record``."""
    query: str
    # List of strings — presumably ranked doc ids; confirm against the tracker.
    top_k: List[str]


@app.get("/v1/admin/rank_stability")
def admin_rank_stability_stats():
    """Return ``stats()`` of the rank stability tracker."""
    from ..middleware.rank_stability import (
        get_rank_stability_tracker,
    )

    tracker = get_rank_stability_tracker()
    return tracker.stats()


@app.post("/v1/admin/rank_stability/record")
def admin_rank_stability_record(
    req: StabilityRecordRequest,
    request: Request,
):
    """Record ``req.top_k`` for ``req.query`` with the stability tracker.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the tracker raises ``ValueError``.
    """
    from ..middleware.rank_stability import (
        get_rank_stability_tracker,
    )

    try:
        tracker = get_rank_stability_tracker()
        tracker.record(req.query, req.top_k)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return {"ok": True}


@app.get("/v1/admin/rank_stability/analyze")
def admin_rank_stability_analyze(query: str):
    """Return the tracker's ``analyze(query)`` result as a dict.

    No exception from the tracker is translated here.
    """
    from ..middleware.rank_stability import (
        get_rank_stability_tracker,
    )

    tracker = get_rank_stability_tracker()
    stability = tracker.analyze(query)
    return stability.to_dict()


@app.get("/v1/admin/rank_stability/flaky")
def admin_rank_stability_flaky():
    """Return the tracker's ``flaky_queries()`` under ``flaky_queries``."""
    from ..middleware.rank_stability import (
        get_rank_stability_tracker,
    )

    flaky = get_rank_stability_tracker().flaky_queries()
    return {"flaky_queries": flaky}


@app.post("/v1/admin/rank_stability/reset")
def admin_rank_stability_reset(request: Request):
    """Call ``reset()`` on the stability tracker; ``request`` is not read here."""
    from ..middleware.rank_stability import (
        get_rank_stability_tracker,
    )

    tracker = get_rank_stability_tracker()
    tracker.reset()
    return {"ok": True}


# =========================== v3.65 Session Topic Tracker ========
class SessionRecordRequest(BaseModel):
    """Body of ``POST /v1/admin/session_topic_tracker/record``.

    Both fields are passed positionally to the tracker's ``record()``.
    """
    session_id: str
    query: str


@app.get("/v1/admin/session_topic_tracker")
def admin_session_topic_stats():
    """Return ``stats()`` of the session topic tracker."""
    from ..middleware.session_topic_tracker import (
        get_session_topic_tracker,
    )

    tracker = get_session_topic_tracker()
    return tracker.stats()


@app.post("/v1/admin/session_topic_tracker/record")
def admin_session_topic_record(
    req: SessionRecordRequest,
    request: Request,
):
    """Record a query for a session and return what ``record()`` produced.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the tracker raises ``ValueError``.
    """
    from ..middleware.session_topic_tracker import (
        get_session_topic_tracker,
    )

    try:
        tracker = get_session_topic_tracker()
        recorded_turn = tracker.record(req.session_id, req.query)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return recorded_turn.to_dict()


@app.get("/v1/admin/session_topic_tracker/analyze")
def admin_session_topic_analyze(session_id: str):
    """Return the tracker's ``analyze_session(session_id)`` result as a dict.

    No exception from the tracker is translated here.
    """
    from ..middleware.session_topic_tracker import (
        get_session_topic_tracker,
    )

    tracker = get_session_topic_tracker()
    session_report = tracker.analyze_session(session_id)
    return session_report.to_dict()


@app.get("/v1/admin/session_topic_tracker/sessions")
def admin_session_topic_sessions():
    """Return the tracker's ``sessions()`` under the ``sessions`` key."""
    from ..middleware.session_topic_tracker import (
        get_session_topic_tracker,
    )

    known_sessions = get_session_topic_tracker().sessions()
    return {"sessions": known_sessions}


@app.post("/v1/admin/session_topic_tracker/reset")
def admin_session_topic_reset(request: Request):
    """Call ``reset()`` on the session topic tracker; ``request`` is not read here."""
    from ..middleware.session_topic_tracker import (
        get_session_topic_tracker,
    )

    tracker = get_session_topic_tracker()
    tracker.reset()
    return {"ok": True}


# =========================== v3.66 Snippet Deduplicator =========
class SnippetDedupRequest(BaseModel):
    """Body of ``POST /v1/admin/snippet_dedup/dedupe``."""
    # Per-snippet dict schema is defined by the deduplicator, not validated here.
    snippets: List[Dict[str, Any]]


@app.get("/v1/admin/snippet_dedup")
def admin_snippet_dedup_stats():
    """Return ``stats()`` of the snippet deduplicator."""
    from ..middleware.snippet_dedup import (
        get_snippet_deduplicator,
    )

    deduplicator = get_snippet_deduplicator()
    return deduplicator.stats()


@app.post("/v1/admin/snippet_dedup/dedupe")
def admin_snippet_dedup_dedupe(
    req: SnippetDedupRequest,
    request: Request,
):
    """Deduplicate ``req.snippets`` and return the report as a dict.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the deduplicator raises ``ValueError``.
    """
    from ..middleware.snippet_dedup import (
        get_snippet_deduplicator,
    )

    try:
        deduplicator = get_snippet_deduplicator()
        outcome = deduplicator.dedupe(req.snippets)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return outcome.to_dict()


@app.post("/v1/admin/snippet_dedup/reset")
def admin_snippet_dedup_reset(request: Request):
    """Call ``reset()`` on the snippet deduplicator; ``request`` is not read here."""
    from ..middleware.snippet_dedup import (
        get_snippet_deduplicator,
    )

    deduplicator = get_snippet_deduplicator()
    deduplicator.reset()
    return {"ok": True}


# =========================== v3.67 Citation Normalizer ==========
class CitationNormalizeRequest(BaseModel):
    """Body of ``POST /v1/admin/citation_normalizer/normalize``."""
    # Text handed to the normalizer's ``normalize()``.
    text: str


@app.get("/v1/admin/citation_normalizer")
def admin_citation_normalizer_stats():
    """Return ``stats()`` of the citation style normalizer."""
    from ..middleware.citation_normalizer import (
        get_citation_style_normalizer,
    )

    normalizer = get_citation_style_normalizer()
    return normalizer.stats()


@app.post("/v1/admin/citation_normalizer/normalize")
def admin_citation_normalizer_normalize(
    req: CitationNormalizeRequest,
    request: Request,
):
    """Normalize citations in ``req.text`` and return the report dict.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the normalizer raises ``ValueError``.
    """
    from ..middleware.citation_normalizer import (
        get_citation_style_normalizer,
    )

    try:
        normalizer = get_citation_style_normalizer()
        normalized = normalizer.normalize(req.text)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return normalized.to_dict()


@app.post("/v1/admin/citation_normalizer/reset")
def admin_citation_normalizer_reset(request: Request):
    """Call ``reset()`` on the citation normalizer; ``request`` is not read here."""
    from ..middleware.citation_normalizer import (
        get_citation_style_normalizer,
    )

    normalizer = get_citation_style_normalizer()
    normalizer.reset()
    return {"ok": True}


# =========================== v3.68 Query Precision ==============
class PrecisionClassifyRequest(BaseModel):
    """Body of ``POST /v1/admin/query_precision_classifier/classify``."""
    # Query handed to the classifier's ``classify()``.
    query: str


class PrecisionClassifyManyRequest(BaseModel):
    """Body of ``POST /v1/admin/query_precision_classifier/classify_many``."""
    # Queries handed to the classifier's ``classify_many()``.
    queries: List[str]


@app.get("/v1/admin/query_precision_classifier")
def admin_query_precision_stats():
    """Return ``stats()`` of the query precision classifier."""
    from ..middleware.query_precision_classifier import (
        get_query_precision_classifier,
    )

    classifier = get_query_precision_classifier()
    return classifier.stats()


@app.post("/v1/admin/query_precision_classifier/classify")
def admin_query_precision_classify(
    req: PrecisionClassifyRequest,
    request: Request,
):
    """Classify ``req.query`` and return the report as a dict.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the classifier raises ``ValueError``.
    """
    from ..middleware.query_precision_classifier import (
        get_query_precision_classifier,
    )

    try:
        classifier = get_query_precision_classifier()
        classification = classifier.classify(req.query)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return classification.to_dict()


@app.post("/v1/admin/query_precision_classifier/classify_many")
def admin_query_precision_classify_many(
        req: PrecisionClassifyManyRequest, request: Request):
    """Classify several queries and return their reports.

    ``request`` is not read by this handler.

    Returns:
        ``{"reports": [...]}`` with each report in ``to_dict()`` form.

    Raises:
        HTTPException: 400 when ``classify_many()`` raises ``ValueError``.
    """
    from ..middleware.query_precision_classifier import \
        get_query_precision_classifier
    try:
        reports = get_query_precision_classifier().classify_many(
            req.queries)
    except ValueError as e:
        # Match the single-query endpoint: input the classifier rejects is a
        # client error (400), not an unhandled 500.
        raise HTTPException(status_code=400, detail=str(e)) from e
    return {"reports": [r.to_dict() for r in reports]}


@app.post("/v1/admin/query_precision_classifier/reset")
def admin_query_precision_reset(request: Request):
    """Call ``reset()`` on the precision classifier; ``request`` is not read here."""
    from ..middleware.query_precision_classifier import (
        get_query_precision_classifier,
    )

    classifier = get_query_precision_classifier()
    classifier.reset()
    return {"ok": True}


# =========================== v3.69 Answer Format Validator ======
class FormatValidateRequest(BaseModel):
    """Body of ``POST /v1/admin/answer_format_validator/validate``."""
    text: str
    # Presumably one of the names listed by the ``/formats`` endpoint —
    # not checked by this model.
    format_name: str


class FormatDetectRequest(BaseModel):
    """Body of ``POST /v1/admin/answer_format_validator/detect``."""
    # Text handed to the validator's ``detect_format()``.
    text: str


@app.get("/v1/admin/answer_format_validator")
def admin_answer_format_stats():
    """Return ``stats()`` of the answer format validator."""
    from ..middleware.answer_format_validator import (
        get_answer_format_validator,
    )

    validator = get_answer_format_validator()
    return validator.stats()


@app.post("/v1/admin/answer_format_validator/validate")
def admin_answer_format_validate(
    req: FormatValidateRequest,
    request: Request,
):
    """Validate ``req.text`` against ``req.format_name``; return the report dict.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the validator raises ``ValueError``.
    """
    from ..middleware.answer_format_validator import (
        get_answer_format_validator,
    )

    try:
        validator = get_answer_format_validator()
        verdict = validator.validate(req.text, req.format_name)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return verdict.to_dict()


@app.post("/v1/admin/answer_format_validator/detect")
def admin_answer_format_detect(
    req: FormatDetectRequest,
    request: Request,
):
    """Return the validator's ``detect_format()`` result for ``req.text``.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the validator raises ``ValueError``.
    """
    from ..middleware.answer_format_validator import (
        get_answer_format_validator,
    )

    try:
        validator = get_answer_format_validator()
        detected = validator.detect_format(req.text)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return {"detected_format": detected}


@app.get("/v1/admin/answer_format_validator/formats")
def admin_answer_format_list():
    """Return the validator's ``available_formats()`` under ``formats``."""
    from ..middleware.answer_format_validator import (
        get_answer_format_validator,
    )

    available = get_answer_format_validator().available_formats()
    return {"formats": available}


@app.post("/v1/admin/answer_format_validator/reset")
def admin_answer_format_reset(request: Request):
    """Call ``reset()`` on the format validator; ``request`` is not read here."""
    from ..middleware.answer_format_validator import (
        get_answer_format_validator,
    )

    validator = get_answer_format_validator()
    validator.reset()
    return {"ok": True}


# =========================== v3.70 Retrieval Delta ==============
class DeltaAnalyzeRequest(BaseModel):
    """Body of ``POST /v1/admin/retrieval_delta/analyze``.

    The two lists are passed positionally (``run_a`` first) to the
    analyzer's ``analyze()``.
    """
    run_a: List[str]
    run_b: List[str]


@app.get("/v1/admin/retrieval_delta")
def admin_retrieval_delta_stats():
    """Return ``stats()`` of the retrieval delta analyzer."""
    from ..middleware.retrieval_delta import (
        get_retrieval_delta_analyzer,
    )

    analyzer = get_retrieval_delta_analyzer()
    return analyzer.stats()


@app.post("/v1/admin/retrieval_delta/analyze")
def admin_retrieval_delta_analyze(
    req: DeltaAnalyzeRequest,
    request: Request,
):
    """Compare ``req.run_a`` with ``req.run_b`` and return the report dict.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the analyzer raises ``ValueError``.
    """
    from ..middleware.retrieval_delta import (
        get_retrieval_delta_analyzer,
    )

    try:
        analyzer = get_retrieval_delta_analyzer()
        delta = analyzer.analyze(req.run_a, req.run_b)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return delta.to_dict()


@app.post("/v1/admin/retrieval_delta/reset")
def admin_retrieval_delta_reset(request: Request):
    """Call ``reset()`` on the delta analyzer; ``request`` is not read here."""
    from ..middleware.retrieval_delta import (
        get_retrieval_delta_analyzer,
    )

    analyzer = get_retrieval_delta_analyzer()
    analyzer.reset()
    return {"ok": True}


# =========================== v3.71 Query Paraphrase =============
class ParaphraseCompareRequest(BaseModel):
    """Body of ``POST /v1/admin/query_paraphrase/compare``.

    Both queries are passed positionally to the detector's ``compare()``.
    """
    query_a: str
    query_b: str


class ParaphraseFindRequest(BaseModel):
    """Body of ``POST /v1/admin/query_paraphrase/find``.

    Both fields are passed positionally to ``find_paraphrases()``.
    """
    query: str
    candidates: List[str]


@app.get("/v1/admin/query_paraphrase")
def admin_query_paraphrase_stats():
    """Return ``stats()`` of the query paraphrase detector."""
    from ..middleware.query_paraphrase import (
        get_query_paraphrase_detector,
    )

    detector = get_query_paraphrase_detector()
    return detector.stats()


@app.post("/v1/admin/query_paraphrase/compare")
def admin_query_paraphrase_compare(
    req: ParaphraseCompareRequest,
    request: Request,
):
    """Compare ``req.query_a`` with ``req.query_b``; return the report dict.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the detector raises ``ValueError``.
    """
    from ..middleware.query_paraphrase import (
        get_query_paraphrase_detector,
    )

    try:
        detector = get_query_paraphrase_detector()
        comparison = detector.compare(req.query_a, req.query_b)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return comparison.to_dict()


@app.post("/v1/admin/query_paraphrase/find")
def admin_query_paraphrase_find(
    req: ParaphraseFindRequest,
    request: Request,
):
    """Run ``find_paraphrases()`` for ``req.query`` over ``req.candidates``.

    ``request`` is not read by this handler.

    Returns:
        ``{"reports": [...]}`` with each report in ``to_dict()`` form.

    Raises:
        HTTPException: 400 when the detector raises ``ValueError``.
    """
    from ..middleware.query_paraphrase import (
        get_query_paraphrase_detector,
    )

    try:
        detector = get_query_paraphrase_detector()
        matches = detector.find_paraphrases(req.query, req.candidates)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return {"reports": [match.to_dict() for match in matches]}


@app.post("/v1/admin/query_paraphrase/reset")
def admin_query_paraphrase_reset(request: Request):
    """Call ``reset()`` on the paraphrase detector; ``request`` is not read here."""
    from ..middleware.query_paraphrase import (
        get_query_paraphrase_detector,
    )

    detector = get_query_paraphrase_detector()
    detector.reset()
    return {"ok": True}


# =========================== v3.72 Doc Quality Scorer ===========
class QualityScoreRequest(BaseModel):
    """Body of ``POST /v1/admin/doc_quality/score``.

    Both fields are passed positionally to the scorer's ``score()``.
    """
    doc_id: str
    text: str


class QualityScoreManyRequest(BaseModel):
    """Body of ``POST /v1/admin/doc_quality/score_many``."""
    # Per-document dict schema is not validated here — presumably
    # doc_id/text like the single-document request; confirm against the scorer.
    docs: List[Dict[str, Any]]


@app.get("/v1/admin/doc_quality")
def admin_doc_quality_stats():
    """Return ``stats()`` of the document quality scorer."""
    from ..middleware.doc_quality import (
        get_doc_quality_scorer,
    )

    scorer = get_doc_quality_scorer()
    return scorer.stats()


@app.post("/v1/admin/doc_quality/score")
def admin_doc_quality_score(
    req: QualityScoreRequest,
    request: Request,
):
    """Score one document's quality and return the report as a dict.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the scorer raises ``ValueError``.
    """
    from ..middleware.doc_quality import (
        get_doc_quality_scorer,
    )

    try:
        scorer = get_doc_quality_scorer()
        quality = scorer.score(req.doc_id, req.text)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return quality.to_dict()


@app.post("/v1/admin/doc_quality/score_many")
def admin_doc_quality_score_many(
        req: QualityScoreManyRequest, request: Request):
    """Score several documents and return per-doc reports plus a summary.

    ``request`` is not read by this handler.

    Returns:
        ``{"reports": [...], "summary": ...}`` where each report is the
        ``to_dict()`` form and the summary comes from ``summarize()``.

    Raises:
        HTTPException: 400 when ``score_many()`` raises ``ValueError``.
    """
    from ..middleware.doc_quality import get_doc_quality_scorer
    scorer = get_doc_quality_scorer()
    try:
        reports = scorer.score_many(req.docs)
    except ValueError as e:
        # Match the single-document endpoint: input the scorer rejects is a
        # client error (400), not an unhandled 500.
        raise HTTPException(status_code=400, detail=str(e)) from e
    return {
        "reports": [r.to_dict() for r in reports],
        "summary": scorer.summarize(reports),
    }


@app.post("/v1/admin/doc_quality/reset")
def admin_doc_quality_reset(request: Request):
    """Call ``reset()`` on the quality scorer; ``request`` is not read here."""
    from ..middleware.doc_quality import (
        get_doc_quality_scorer,
    )

    scorer = get_doc_quality_scorer()
    scorer.reset()
    return {"ok": True}


# =========================== v3.73 Answer Coverage Gap ==========
class CoverageGapCheckRequest(BaseModel):
    """Body of ``POST /v1/admin/answer_coverage_gap/check``.

    Both fields are passed positionally to the detector's ``check()``.
    """
    query: str
    answer: str


@app.get("/v1/admin/answer_coverage_gap")
def admin_answer_coverage_gap_stats():
    """Return ``stats()`` of the answer coverage gap detector."""
    from ..middleware.answer_coverage_gap import (
        get_answer_coverage_gap_detector,
    )

    detector = get_answer_coverage_gap_detector()
    return detector.stats()


@app.post("/v1/admin/answer_coverage_gap/check")
def admin_answer_coverage_gap_check(
    req: CoverageGapCheckRequest,
    request: Request,
):
    """Check ``req.answer`` against ``req.query`` and return the report dict.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the detector raises ``ValueError``.
    """
    from ..middleware.answer_coverage_gap import (
        get_answer_coverage_gap_detector,
    )

    try:
        detector = get_answer_coverage_gap_detector()
        gap_report = detector.check(req.query, req.answer)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return gap_report.to_dict()


@app.post("/v1/admin/answer_coverage_gap/reset")
def admin_answer_coverage_gap_reset(request: Request):
    """Call ``reset()`` on the coverage gap detector; ``request`` is not read here."""
    from ..middleware.answer_coverage_gap import (
        get_answer_coverage_gap_detector,
    )

    detector = get_answer_coverage_gap_detector()
    detector.reset()
    return {"ok": True}


# =========================== v3.74 Query Throughput =============
class ThroughputRecordRequest(BaseModel):
    """Body of ``POST /v1/admin/query_throughput/record``."""
    # Latency sample handed to the profiler's ``record()``; range is not
    # enforced by this model.
    latency_ms: float


@app.get("/v1/admin/query_throughput")
def admin_query_throughput_stats():
    """Return ``stats()`` of the query throughput profiler."""
    from ..middleware.query_throughput import (
        get_query_throughput_profiler,
    )

    profiler = get_query_throughput_profiler()
    return profiler.stats()


@app.post("/v1/admin/query_throughput/record")
def admin_query_throughput_record(
    req: ThroughputRecordRequest,
    request: Request,
):
    """Record ``req.latency_ms`` with the throughput profiler.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the profiler raises ``ValueError``.
    """
    from ..middleware.query_throughput import (
        get_query_throughput_profiler,
    )

    try:
        profiler = get_query_throughput_profiler()
        profiler.record(req.latency_ms)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return {"ok": True}


@app.get("/v1/admin/query_throughput/analyze")
def admin_query_throughput_analyze():
    """Return the profiler's ``analyze()`` result serialised via ``to_dict()``."""
    from ..middleware.query_throughput import (
        get_query_throughput_profiler,
    )

    analysis = get_query_throughput_profiler().analyze()
    return analysis.to_dict()


@app.post("/v1/admin/query_throughput/reset")
def admin_query_throughput_reset(request: Request):
    """Call ``reset()`` on the throughput profiler; ``request`` is not read here."""
    from ..middleware.query_throughput import (
        get_query_throughput_profiler,
    )

    profiler = get_query_throughput_profiler()
    profiler.reset()
    return {"ok": True}


# =========================== v3.75 Query Normalizer =============
class QueryNormalizeRequest(BaseModel):
    """Body of ``POST /v1/admin/query_normalizer/normalize``.

    Each optional flag left as ``None`` is replaced by the handler with
    the matching entry from the normalizer's ``stats()["defaults"]``.
    """
    text: str
    strip_nikud: Optional[bool] = None
    lowercase: Optional[bool] = None
    strip_punctuation: Optional[bool] = None
    strip_question_marks: Optional[bool] = None


@app.get("/v1/admin/query_normalizer")
def admin_query_normalizer_stats():
    """Return ``stats()`` of the query normalizer."""
    from ..middleware.query_normalizer import (
        get_query_normalizer,
    )

    normalizer = get_query_normalizer()
    return normalizer.stats()


@app.post("/v1/admin/query_normalizer/normalize")
def admin_query_normalizer_normalize(
    req: QueryNormalizeRequest,
    request: Request,
):
    """Normalize ``req.text`` using per-request flags over the defaults.

    Each flag the caller left as ``None`` falls back to the matching entry
    in the normalizer's ``stats()["defaults"]``. ``request`` is not read
    by this handler.

    Raises:
        HTTPException: 400 when ``normalize()`` raises ``ValueError``.
    """
    from ..middleware.query_normalizer import (
        get_query_normalizer,
        NormalizeOptions,
    )

    normalizer = get_query_normalizer()
    fallback = normalizer.stats()["defaults"]

    # Resolve the flags in the same order the options are declared; the
    # default is only looked up when the request did not set the flag.
    flag_names = (
        "strip_nikud",
        "lowercase",
        "strip_punctuation",
        "strip_question_marks",
    )
    resolved = {}
    for flag in flag_names:
        requested = getattr(req, flag)
        resolved[flag] = fallback[flag] if requested is None else requested
    options = NormalizeOptions(**resolved)

    try:
        normalized = normalizer.normalize(req.text, options)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return normalized.to_dict()


@app.post("/v1/admin/query_normalizer/reset")
def admin_query_normalizer_reset(request: Request):
    """Call ``reset()`` on the query normalizer; ``request`` is not read here."""
    from ..middleware.query_normalizer import (
        get_query_normalizer,
    )

    normalizer = get_query_normalizer()
    normalizer.reset()
    return {"ok": True}


# =========================== v3.76 Chunk Overlap Analyzer =======
class ChunkAnalyzeRequest(BaseModel):
    """Body of ``POST /v1/admin/chunk_overlap/analyze``.

    Both fields are passed positionally to the analyzer's ``analyze()``.
    """
    chunks: List[str]
    # Optional; forwarded as ``None`` when omitted.
    source_text: Optional[str] = None


@app.get("/v1/admin/chunk_overlap")
def admin_chunk_overlap_stats():
    """Return ``stats()`` of the chunk overlap analyzer."""
    from ..middleware.chunk_overlap import (
        get_chunk_overlap_analyzer,
    )

    analyzer = get_chunk_overlap_analyzer()
    return analyzer.stats()


@app.post("/v1/admin/chunk_overlap/analyze")
def admin_chunk_overlap_analyze(
    req: ChunkAnalyzeRequest,
    request: Request,
):
    """Analyze ``req.chunks`` (with optional ``req.source_text``); return a dict.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the analyzer raises ``ValueError``.
    """
    from ..middleware.chunk_overlap import (
        get_chunk_overlap_analyzer,
    )

    try:
        analyzer = get_chunk_overlap_analyzer()
        overlap = analyzer.analyze(req.chunks, req.source_text)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return overlap.to_dict()


@app.post("/v1/admin/chunk_overlap/reset")
def admin_chunk_overlap_reset(request: Request):
    """Call ``reset()`` on the chunk overlap analyzer; ``request`` is not read here."""
    from ..middleware.chunk_overlap import (
        get_chunk_overlap_analyzer,
    )

    analyzer = get_chunk_overlap_analyzer()
    analyzer.reset()
    return {"ok": True}


# =========================== v3.77 Numeric Consistency ==========
class NumericCheckRequest(BaseModel):
    """Body of ``POST /v1/admin/answer_numeric_consistency/check``.

    Both fields are passed positionally to the checker's ``check()``.
    """
    answer: str
    sources: List[str]


class NumericExtractRequest(BaseModel):
    """Body of ``POST /v1/admin/answer_numeric_consistency/extract``."""
    # Text handed to the checker's ``extract()``.
    text: str


@app.get("/v1/admin/answer_numeric_consistency")
def admin_answer_numeric_stats():
    """Return ``stats()`` of the numeric consistency checker."""
    from ..middleware.answer_numeric_consistency import (
        get_answer_numeric_consistency_checker,
    )

    checker = get_answer_numeric_consistency_checker()
    return checker.stats()


@app.post("/v1/admin/answer_numeric_consistency/check")
def admin_answer_numeric_check(
    req: NumericCheckRequest,
    request: Request,
):
    """Check ``req.answer`` against ``req.sources``; return the report dict.

    ``request`` is not read by this handler.

    Raises:
        HTTPException: 400 when the checker raises ``ValueError``.
    """
    from ..middleware.answer_numeric_consistency import (
        get_answer_numeric_consistency_checker,
    )

    try:
        checker = get_answer_numeric_consistency_checker()
        consistency = checker.check(req.answer, req.sources)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return consistency.to_dict()


@app.post("/v1/admin/answer_numeric_consistency/extract")
def admin_answer_numeric_extract(
    req: NumericExtractRequest,
    request: Request,
):
    """Run the checker's ``extract()`` on ``req.text``.

    ``request`` is not read by this handler.

    Returns:
        ``{"facts": [...]}`` with each item in ``to_dict()`` form.

    Raises:
        HTTPException: 400 when the checker raises ``ValueError``.
    """
    from ..middleware.answer_numeric_consistency import (
        get_answer_numeric_consistency_checker,
    )

    try:
        checker = get_answer_numeric_consistency_checker()
        extracted = checker.extract(req.text)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return {"facts": [fact.to_dict() for fact in extracted]}


@app.post("/v1/admin/answer_numeric_consistency/reset")
def admin_answer_numeric_reset(request: Request):
    """Call ``reset()`` on the numeric consistency checker; ``request`` is not read here."""
    from ..middleware.answer_numeric_consistency import (
        get_answer_numeric_consistency_checker,
    )

    checker = get_answer_numeric_consistency_checker()
    checker.reset()
    return {"ok": True}


# =========================== v3.78 Error Budget Tracker =========
class ErrorBudgetRecordRequest(BaseModel):
    """Body of ``POST /v1/admin/error_budget/record``."""
    # Flag handed to the tracker's ``record()``.
    is_error: bool


@app.get("/v1/admin/error_budget")
def admin_error_budget_stats():
    """Return ``stats()`` of the error budget tracker."""
    from ..middleware.error_budget import (
        get_error_budget_tracker,
    )

    tracker = get_error_budget_tracker()
    return tracker.stats()


@app.post("/v1/admin/error_budget/record")
def admin_error_budget_record(
    req: ErrorBudgetRecordRequest,
    request: Request,
):
    """Record ``req.is_error`` with the error budget tracker.

    ``request`` is not read by this handler, and no exception from the
    tracker is translated here.
    """
    from ..middleware.error_budget import (
        get_error_budget_tracker,
    )

    tracker = get_error_budget_tracker()
    tracker.record(req.is_error)
    return {"ok": True}


@app.get("/v1/admin/error_budget/analyze")
def admin_error_budget_analyze():
    """Return the tracker's ``analyze()`` result serialised via ``to_dict()``."""
    from ..middleware.error_budget import (
        get_error_budget_tracker,
    )

    analysis = get_error_budget_tracker().analyze()
    return analysis.to_dict()


@app.post("/v1/admin/error_budget/reset")
def admin_error_budget_reset(request: Request):
    """Call ``reset()`` on the error budget tracker; ``request`` is not read here."""
    from ..middleware.error_budget import (
        get_error_budget_tracker,
    )

    tracker = get_error_budget_tracker()
    tracker.reset()
    return {"ok": True}


# =========================== v3.79 Request Fingerprint ==========
class FingerprintComputeRequest(BaseModel):
    """Body of ``POST /v1/admin/request_fingerprint/compute``."""
    # Arbitrary dict handed to the fingerprinter's ``compute()``; this is a
    # body field, not the FastAPI ``Request`` object.
    request: Dict[str, Any]


class FingerprintMatchRequest(BaseModel):
    """Body of ``POST /v1/admin/request_fingerprint/match``.

    ``a`` and ``b`` are passed positionally and ``strict`` by keyword to
    the fingerprinter's ``matches()``.
    """
    a: Dict[str, Any]
    b: Dict[str, Any]
    # Defaults to ``False`` when omitted; its meaning is defined by the
    # fingerprinter.
    strict: bool = False


@app.get("/v1/admin/request_fingerprint")
def admin_request_fingerprint_stats():
    """Return ``stats()`` of the request fingerprinter."""
    from ..middleware.request_fingerprint import (
        get_request_fingerprinter,
    )

    fingerprinter = get_request_fingerprinter()
    return fingerprinter.stats()


@app.post("/v1/admin/request_fingerprint/compute")
def admin_request_fingerprint_compute(
        req: FingerprintComputeRequest, request: Request):
    """Compute a fingerprint for ``req.request`` and return it as a dict.

    A ``ValueError`` raised while computing is reported to the client
    as HTTP 400 with the error text as ``detail``. ``request`` is not
    read.
    """
    from ..middleware.request_fingerprint import get_request_fingerprinter

    try:
        fingerprint = get_request_fingerprinter().compute(req.request)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    else:
        return fingerprint.to_dict()


@app.post("/v1/admin/request_fingerprint/match")
def admin_request_fingerprint_match(
        req: FingerprintMatchRequest, request: Request):
    """Ask the fingerprinter whether ``req.a`` and ``req.b`` match.

    Returns ``{"matches": <result>, "strict": <req.strict>}``. A
    ``ValueError`` from ``matches()`` becomes HTTP 400. ``request`` is
    not read.
    """
    from ..middleware.request_fingerprint import get_request_fingerprinter

    try:
        verdict = get_request_fingerprinter().matches(
            req.a, req.b, strict=req.strict)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    else:
        return {"matches": verdict, "strict": req.strict}


@app.post("/v1/admin/request_fingerprint/reset")
def admin_request_fingerprint_reset(request: Request):
    """Call ``reset()`` on the request fingerprinter.

    ``request`` is not read. Always returns ``{"ok": True}``.
    """
    from ..middleware.request_fingerprint import get_request_fingerprinter

    fingerprinter = get_request_fingerprinter()
    fingerprinter.reset()
    return {"ok": True}


# =========================== v3.80 Circuit Breaker ==============
# Request body for POST /v1/admin/circuit_breaker/record.
class BreakerRecordRequest(BaseModel):
    # Breaker name; passed to the registry's record() and state() calls.
    name: str
    # Outcome flag passed as the second argument to record().
    success: bool


@app.get("/v1/admin/circuit_breaker")
def admin_circuit_breaker_stats():
    """Return whatever the circuit-breaker registry's ``stats()`` yields."""
    from ..middleware.circuit_breaker import get_circuit_breaker_registry

    registry = get_circuit_breaker_registry()
    return registry.stats()


@app.post("/v1/admin/circuit_breaker/record")
def admin_circuit_breaker_record(
        req: BreakerRecordRequest, request: Request):
    """Record one outcome on a named breaker and report its state.

    Args:
        req: Carries the breaker ``name`` and the ``success`` flag that
            are forwarded to the registry's ``record()``.
        request: Accepted but not read by this handler.

    Returns:
        ``{"ok": True, "state": <registry.state(name)>}``.

    Raises:
        HTTPException: 400 when the registry raises ``ValueError`` from
            either ``record()`` or the follow-up ``state()`` lookup.
    """
    from ..middleware.circuit_breaker import get_circuit_breaker_registry

    try:
        registry = get_circuit_breaker_registry()
        registry.record(req.name, req.success)
        # The state lookup lives inside the same try so that a
        # ValueError from it is reported as 400, exactly as the
        # GET /v1/admin/circuit_breaker/state handler does, instead of
        # escaping as an unhandled 500.
        state = registry.state(req.name)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return {"ok": True, "state": state}


@app.get("/v1/admin/circuit_breaker/state")
def admin_circuit_breaker_state(name: str):
    """Return ``{"name": name, "state": ...}`` for one breaker.

    A ``ValueError`` from the registry's ``state()`` is reported as
    HTTP 400 with the error text as ``detail``.
    """
    from ..middleware.circuit_breaker import get_circuit_breaker_registry

    try:
        current = get_circuit_breaker_registry().state(name)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return {"name": name, "state": current}


@app.get("/v1/admin/circuit_breaker/all")
def admin_circuit_breaker_all():
    """Return the registry's ``all_stats()`` under the ``breakers`` key."""
    from ..middleware.circuit_breaker import get_circuit_breaker_registry

    every_breaker = get_circuit_breaker_registry().all_stats()
    return {"breakers": every_breaker}


@app.post("/v1/admin/circuit_breaker/reset")
def admin_circuit_breaker_reset(
        name: Optional[str] = None):
    """Forward ``name`` to the registry's ``reset()``.

    The response echoes the name that was passed, or the literal
    ``"all"`` when ``name`` is falsy (``None`` or empty string).
    """
    from ..middleware.circuit_breaker import get_circuit_breaker_registry

    registry = get_circuit_breaker_registry()
    registry.reset(name)
    label = name if name else "all"
    return {"ok": True, "reset": label}


# ========================== v3.81–v3.95 admin routes (compact) ==========
# These power the static admin dashboard at /admin. Each one is a
# read-only stats/report endpoint plus the minimum write endpoint
# the dashboard needs.

@app.get("/v1/admin/corpus_ingestion/analyze")
def admin_corpus_ingestion_analyze(window_sec: Optional[int] = None):
    """Return the corpus-ingestion tracker's analysis as a dict.

    ``window_sec`` is forwarded unchanged as a keyword to ``analyze()``.
    """
    from ..middleware.corpus_ingestion import get_corpus_ingestion_tracker

    analysis = get_corpus_ingestion_tracker().analyze(window_sec=window_sec)
    return analysis.to_dict()


@app.get("/v1/admin/embedding_version")
def admin_embedding_version_stats():
    """Return whatever the embedding-version manager's ``stats()`` yields."""
    from ..middleware.embedding_version import get_embedding_version_manager

    manager = get_embedding_version_manager()
    return manager.stats()


@app.get("/v1/admin/embedding_version/migration")
def admin_embedding_version_migration():
    """Return the manager's ``migration_report()`` in ``to_dict()`` form."""
    from ..middleware.embedding_version import get_embedding_version_manager

    report = get_embedding_version_manager().migration_report()
    return report.to_dict()


@app.get("/v1/admin/index_rebuild_scheduler")
def admin_index_rebuild_stats():
    """Return the index-rebuild scheduler's ``state()`` as a dict."""
    from ..middleware.index_rebuild_scheduler import (
        get_index_rebuild_scheduler,
    )

    scheduler_state = get_index_rebuild_scheduler().state()
    return scheduler_state.to_dict()


@app.get("/v1/admin/dsr_handler/report")
def admin_dsr_handler_report():
    """Return the DSR handler's ``report()`` in ``to_dict()`` form."""
    from ..middleware.dsr_handler import get_dsr_handler

    report = get_dsr_handler().report()
    return report.to_dict()


@app.get("/v1/admin/lawful_basis")
def admin_lawful_basis_stats():
    """Return whatever the lawful-basis tracker's ``stats()`` yields."""
    from ..middleware.lawful_basis import get_lawful_basis_tracker

    tracker = get_lawful_basis_tracker()
    return tracker.stats()


@app.get("/v1/admin/tenant_isolation")
def admin_tenant_isolation_stats():
    """Return whatever the tenant-isolation enforcer's ``stats()`` yields."""
    from ..middleware.tenant_isolation import get_tenant_isolation_enforcer

    enforcer = get_tenant_isolation_enforcer()
    return enforcer.stats()


@app.get("/v1/admin/zero_result_tracker/report")
def admin_zero_result_report(top_k: int = 20,
                                  min_occurrences: int = 1):
    """Return the zero-result tracker's report as a dict.

    Both query parameters are forwarded unchanged as keywords to the
    tracker's ``report()``.
    """
    from ..middleware.zero_result_tracker import get_zero_result_tracker

    tracker = get_zero_result_tracker()
    report = tracker.report(top_k=top_k, min_occurrences=min_occurrences)
    return report.to_dict()


@app.get("/v1/admin/failed_generation/report")
def admin_failed_generation_report(window_sec: Optional[int] = None):
    """Return the failed-generation tracker's report as a dict.

    ``window_sec`` is forwarded unchanged as a keyword to ``report()``.
    """
    from ..middleware.failed_generation import get_failed_generation_tracker

    report = get_failed_generation_tracker().report(window_sec=window_sec)
    return report.to_dict()


@app.get("/v1/admin/per_query_cost/total")
def admin_per_query_cost_total(window_sec: Optional[int] = None):
    """Return the attributor's ``total_usd()`` under the ``total_usd`` key.

    ``window_sec`` is forwarded unchanged as a keyword argument.
    """
    from ..middleware.per_query_cost import get_per_query_cost_attributor

    attributor = get_per_query_cost_attributor()
    total = attributor.total_usd(window_sec=window_sec)
    return {"total_usd": total}


@app.get("/v1/admin/per_query_cost/all_tenants")
def admin_per_query_cost_all_tenants(
        window_sec: Optional[int] = None):
    """Return every tenant summary from the per-query cost attributor.

    Each item yielded by ``all_tenants_summary(window_sec=...)`` is
    converted with ``to_dict()`` and listed under ``tenants``.
    """
    from ..middleware.per_query_cost import get_per_query_cost_attributor

    attributor = get_per_query_cost_attributor()
    summaries = attributor.all_tenants_summary(window_sec=window_sec)
    as_dicts = [summary.to_dict() for summary in summaries]
    return {"tenants": as_dicts}


@app.get("/v1/admin/synthetic_probe/all")
def admin_synthetic_probe_all():
    """Return the probe runner's ``all_probe_stats()`` under ``probes``."""
    from ..middleware.synthetic_probe import get_synthetic_probe_runner

    probe_stats = get_synthetic_probe_runner().all_probe_stats()
    return {"probes": probe_stats}


@app.get("/v1/admin/composite_health_score/aggregate")
def admin_composite_health_aggregate():
    """Return the aggregator's ``aggregate()`` result as a dict."""
    from ..middleware.composite_health_score import (
        get_composite_health_score_aggregator,
    )

    aggregated = get_composite_health_score_aggregator().aggregate()
    return aggregated.to_dict()


@app.get("/v1/admin/answer_helpfulness/report")
def admin_answer_helpfulness_report(
        window_sec: Optional[int] = None):
    """Return the answer-helpfulness tracker's report as a dict.

    ``window_sec`` is forwarded unchanged as a keyword to ``report()``.
    """
    from ..middleware.answer_helpfulness import get_answer_helpfulness_tracker

    report = get_answer_helpfulness_tracker().report(window_sec=window_sec)
    return report.to_dict()


@app.get("/v1/admin/answer_helpfulness/negatives")
def admin_answer_helpfulness_negatives(limit: int = 20):
    """Return the tracker's negative samples, each as a dict.

    ``limit`` is forwarded unchanged as a keyword to
    ``negative_samples()``; the results are listed under ``negatives``.
    """
    from ..middleware.answer_helpfulness import get_answer_helpfulness_tracker

    tracker = get_answer_helpfulness_tracker()
    samples = tracker.negative_samples(limit=limit)
    return {"negatives": [sample.to_dict() for sample in samples]}


# Request body for POST /v1/admin/answer_helpfulness/submit. Every field
# is forwarded as a keyword argument to the tracker's submit() call.
class HelpfulnessSubmitRequest(BaseModel):
    request_id: str
    # Accepted values are decided by the tracker, not validated here.
    vote: str
    reasons: Optional[List[str]] = None
    # The endpoint coerces None/empty to "" before forwarding.
    free_text: Optional[str] = ""
    tenant_id: Optional[str] = None
    user_id: Optional[str] = None


@app.post("/v1/admin/answer_helpfulness/submit")
def admin_answer_helpfulness_submit(
        req: HelpfulnessSubmitRequest, request: Request):
    """Submit one helpfulness vote and return the stored record as a dict.

    All body fields are forwarded to the tracker's ``submit()``; a
    missing or empty ``free_text`` is sent as ``""``. A ``ValueError``
    from the tracker is reported as HTTP 400. ``request`` is not read.
    """
    from ..middleware.answer_helpfulness import get_answer_helpfulness_tracker

    comment = req.free_text if req.free_text else ""
    try:
        stored = get_answer_helpfulness_tracker().submit(
            request_id=req.request_id,
            vote=req.vote,
            reasons=req.reasons,
            free_text=comment,
            tenant_id=req.tenant_id,
            user_id=req.user_id,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    else:
        return stored.to_dict()


@app.get("/v1/admin/adversarial_prompt")
def admin_adversarial_prompt_stats():
    """Return whatever the adversarial-prompt detector's ``stats()`` yields."""
    from ..middleware.adversarial_prompt import get_adversarial_prompt_detector

    detector = get_adversarial_prompt_detector()
    return detector.stats()


# Request body for POST /v1/admin/adversarial_prompt/detect.
class AdversarialDetectRequest(BaseModel):
    # Text handed unchanged to the detector's detect() call.
    text: str


@app.post("/v1/admin/adversarial_prompt/detect")
def admin_adversarial_prompt_detect(
        req: AdversarialDetectRequest, request: Request):
    """Run the adversarial-prompt detector on ``req.text``.

    Returns the detector's report in ``to_dict()`` form. A
    ``ValueError`` from ``detect()`` is reported as HTTP 400.
    ``request`` is not read.
    """
    from ..middleware.adversarial_prompt import get_adversarial_prompt_detector

    try:
        verdict = get_adversarial_prompt_detector().detect(req.text)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    else:
        return verdict.to_dict()


# ========================== /v1/query — minimal end-user endpoint =====
def query_endpoint(req: QueryRequest, request: Request):
    """End-user query endpoint.

    Runs the adversarial check, session-topic tracking, retrieval,
    generation and zero-result tracking server-side so the frontend
    doesn't need admin scope.

    Args:
        req: Reads ``query``, ``session_id``, ``filters``, ``top_k`` and
            ``request_id``.
        request: Accepted but not read by this handler.

    Returns:
        A dict that always carries ``answer``, ``docs``, ``confidence``,
        ``request_id`` and ``adversarial``. The success path adds
        ``generator``, ``understanding`` and ``timings``; a query the
        detector recommends blocking adds ``blocked: True``. Any
        exception raised while running the pipeline or shaping the
        response is turned into a fallback payload rather than
        propagated.
    """
    from ..middleware.zero_result_tracker import \
        get_zero_result_tracker
    from ..middleware.session_topic_tracker import \
        get_session_topic_tracker
    from ..middleware.adversarial_prompt import \
        get_adversarial_prompt_detector

    def _record_zero_result():
        # Best-effort bookkeeping: a tracker failure must neither turn a
        # successful answer into the error fallback nor make the
        # fallback itself raise (the streaming variant guards this call
        # the same way).
        try:
            get_zero_result_tracker().record(req.query)
        except Exception:
            pass

    # 1. Adversarial pre-check (server-side, always runs). Detector
    #    failures are swallowed so the query still proceeds.
    adv = None
    try:
        adv = get_adversarial_prompt_detector().detect(req.query)
        if adv.recommendation == "block":
            return {
                "answer": (
                    "⚠ הקלט שלך זוהה כחשוד ("
                    + ", ".join(m.category for m in adv.matches)
                    + "). אם זו טעות, פנה למנהל המערכת."),
                "docs": [],
                "confidence": 0.0,
                "request_id": req.request_id,
                "adversarial": adv.to_dict(),
                "blocked": True,
            }
    except Exception:
        pass

    # 2. Session topic tracking (server-side, silent)
    if req.session_id:
        try:
            get_session_topic_tracker().record(
                req.session_id, req.query)
        except Exception:
            pass

    # 3. Try the real pipeline; fall back to empty if not wired.
    try:
        from ..pipeline import get_pipeline
        from ..core.types import Query
        pipe = get_pipeline()
        # v4.x — pass session_id through to the pipeline so the
        # session-uploads sidecar (in pipeline.py) can search the user's
        # own uploaded docs alongside the public corpus.
        rag_resp = pipe.run(Query(
            text=req.query,
            session_id=req.session_id,
            filters=dict(req.filters or {}),
        ))
        # RAGResponse has: answer, sources (list of doc_ids), retrieved
        retrieved = getattr(rag_resp, "retrieved", []) or []
        docs = []
        for r in retrieved[:req.top_k]:
            # Pipeline returns Retrieved(chunk=Chunk(doc_id, text,
            # metadata, ...), score, retriever, ...). Drill in.
            chunk = (getattr(r, "chunk", None) or
                       getattr(r, "document", None) or r)
            doc_id = (getattr(chunk, "doc_id", None) or
                       getattr(chunk, "id", None) or
                       getattr(r, "id", "?"))
            text = (getattr(chunk, "text", None) or
                     getattr(chunk, "content", "") or "")
            raw_meta = (getattr(chunk, "metadata", None) or
                          getattr(r, "metadata", None) or {})
            # Sometimes metadata is double-wrapped: meta = {metadata: {...}, source: ...}
            meta = raw_meta.get("metadata", raw_meta) \
                if isinstance(raw_meta, dict) else {}
            # Title falls back from metadata title, to the raw "source",
            # to the document id.
            title = (meta.get("title") if isinstance(meta, dict)
                      else None) or \
                    (raw_meta.get("source")
                      if isinstance(raw_meta, dict) else None) or \
                    str(doc_id)
            kind = (meta.get("kind") if isinstance(meta, dict)
                     else None)
            chunk_idx = (meta.get("chunk")
                          if isinstance(meta, dict) else None)
            # v2.x — expose rich metadata so the frontend side-panel can
            # render citation, judges, dates, law, topic, etc.
            exposed_meta = {}
            if isinstance(meta, dict):
                # Flatten commonly-used keys with a safe subset (avoid leaking
                # internal markers). None and bytes-like values are dropped.
                for k in ("citation", "court", "judges", "verdict_dt",
                          "verdict_ty", "inyan", "sides", "section", "law",
                          "topic", "chapter", "language", "type", "verbatim",
                          "title", "source"):
                    v = meta.get(k)
                    if v is not None and not isinstance(v, (bytes, bytearray)):
                        exposed_meta[k] = v
            docs.append({
                "doc_id": str(doc_id),
                "title": str(title),
                # v3.x — increased from 600 to 2500 chars so the frontend
                # can render FULL paragraphs (statute clauses are often
                # 800-1500 chars; caselaw excerpts can run 2000+). The
                # frontend's citation-card UI already truncates for display
                # via -webkit-line-clamp, so sending more text costs nothing
                # visually but gives us headroom for the source panel.
                "text": text[:2500],
                "score": getattr(r, "score", None),
                "kind": kind,
                "chunk_index": chunk_idx,
                "retriever": getattr(r, "retriever", None),
                "metadata": exposed_meta,
            })
        # Recorded on the unfiltered result, i.e. before the optional
        # section-type post-filter further down.
        if not docs:
            _record_zero_result()
        signals = getattr(rag_resp, "signals", None)
        confidence = (getattr(signals, "omega", None)
                        if signals else None)
        # v2.x — surface which generator actually ran (for UI badge).
        # Primary = anthropic/openai/local per preset; fallback =
        # extractive if primary errored. Read from pipe's stash set
        # in pipeline.py run() after the generate stage.
        gen_used = getattr(pipe, "_last_generator_used",
                           getattr(pipe.generator, "name", "unknown"))
        gen_fallback = bool(getattr(pipe, "_last_generator_fallback", False))
        answer = getattr(rag_resp, "answer", "(no answer generated)")
        # v3.x — training data collector (opt-in via TAU_RAG_COLLECT_TRAINING=1)
        # Auto-generate a request_id when the client didn't supply one,
        # so batch scripts and other callers don't silently lose traces.
        # The generated id is used for the collector only; the response
        # still echoes the client's original request_id.
        try:
            from ..middleware.training_collector import get_training_collector
            import uuid as _uuid
            _rid = (req.request_id or "").strip()
            if not _rid:
                _rid = "auto_" + _uuid.uuid4().hex[:16]
            get_training_collector().record(
                request_id=_rid,
                query=req.query,
                context=[{
                    "doc_id":    d.get("doc_id"),
                    "text":      d.get("text", "")[:600],
                    "metadata":  d.get("metadata", {}),
                    "score":     d.get("score"),
                    "retriever": d.get("retriever"),
                } for d in docs],
                answer=answer,
                language=getattr(getattr(rag_resp, "understanding", None),
                                 "language", "") or "",
                generator=gen_used,
                confidence=confidence,
            )
        except Exception:
            pass
        # v4.x — Smart Reasoning: surface understanding + timings so
        # the frontend can render Query Decomposition + Reasoning Trace.
        understanding = getattr(rag_resp, "understanding", None)
        sub_queries = []
        query_type = None
        domain = None
        if understanding is not None:
            sub_queries = list(getattr(understanding, "sub_queries", []) or [])
            # Drop the original query if it's accidentally in there
            sub_queries = [s for s in sub_queries if s and s != req.query]
            query_type = getattr(understanding, "query_type", None)
            domain = getattr(understanding, "domain", None)
        # RAGResponse has `timing_ms`, not `timings`
        timings_dict = getattr(rag_resp, "timing_ms", None) or \
                        getattr(rag_resp, "timings", None) or {}
        # v2 — Section-type post-filter (e.g. "search only in דיון
        # sections"). Recognized: header / background / procedural /
        # arguments_plaintiff / arguments_defendant / arguments_general /
        # discussion / ruling / operative.
        try:
            sec_type = (req.filters or {}).get("section_type")
            if sec_type:
                docs = _filter_docs_by_section_type(docs, str(sec_type))
        except Exception:
            pass

        # v2 — Smart Answer Enrichment: for each source doc, attach its
        # precedent status (overruled / good law / unknown). Cheap because
        # the corpus-wide overruled cache is built once and reused.
        try:
            _enrich_docs_with_precedent_status(docs)
        except Exception:
            pass
        return {
            "answer": answer,
            "docs": docs,
            "confidence": confidence,
            "request_id": req.request_id,
            "adversarial": adv.to_dict() if adv else None,
            "generator": {
                "used":       gen_used,
                "primary":    pipe.config.generation.provider,
                "model":      pipe.config.generation.model,
                "fallback":   gen_fallback,
            },
            # v4.x — Smart Reasoning trace
            "understanding": {
                "sub_queries":  sub_queries,
                "query_type":   query_type,
                "domain":       domain,
            },
            "timings": (
                {k: round(float(v), 2) for k, v in timings_dict.items()}
                if isinstance(timings_dict, dict) else {}
            ),
        }
    except Exception as e:
        # Fallback if the pipeline (or response shaping) failed.
        # NOTE(review): the exception type and message are echoed to the
        # end user here — confirm that is acceptable for this audience.
        _record_zero_result()
        return {
            "answer": (
                f"⚠ Pipeline error ({type(e).__name__}: {e}). "
                "Check server logs and confirm "
                "TAU_RAG_PRESET is set."),
            "docs": [],
            "confidence": 0.0,
            "request_id": req.request_id,
            "adversarial": adv.to_dict() if adv else None,
        }


# v4.x — Streaming variant of /v1/query.
# Emits SSE events: docs → token×N → signals → done.
# The perceived-latency win comes from sending docs FIRST so the user
# sees citation cards while the answer text streams in chunks.
def query_stream_endpoint(req: QueryRequest, request: Request):
    """Streaming (Server-Sent Events) variant of the query endpoint.

    The pipeline is run once, then the result is emitted as SSE events
    in this order: ``docs`` → ``token`` (repeated) → ``signals`` →
    ``done``. A query the adversarial detector recommends blocking, or
    any exception, produces an ``error`` event followed by ``done``.

    Args:
        req: Reads ``query``, ``session_id``, ``filters``, ``top_k`` and
            ``request_id``.
        request: Accepted but not read by this handler.

    Returns:
        A ``StreamingResponse`` with media type ``text/event-stream``.
    """
    from fastapi.responses import StreamingResponse
    import json as _sjson

    def _err_payload(msg, code="error"):
        # JSON body of an "error" event.
        return _sjson.dumps({"error": {"code": code, "message": str(msg)}},
                             ensure_ascii=False)

    def _sse(event, data):
        # SSE framing: event:<name>\n data:<json>\n\n
        return f"event: {event}\ndata: {data}\n\n"

    def gen():
        # Reuse the SAME logic as the sync endpoint. We run it once,
        # then chunk the answer for the streaming effect.
        # NOTE(review): unlike the sync endpoint, this path does not
        # feed the training collector or emit understanding/timings —
        # confirm whether that is intentional.
        try:
            from ..middleware.zero_result_tracker import \
                get_zero_result_tracker
            from ..middleware.session_topic_tracker import \
                get_session_topic_tracker
            from ..middleware.adversarial_prompt import \
                get_adversarial_prompt_detector

            # Adversarial pre-check (mirrors sync endpoint)
            adv = None
            try:
                adv = get_adversarial_prompt_detector().detect(req.query)
                if adv.recommendation == "block":
                    yield _sse("error", _err_payload(
                        "הקלט שלך זוהה כחשוד.", code="adversarial"))
                    yield _sse("done", _sjson.dumps({"blocked": True},
                                                     ensure_ascii=False))
                    return
            except Exception:
                pass

            if req.session_id:
                try:
                    get_session_topic_tracker().record(
                        req.session_id, req.query)
                except Exception:
                    pass

            from ..pipeline import get_pipeline
            from ..core.types import Query
            pipe = get_pipeline()
            rag_resp = pipe.run(Query(
                text=req.query,
                session_id=req.session_id,
                filters=dict(req.filters or {}),
            ))

            # Build the docs payload (identical shape to /v1/query)
            retrieved = getattr(rag_resp, "retrieved", []) or []
            docs = []
            for r in retrieved[:req.top_k]:
                chunk = (getattr(r, "chunk", None) or
                          getattr(r, "document", None) or r)
                doc_id = (getattr(chunk, "doc_id", None) or
                           getattr(chunk, "id", None) or
                           getattr(r, "id", "?"))
                text = (getattr(chunk, "text", None) or
                         getattr(chunk, "content", "") or "")
                raw_meta = (getattr(chunk, "metadata", None) or
                              getattr(r, "metadata", None) or {})
                # Metadata may be double-wrapped under a "metadata" key.
                meta = (raw_meta.get("metadata", raw_meta)
                          if isinstance(raw_meta, dict) else {})
                title = (meta.get("title") if isinstance(meta, dict)
                           else None) or \
                          (raw_meta.get("source")
                           if isinstance(raw_meta, dict) else None) or \
                          str(doc_id)
                # Only an allow-listed subset of metadata is exposed;
                # None and bytes-like values are dropped.
                exposed_meta = {}
                if isinstance(meta, dict):
                    for k in ("citation", "court", "judges", "verdict_dt",
                              "verdict_ty", "inyan", "sides", "section",
                              "law", "topic", "chapter", "language",
                              "type", "verbatim", "title", "source"):
                        v = meta.get(k)
                        if v is not None and not isinstance(
                                v, (bytes, bytearray)):
                            exposed_meta[k] = v
                docs.append({
                    "doc_id": str(doc_id),
                    "title": str(title),
                    "text": text[:2500],
                    "score": getattr(r, "score", None),
                    "kind": (meta.get("kind") if isinstance(meta, dict)
                              else None),
                    "chunk_index": (meta.get("chunk")
                                      if isinstance(meta, dict) else None),
                    "retriever": getattr(r, "retriever", None),
                    "metadata": exposed_meta,
                })

            # Mirror the sync endpoint: an empty retrieval counts as a
            # zero-result query (recorded before the section-type
            # filter, as the sync path does). Best-effort — a tracker
            # failure must not abort the stream.
            if not docs:
                try:
                    get_zero_result_tracker().record(req.query)
                except Exception:
                    pass

            # 1) Emit docs FIRST so the UI can render citation cards
            #    while the answer text streams in. Pre-filter by section
            #    type (if requested), then enrich with precedent status
            #    BEFORE emitting so the user sees overruled badges on the
            #    very first paint.
            try:
                sec_type = (req.filters or {}).get("section_type")
                if sec_type:
                    docs = _filter_docs_by_section_type(docs, str(sec_type))
            except Exception:
                pass
            try:
                _enrich_docs_with_precedent_status(docs)
            except Exception:
                pass
            yield _sse("docs", _sjson.dumps({"docs": docs},
                                              ensure_ascii=False))

            answer = getattr(rag_resp, "answer", "(no answer generated)")
            # re.split needs a str; a None/non-str answer would otherwise
            # raise after the docs event has already been sent. The
            # final "done" payload still carries the original value.
            if isinstance(answer, str):
                answer_text = answer
            else:
                answer_text = "" if answer is None else str(answer)

            # 2) Stream the answer in word-sized chunks. Hebrew tokens
            #    are space-delimited like English here. We emit chunks
            #    of ~6 words each so the typewriter effect is smooth
            #    but doesn't flood the network with one-word events.
            import re as _sre
            pieces = _sre.split(r'(\s+)', answer_text)   # keep whitespace
            buf = []
            count = 0
            for p in pieces:
                buf.append(p)
                if not p.isspace():
                    count += 1
                if count >= 6:
                    chunk = ''.join(buf)
                    yield _sse("token", _sjson.dumps(
                        {"chunk": chunk}, ensure_ascii=False))
                    buf = []
                    count = 0
            if buf:
                yield _sse("token", _sjson.dumps(
                    {"chunk": ''.join(buf)}, ensure_ascii=False))

            # 3) Signals + meta
            signals = getattr(rag_resp, "signals", None)
            confidence = (getattr(signals, "omega", None)
                            if signals else None)
            gen_used = getattr(pipe, "_last_generator_used",
                                getattr(pipe.generator, "name", "unknown"))
            gen_fallback = bool(getattr(pipe, "_last_generator_fallback",
                                          False))
            yield _sse("signals", _sjson.dumps({
                "confidence": confidence,
                "generator": {
                    "used":     gen_used,
                    "primary":  pipe.config.generation.provider,
                    "model":    pipe.config.generation.model,
                    "fallback": gen_fallback,
                },
                "adversarial": adv.to_dict() if adv else None,
            }, ensure_ascii=False))

            # 4) Done — full final payload (mirrors /v1/query response)
            yield _sse("done", _sjson.dumps({
                "answer": answer,
                "docs": docs,
                "confidence": confidence,
                "request_id": req.request_id,
                "adversarial": adv.to_dict() if adv else None,
                "generator": {
                    "used":     gen_used,
                    "primary":  pipe.config.generation.provider,
                    "model":    pipe.config.generation.model,
                    "fallback": gen_fallback,
                },
            }, ensure_ascii=False))

        except Exception as e:
            # Any failure above ends the stream with error + done so the
            # client always sees a terminating event.
            try:
                from ..middleware.zero_result_tracker import \
                    get_zero_result_tracker
                get_zero_result_tracker().record(req.query)
            except Exception:
                pass
            yield _sse("error", _err_payload(
                f"Pipeline error ({type(e).__name__}: {e})"))
            yield _sse("done", _sjson.dumps(
                {"answer": "", "docs": []}, ensure_ascii=False))

    return StreamingResponse(
        gen(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache, no-transform",
            "X-Accel-Buffering": "no",   # tell nginx not to buffer
            "Connection": "keep-alive",
        },
    )


# -------- /v1/data/load_jsonl — load a JSONL by server-side path ----
# Request body for POST /v1/data/load_jsonl.
class LoadJsonlRequest(BaseModel):
    # Server-side path; the endpoint expands "~" and resolves it.
    path: str
    # Documents per flush; the endpoint rejects values below 1 with 400.
    batch_size: int = 1000
    # Stop once this many documents have been accepted (counts accepted
    # documents, not physical lines). None/0 means no cap.
    max_lines: Optional[int] = None
    # Number of physical lines skipped from the start of the file.
    skip_lines: int = 0


@app.post("/v1/data/load_jsonl")
def data_load_jsonl(req: LoadJsonlRequest, request: Request):
    """Load a JSONL file from a server-side path into the pipeline.

    Use this when the JSONL is large (>100MB) — multipart upload
    chokes on big bodies. The file MUST be on the server filesystem.
    Each line: {"id": "...", "text": "...", "metadata": {...}}.
    Streams in batches; safe for multi-GB files.

    Lines that are blank are skipped silently. Lines that are not valid
    JSON, are not a JSON object, or carry no ``text``/``chunk`` value
    are counted in ``n_bad_lines``. Undecodable bytes are replaced
    rather than aborting the load.

    Args:
        req: ``path`` (server-side file), ``batch_size`` (>= 1),
            ``max_lines`` (cap on accepted documents) and ``skip_lines``
            (physical lines skipped from the start).
        request: Accepted but not read by this handler.

    Returns:
        A summary dict: ``ok``, ``path``, ``size_mb``, ``n_docs_loaded``,
        ``n_indexed_in_pipeline``, ``n_bad_lines``, ``n_batches`` and
        ``pipeline_attached``.

    Raises:
        HTTPException: 404 when the path is missing or not a regular
            file; 400 when ``batch_size`` is below 1.
    """
    import json
    import logging
    from pathlib import Path
    from ..middleware.corpus_ingestion import (
        get_corpus_ingestion_tracker,
        STAGE_PARSE, STAGE_INDEX,
    )
    from ..core.types import Document

    log = logging.getLogger(__name__)

    # NOTE(security): ``req.path`` is caller-supplied and opened as-is
    # after resolution; no directory allow-list is applied here —
    # confirm this route is restricted to trusted callers.
    p = Path(req.path).expanduser().resolve()
    if not p.exists() or not p.is_file():
        raise HTTPException(
            status_code=404,
            detail=f"file not found on server: {p}")
    if req.batch_size < 1:
        raise HTTPException(
            status_code=400,
            detail="batch_size must be >= 1")

    tracker = get_corpus_ingestion_tracker()

    # The pipeline is optional: without it documents are still parsed
    # and tracked, just not indexed.
    pipe = None
    try:
        from ..pipeline import get_pipeline
        pipe = get_pipeline()
    except Exception:
        pipe = None

    n_total = 0
    n_indexed = 0
    n_bad_lines = 0
    n_batches = 0
    batch: List[Document] = []
    source_name = p.name

    def _flush(docs):
        # Record the parse stage for every doc, then index the batch
        # when a pipeline is attached. Indexing is best-effort: a failed
        # batch is logged and skipped so the rest of the file loads.
        nonlocal n_indexed, n_batches
        for d in docs:
            tracker.record(d.id, source_name, STAGE_PARSE, True, 0)
        if pipe and hasattr(pipe, "add_documents"):
            try:
                pipe.add_documents(docs)
                for d in docs:
                    tracker.record(d.id, source_name, STAGE_INDEX, True, 0)
                n_indexed += len(docs)
            except Exception:
                log.warning(
                    "load_jsonl: indexing a batch of %d docs from %s failed",
                    len(docs), source_name, exc_info=True)
        n_batches += 1

    # errors="replace" keeps one bad byte from aborting a multi-GB load.
    with open(p, "r", encoding="utf-8", errors="replace") as f:
        for i, line in enumerate(f):
            if i < req.skip_lines:
                continue
            line = line.strip()
            if not line:
                continue
            try:
                row = json.loads(line)
            except Exception:
                n_bad_lines += 1
                continue
            # Valid JSON that is not an object (list, string, number…)
            # has no .get(); count it as a bad line instead of crashing.
            if not isinstance(row, dict):
                n_bad_lines += 1
                continue
            text = row.get("text") or row.get("chunk") or ""
            if not text:
                n_bad_lines += 1
                continue
            meta = row.get("metadata", {}) or {}
            if isinstance(meta, dict):
                # Copy so the parsed row is not mutated; default the
                # source to the file name.
                meta = dict(meta)
                meta.setdefault("source", source_name)
            else:
                meta = {"source": source_name}
            doc = Document(
                id=str(row.get("id", f"{source_name}-{i}")),
                text=str(text),
                metadata=meta,
            )
            batch.append(doc)
            n_total += 1

            if len(batch) >= req.batch_size:
                _flush(batch)
                # Rebind rather than clear: the pipeline may still hold
                # a reference to the list it was handed.
                batch = []

            if req.max_lines and n_total >= req.max_lines:
                break

    # Flush final partial batch
    if batch:
        _flush(batch)

    return {
        "ok": True,
        "path": str(p),
        "size_mb": round(p.stat().st_size / 1024 / 1024, 2),
        "n_docs_loaded": n_total,
        "n_indexed_in_pipeline": n_indexed,
        "n_bad_lines": n_bad_lines,
        "n_batches": n_batches,
        "pipeline_attached": pipe is not None,
    }


# -------- /v1/data/load_local — stream a local JSONL into the index --
class LoadLocalRequest(BaseModel):
    # Request body for POST /v1/data/load_local.
    #
    # NOTE(review): the handler does not range-check any of these fields.
    # A batch_size <= 0 makes it flush after every document, and a
    # max_docs of 0 is treated the same as "no cap" (truthiness test).
    path: str                       # server-side JSONL path ("~" is expanded, then resolved)
    batch_size: int = 1000          # how many docs to add per add_documents() call
    max_docs: Optional[int] = None  # cap on parsed docs (for testing); None = unlimited
    skip_first: int = 0             # resume: skip the first N physical lines of the file


@app.post("/v1/data/load_local")
def load_local_endpoint(req: LoadLocalRequest,
                            request: Request):
    """Stream a local JSONL file into the live index.

    Use this when the file is too big to POST as multipart.
    Reads line-by-line, batches into the pipeline, reports progress.

    Per-line handling:
      - the first ``req.skip_first`` physical lines are skipped;
      - blank lines and objects without a truthy "text" are skipped;
      - lines that are not valid JSON, or whose JSON value is not an
        object, are counted in ``n_errors`` and skipped.

    Indexing is best-effort: a batch whose ``add_documents`` call (or
    tracker bookkeeping) raises is dropped, counted in
    ``n_batches_failed``, and the load continues with the next batch.

    Returns:
        Summary dict with line / doc / error counters and whether a
        pipeline object could be obtained.

    Raises:
        HTTPException: 404 when the resolved path does not exist,
            400 when it exists but is not a regular file.
    """
    from pathlib import Path
    from ..middleware.corpus_ingestion import (
        get_corpus_ingestion_tracker, STAGE_INDEX,
    )
    from ..core.types import Document

    src = Path(req.path).expanduser().resolve()
    if not src.exists():
        raise HTTPException(status_code=404,
                                detail=f"file not found: {src}")
    if not src.is_file():
        raise HTTPException(status_code=400,
                                detail=f"not a file: {src}")

    # The pipeline is optional here: without one the file is still
    # parsed and counted, it just is not indexed.
    pipe = None
    try:
        from ..pipeline import get_pipeline
        pipe = get_pipeline()
    except Exception:
        pipe = None

    tracker = get_corpus_ingestion_tracker()
    total_size = src.stat().st_size
    n_lines = 0
    n_docs = 0
    n_indexed = 0
    n_errors = 0
    n_batches_failed = 0
    batch: List[Document] = []

    def flush_batch():
        """Index the pending batch (best-effort) and empty it."""
        nonlocal n_indexed, n_batches_failed
        if not batch:
            return
        if pipe and hasattr(pipe, "add_documents"):
            try:
                pipe.add_documents(batch)
                for d in batch:
                    tracker.record(
                        d.id,
                        d.metadata.get("source", "jsonl"),
                        STAGE_INDEX, True, 0)
                n_indexed += len(batch)
            except Exception:
                # Keep going, but make the failure visible to the
                # caller instead of silently losing the batch.
                n_batches_failed += 1
        batch.clear()

    with open(src, encoding="utf-8",
               errors="replace") as f:
        for line in f:
            n_lines += 1
            if n_lines <= req.skip_first:
                continue
            if req.max_docs and n_docs >= req.max_docs:
                break
            line = line.strip()
            if not line:
                continue
            try:
                row = json.loads(line)
            except Exception:
                n_errors += 1
                continue
            if not isinstance(row, dict):
                # Valid JSON but not an object (list / string /
                # number): there is no "text" field to read.
                n_errors += 1
                continue
            text = row.get("text", "")
            if not text:
                continue
            meta = row.get("metadata")
            if not isinstance(meta, dict):
                # Missing, null or malformed metadata -> empty dict so
                # the tracker lookup in flush_batch() cannot fail.
                meta = {}
            doc = Document(
                id=str(row.get("id", f"line-{n_lines}")),
                text=str(text),
                metadata=meta)
            batch.append(doc)
            n_docs += 1
            if len(batch) >= req.batch_size:
                flush_batch()
        flush_batch()

    return {
        "ok": True,
        "file": str(src),
        "file_size_bytes": total_size,
        "n_lines": n_lines,
        "n_docs_parsed": n_docs,
        "n_docs_indexed": n_indexed,
        "n_errors": n_errors,
        "n_batches_failed": n_batches_failed,
        "pipeline_attached": pipe is not None,
    }


# -------- /v1/data/load_jsonl — server-side JSONL load (streaming) --
class LoadJsonlRequest(BaseModel):
    # Request body for POST /v1/data/load_jsonl.
    path: str              # server-side JSONL file; "~" is expanded by the handler
    batch_size: int = 500  # docs per add_documents() call; handler rejects values outside [1, 10000]


@app.post("/v1/data/load_jsonl")
def data_load_jsonl(req: LoadJsonlRequest, request: Request):
    """Load a server-side JSONL file into the live pipeline, streaming
    row-by-row in batches. Avoids multipart upload for large corpora
    (1M+ docs). Each JSONL line must be: {"id":..,"text":..,"metadata":..}

    Lines that are not valid JSON, are not JSON objects, or have no
    truthy "text" are counted in ``n_skipped``. A batch that fails to
    index is reported in ``errors`` and the load continues. A read
    error stops the scan, but documents already parsed are still
    flushed to the pipeline.

    Returns:
        Summary dict; ``ok`` is False when any error was recorded and
        ``errors`` holds at most the first 10 messages.

    Raises:
        HTTPException: 404 if the file does not exist, 400 if
            ``batch_size`` is outside [1, 10000], 500 if the pipeline
            module is missing or lacks ``add_documents``.
    """
    import os
    from ..core.types import Document
    from ..middleware.corpus_ingestion import (
        get_corpus_ingestion_tracker,
        STAGE_PARSE, STAGE_INDEX,
    )

    path = os.path.expanduser(req.path)
    if not os.path.isfile(path):
        raise HTTPException(
            status_code=404,
            detail=f"file not found on server: {path}")
    if req.batch_size < 1 or req.batch_size > 10000:
        raise HTTPException(
            status_code=400,
            detail="batch_size must be in [1, 10000]")

    try:
        from ..pipeline import get_pipeline
        pipe = get_pipeline()
        if not hasattr(pipe, "add_documents"):
            raise HTTPException(
                status_code=500,
                detail="pipeline has no add_documents() method")
    except ImportError as exc:
        raise HTTPException(
            status_code=500,
            detail="pipeline module missing") from exc

    tracker = get_corpus_ingestion_tracker()
    source_name = os.path.basename(path)

    n_lines = 0
    n_indexed = 0
    n_skipped = 0
    batch: List[Document] = []
    errors: List[str] = []

    def flush_batch():
        """Index the pending batch; record (not raise) any failure."""
        nonlocal batch, n_indexed
        if not batch:
            return
        try:
            pipe.add_documents(batch)
            for d in batch:
                tracker.record(d.id, source_name,
                                   STAGE_INDEX, True, 0)
            n_indexed += len(batch)
        except Exception as e:
            errors.append(
                f"batch of {len(batch)} failed: "
                f"{type(e).__name__}: {e}")
        batch = []

    try:
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                n_lines += 1
                line = line.strip()
                if not line:
                    continue
                try:
                    row = json.loads(line)
                except Exception:
                    n_skipped += 1
                    continue
                if not isinstance(row, dict):
                    # Valid JSON but not an object: skip this line
                    # rather than aborting the whole load.
                    n_skipped += 1
                    continue
                text = row.get("text")
                if not text:
                    n_skipped += 1
                    continue
                doc_id = str(row.get(
                    "id", f"{source_name}-{n_lines}"))
                meta = row.get("metadata")
                if not isinstance(meta, dict):
                    # Missing / null / malformed metadata.
                    meta = {}
                # Keep a source already present in the row; otherwise
                # fall back to the file name.
                meta.setdefault("source", source_name)
                d = Document(id=doc_id, text=str(text),
                                 metadata=meta)
                tracker.record(d.id, source_name,
                                   STAGE_PARSE, True, 0)
                batch.append(d)
                if len(batch) >= req.batch_size:
                    flush_batch()
    except Exception as e:
        errors.append(
            f"read error at line {n_lines}: "
            f"{type(e).__name__}: {e}")
    # Runs on both the success and the read-error path so documents
    # already recorded as parsed are not silently left unindexed.
    flush_batch()

    return {
        "ok": not errors,
        "path": path,
        "n_lines_read": n_lines,
        "n_docs_indexed": n_indexed,
        "n_skipped": n_skipped,
        "batch_size": req.batch_size,
        "errors": errors[:10],
    }


# -------- /v1/data/upload — upload any document type as corpus --------
def _chunk_text(text: str, target_chars: int = 1500,
                  overlap_chars: int = 200) -> List[str]:
    """Simple sentence-aware chunker. Avoids splitting mid-sentence
    when possible. Falls back to char-window if sentences are huge.

    Args:
        text: Text to split; ``None`` / empty / whitespace-only input
            yields ``[]``.
        target_chars: Budget per chunk before overlap is added.
        overlap_chars: Characters of the previous chunk prepended to
            each following chunk. Negative values are treated as 0;
            values >= ``target_chars`` are reduced to
            ``target_chars // 2`` so the hard-split window always
            advances.

    Returns:
        List of chunk strings. Chunks after the first are longer than
        ``target_chars`` by up to ``overlap_chars + 1`` because the
        overlap prefix (plus a space) is prepended.

    Raises:
        ValueError: if ``target_chars`` is < 1 and ``text`` is not
            empty.
    """
    import re as _re
    text = (text or "").strip()
    if not text:
        return []
    if target_chars < 1:
        # Previously this either crashed with a cryptic range() error
        # or silently dropped all content.
        raise ValueError("target_chars must be >= 1")
    # Normalise the overlap so the hard-split stride below is always
    # positive and never larger than the window (which would skip text).
    overlap_chars = max(0, overlap_chars)
    if overlap_chars >= target_chars:
        overlap_chars = target_chars // 2
    if len(text) <= target_chars:
        return [text]
    # Split on sentence boundaries (HE + EN)
    sentences = _re.split(r'(?<=[.!?׃])\s+|\n{2,}', text)
    stride = target_chars - overlap_chars
    chunks: List[str] = []
    cur = ""
    for s in sentences:
        s = s.strip()
        if not s:
            continue
        if len(cur) + len(s) + 1 <= target_chars:
            cur = (cur + " " + s).strip() if cur else s
        else:
            if cur:
                chunks.append(cur)
            # Sentence itself larger than budget — hard split
            if len(s) > target_chars:
                chunks.extend(
                    s[i:i + target_chars]
                    for i in range(0, len(s), stride))
                cur = ""
            else:
                cur = s
    if cur:
        chunks.append(cur)
    # Apply overlap between consecutive chunks. Note that hard-split
    # windows already overlap via the stride and receive this prefix
    # as well; that matches the previous behaviour for valid inputs.
    if overlap_chars > 0 and len(chunks) > 1:
        with_overlap = [chunks[0]]
        for prev, nxt in zip(chunks, chunks[1:]):
            with_overlap.append(prev[-overlap_chars:] + " " + nxt)
        return with_overlap
    return chunks


def _extract_pdf_text(path: str) -> str:
    """Return the text of every page of the PDF at *path*, pages
    separated by a blank line.

    Tries ``pypdf`` first and falls back to ``PyPDF2``. A page whose
    extraction raises is skipped; a page with no text contributes an
    empty string.

    Raises:
        RuntimeError: if neither PDF library can be imported.
    """
    try:
        from pypdf import PdfReader   # type: ignore
    except ImportError:
        try:
            from PyPDF2 import PdfReader  # type: ignore
        except ImportError:
            raise RuntimeError(
                "PDF support requires 'pypdf' or 'PyPDF2'. "
                "pip install pypdf")

    page_texts = []
    for pdf_page in PdfReader(path).pages:
        try:
            extracted = pdf_page.extract_text()
        except Exception:
            # Best-effort: one unreadable page must not fail the file.
            continue
        page_texts.append(extracted if extracted else "")
    return "\n\n".join(page_texts)


def _extract_docx_text(path: str) -> str:
    """Return the non-blank paragraphs of the DOCX at *path*, joined
    by a blank line.

    Raises:
        RuntimeError: if ``python-docx`` cannot be imported.
    """
    try:
        from docx import Document as _DocxDoc  # type: ignore
    except ImportError:
        raise RuntimeError(
            "DOCX support requires 'python-docx'. "
            "pip install python-docx")

    kept_paragraphs = []
    for para in _DocxDoc(path).paragraphs:
        # Whitespace-only paragraphs are dropped.
        if para.text.strip():
            kept_paragraphs.append(para.text)
    return "\n\n".join(kept_paragraphs)


def _extract_pptx_text(path: str) -> str:
    """Return the text of the PPTX at *path*, one ``## Slide N``
    section per slide that has any text, sections separated by a
    blank line.

    Slide numbers are 1-based and count slides without text too, so
    headings keep the deck's real numbering.

    Raises:
        RuntimeError: if ``python-pptx`` cannot be imported.
    """
    try:
        from pptx import Presentation             # type: ignore
    except ImportError:
        raise RuntimeError(
            "PPTX support requires 'python-pptx'. "
            "pip install python-pptx")

    sections = []
    slide_no = 0
    for slide in Presentation(path).slides:
        slide_no += 1
        # Only shapes exposing a non-empty ``text`` attribute count.
        shape_texts = [shape.text for shape in slide.shapes
                       if hasattr(shape, "text") and shape.text]
        if not shape_texts:
            continue
        sections.append(
            f"## Slide {slide_no}\n" + "\n".join(shape_texts))
    return "\n\n".join(sections)


def _extract_xlsx_text(path: str) -> str:
    """Return the contents of the XLSX at *path* as text.

    Each worksheet with at least one non-empty row becomes a
    ``## Sheet: <title>`` section; each row becomes one line with its
    non-empty cell values joined by `` | ``. Sections are separated
    by a blank line. Cached values are read instead of formulas
    (``data_only=True``).

    Raises:
        RuntimeError: if ``openpyxl`` cannot be imported.
    """
    try:
        from openpyxl import load_workbook         # type: ignore
    except ImportError:
        raise RuntimeError(
            "XLSX support requires 'openpyxl'. "
            "pip install openpyxl")
    wb = load_workbook(path, data_only=True,
                          read_only=True)
    parts = []
    try:
        for ws in wb.worksheets:
            sheet_parts = [f"## Sheet: {ws.title}"]
            for row in ws.iter_rows(values_only=True):
                # Stringify each cell once, then drop blank ones.
                rendered = (str(c) for c in row if c is not None)
                cells = [s for s in rendered if s.strip()]
                if cells:
                    sheet_parts.append(" | ".join(cells))
            if len(sheet_parts) > 1:
                parts.append("\n".join(sheet_parts))
    finally:
        # Read-only workbooks keep the underlying file open until
        # close() is called explicitly.
        wb.close()
    return "\n\n".join(parts)


def _extract_html_text(html_bytes: bytes) -> str:
    """Simple HTML→text without external deps.

    Decodes *html_bytes* as UTF-8 (undecodable bytes are replaced),
    removes ``<script>`` / ``<style>`` elements, collects the
    remaining text nodes and collapses all whitespace runs to single
    spaces. Character references are unescaped by the parser.

    Parsing is best-effort: if the parser raises, whatever text was
    collected up to that point is returned.
    """
    import re as _re
    from html.parser import HTMLParser
    text = html_bytes.decode("utf-8", errors="replace")
    # Drop script/style content
    text = _re.sub(r'<(script|style)[^>]*>.*?</\1>', ' ',
                     text, flags=_re.DOTALL | _re.IGNORECASE)

    class _Strip(HTMLParser):
        """Collects text nodes outside <script>/<style>."""

        def __init__(self):
            super().__init__()
            self.parts: List[str] = []
            # Nesting depth of script/style elements that survived
            # the regex pass above (e.g. unclosed ones).
            self._skip = 0

        def handle_starttag(self, tag, attrs):
            if tag.lower() in ("script", "style"):
                self._skip += 1

        def handle_endtag(self, tag):
            if tag.lower() in ("script", "style") and self._skip:
                self._skip -= 1

        def handle_data(self, data):
            if not self._skip and data.strip():
                self.parts.append(data)

    p = _Strip()
    try:
        p.feed(text)
        # feed() may hold back trailing text (e.g. when it ends in an
        # unterminated "&...") waiting for more input; close() forces
        # the parser to emit it.
        p.close()
    except Exception:
        pass
    out = " ".join(p.parts)
    # Collapse whitespace
    out = _re.sub(r'\s+', ' ', out).strip()
    return out


@app.post("/v1/data/upload")
async def data_upload_endpoint(
        file: UploadFile = File(...),
        kind: str = Form("auto"),
        text_field: str = Form("text"),
        id_field: str = Form("id"),
        chunk_size: int = Form(1500),
        chunk_overlap: int = Form(200)):
    """Upload any document as corpus.

    Supported kinds (auto-detected from extension):
      - parquet / pq         — pyarrow rows → Documents
      - csv / tsv            — DictReader rows → Documents
      - jsonl / ndjson / json — one JSON per line → Documents
      - pkl / pickle         — list/dict pickle → Documents
      - txt                  — plain text → chunked Documents
      - md / markdown        — markdown → chunked Documents
      - pdf                  — pypdf extract → chunked Documents
      - docx                 — python-docx extract → chunked Documents
      - pptx                 — python-pptx extract → chunked Documents
      - xlsx                 — openpyxl extract → chunked Documents
      - html / htm           — strip tags → chunked Documents

    Unknown extensions are treated as plain text when kind is "auto".

    Form fields:
      - file (multipart, required)
      - kind: explicit override (default auto-detect)
      - text_field / id_field: only for tabular formats
      - chunk_size / chunk_overlap: only for free-text formats

    The upload is saved under ``<runtime_dir>/uploads`` (overwriting
    any file of the same name), parsed, and — if parsing succeeded —
    added to the live pipeline. Parse failures are reported in
    ``error`` and indexing failures in ``index_error``; neither raises.

    Raises:
        HTTPException: 400 if no file was supplied.
    """
    import os
    from ..middleware.corpus_ingestion import (
        get_corpus_ingestion_tracker,
        STAGE_PARSE, STAGE_INDEX,
    )
    from ..core.types import Document
    if file is None:
        raise HTTPException(status_code=400,
                                detail="missing 'file' field")
    runtime_dir = os.environ.get(
        "TAU_RAG_RUNTIME_DIR",
        os.path.join(os.path.dirname(
            os.path.dirname(os.path.abspath(__file__))),
            "runtime"))
    upload_dir = os.path.join(runtime_dir, "uploads")
    os.makedirs(upload_dir, exist_ok=True)

    # basename() strips client-supplied directories. It can still
    # return "", "." or ".." (e.g. filename "foo/" or ".."), which
    # would make the target a directory, so fall back to the default.
    safe_name = os.path.basename(file.filename or "")
    if safe_name in ("", ".", ".."):
        safe_name = "upload.dat"
    target = os.path.join(upload_dir, safe_name)

    ext = (safe_name.rsplit(".", 1)[-1] or "").lower()
    kind_by_ext = {
        "parquet": "parquet", "pq": "parquet",
        "csv": "csv", "tsv": "csv",
        "jsonl": "jsonl", "ndjson": "jsonl", "json": "jsonl",
        "pkl": "pkl", "pickle": "pkl",
        "txt": "txt", "text": "txt", "log": "txt",
        "md": "md", "markdown": "md",
        "yaml": "txt", "yml": "txt",
        "pdf": "pdf",
        "docx": "docx",
        "pptx": "pptx",
        "xlsx": "xlsx",
        "html": "html", "htm": "html",
    }
    # An explicit kind always wins; "auto" maps the extension and
    # defaults to plain text.
    inferred = kind_by_ext.get(ext, "txt") if kind == "auto" else kind

    contents = await file.read()
    with open(target, "wb") as f:
        f.write(contents)

    def _decode_contents() -> str:
        return contents.decode("utf-8", errors="replace")

    # kind -> zero-arg callable returning the document's full text.
    # Every kind listed here shares the same chunk → Document step.
    free_text_extractors = {
        "txt": _decode_contents,
        "md": _decode_contents,
        "pdf": lambda: _extract_pdf_text(target),
        "docx": lambda: _extract_docx_text(target),
        "pptx": lambda: _extract_pptx_text(target),
        "xlsx": lambda: _extract_xlsx_text(target),
        "html": lambda: _extract_html_text(contents),
    }

    docs: List[Document] = []
    parse_error: Optional[str] = None

    try:
        # ============================ TABULAR FORMATS ===========
        if inferred == "parquet":
            try:
                import pyarrow.parquet as pq          # type: ignore
                table = pq.read_table(target)
                rows = [dict(zip(table.column_names, vals))
                          for vals in zip(*[c.to_pylist()
                                             for c in table.columns])]
            except ImportError:
                import pandas as pd                    # type: ignore
                rows = pd.read_parquet(target).to_dict("records")
            docs = _rows_to_docs(rows, safe_name,
                                     text_field, id_field)

        elif inferred == "csv":
            import csv as _csv
            delim = "\t" if ext == "tsv" else ","
            # newline="" lets the csv module handle line endings
            # embedded in quoted fields itself.
            with open(target, encoding="utf-8", newline="") as f:
                rows = list(_csv.DictReader(f, delimiter=delim))
            docs = _rows_to_docs(rows, safe_name,
                                     text_field, id_field)

        elif inferred == "jsonl":
            rows = []
            for line in _decode_contents().splitlines():
                line = line.strip()
                if not line:
                    continue
                try:
                    rows.append(json.loads(line))
                except Exception:
                    # Unparseable lines are skipped, not fatal.
                    continue
            docs = _rows_to_docs(rows, safe_name,
                                     text_field, id_field)

        elif inferred == "pkl":
            import pickle
            # SECURITY(review): pickle.load() on an uploaded file can
            # execute arbitrary code chosen by the uploader. This
            # branch must only be reachable by fully trusted callers;
            # consider disabling it or gating it behind admin auth.
            with open(target, "rb") as f:
                data = pickle.load(f)
            rows = (data if isinstance(data, list)
                    else [data] if isinstance(data, dict) else [])
            docs = _rows_to_docs(rows, safe_name,
                                     text_field, id_field)

        # ============================ FREE-TEXT FORMATS =========
        elif inferred in free_text_extractors:
            text = free_text_extractors[inferred]()
            chunks = _chunk_text(text, chunk_size, chunk_overlap)
            docs = [
                Document(
                    id=f"{safe_name}-{i}",
                    text=ch,
                    metadata={"source": safe_name,
                                "chunk": i,
                                "n_chunks": len(chunks),
                                "kind": inferred})
                for i, ch in enumerate(chunks)
            ]
        else:
            raise ValueError(f"unsupported kind: {inferred}")

    except Exception as e:
        parse_error = f"{type(e).__name__}: {e}"

    # Track ingestion (v3.81)
    tracker = get_corpus_ingestion_tracker()
    n_indexed = 0
    index_error: Optional[str] = None
    if parse_error is None:
        for d in docs:
            tracker.record(d.id, safe_name, STAGE_PARSE,
                              True, 0)
        try:
            from ..pipeline import get_pipeline
            pipe = get_pipeline()
            if hasattr(pipe, "add_documents"):
                pipe.add_documents(docs)
                for d in docs:
                    tracker.record(d.id, safe_name,
                                       STAGE_INDEX, True, 0)
                n_indexed = len(docs)
        except Exception as e:
            # Indexing stays best-effort, but the reason is reported
            # instead of being silently discarded.
            index_error = f"{type(e).__name__}: {e}"

    return {
        "ok": parse_error is None,
        "filename": safe_name,
        "kind": inferred,
        "saved_to": target,
        "n_rows_parsed": len(docs),
        "n_indexed_in_pipeline": n_indexed,
        "size_bytes": len(contents),
        "error": parse_error,
        "index_error": index_error,
        "sample_doc_ids": [d.id for d in docs[:5]],
        "preview": docs[0].text[:200] if docs else None,
    }


def _rows_to_docs(rows, source_name: str,
                       text_field: str, id_field: str):
    """Convert tabular rows into a list of Document objects.

    Accepted row shapes:
      - ``str``: the string is the text; id is ``<source_name>-<index>``.
      - flat ``dict``: ``{id, text, ...other_fields}``.
      - nested ``dict``: ``{id, text, metadata: {...}}`` — the inner
        metadata is flattened into the result and wins over top-level
        fields of the same name, so an already-processed JSONL keeps
        its original ``source``.
    Rows of any other type, and dict rows without text, are skipped.

    Text is taken from the first truthy value among ``text_field``,
    "text", "chunk" and "content"; the id from ``id_field``, then
    "id", then ``<source_name>-<index>``. Every document gets
    ``upload_source`` set to *source_name*, and ``source`` only if it
    is not already present.
    """
    from ..core.types import Document

    text_keys = (text_field, "text", "chunk", "content")
    # Keys that hold the payload itself (or the nested dict) and
    # therefore are not copied into the flattened metadata.
    reserved_keys = text_keys + (id_field, "id", "metadata")

    documents = []
    for idx, row in enumerate(rows):
        fallback_id = f"{source_name}-{idx}"

        if isinstance(row, str):
            documents.append(Document(
                id=fallback_id,
                text=row,
                metadata={"source": source_name,
                            "upload_source": source_name}))
            continue
        if not isinstance(row, dict):
            continue

        # First truthy candidate wins; no candidate -> skip the row.
        body = None
        for key in text_keys:
            body = row.get(key)
            if body:
                break
        if not body:
            continue

        raw_id = row.get(id_field) or row.get("id") or fallback_id

        flattened = {}
        for key, value in row.items():
            if key not in reserved_keys:
                flattened[key] = value
        nested = row.get("metadata")
        if isinstance(nested, dict):
            # Inner metadata overrides top-level fields.
            flattened.update(nested)
        # Record upload provenance without clobbering a more specific
        # original source.
        flattened.setdefault("source", source_name)
        flattened["upload_source"] = source_name

        documents.append(Document(
            id=str(raw_id), text=str(body), metadata=flattened))
    return documents


# -------- /v1/data/load_corpus_from_path — stream JSONL → pipeline --
class LoadCorpusRequest(BaseModel):
    # Request body for POST /v1/data/load_corpus_from_path.
    path: str                       # server-side JSONL path ("~" is expanded)
    limit: Optional[int] = None     # cap on documents added (useful for testing); None/0 = no cap
    batch_size: int = 1000          # docs per add_documents() call
    filter_kind: Optional[List[str]] = None   # keep only rows whose metadata "kind" is listed, e.g. ["pdf","txt"]
    # NOTE(review): skip_existing is declared but the handler in this
    # part of the file never reads it — confirm whether it is consumed
    # elsewhere or still to be implemented.
    skip_existing: bool = False     # if pipeline already loaded, skip


@app.post("/v1/data/load_corpus_from_path")
def load_corpus_from_path(req: LoadCorpusRequest,
                               request: Request):
    """Stream a JSONL file from disk into the live pipeline.

    Each line must be a JSON object with {"id", "text", "metadata"}.
    Reads in batches to avoid memory spikes. Returns a summary.

    This bypasses HTTP upload — use it for large corpora
    (produced by scripts/ingest_local.py).

    Counters in the response:
      - ``n_lines_total``: non-blank lines read;
      - ``n_bad_lines``: lines that are not valid JSON or not a JSON
        object;
      - ``n_skipped_filter``: rows rejected by ``filter_kind`` or
        lacking a non-empty string "text";
      - ``n_docs_added``: documents handed to ``add_documents``.

    ``req.limit`` is enforced per document: reading stops as soon as
    the documents added plus those pending in the current batch reach
    the limit.

    Raises:
        HTTPException: 404 if the path does not exist, 400 if it is
            not a regular file, 503 if no pipeline with
            ``add_documents`` is available, 500 if ``add_documents``
            raises.
    """
    import os
    import time as _time
    from ..core.types import Document
    path = os.path.expanduser(req.path)
    if not os.path.exists(path):
        raise HTTPException(status_code=404,
                                detail=f"path not found: {path}")
    if not os.path.isfile(path):
        raise HTTPException(status_code=400,
                                detail=f"not a regular file: {path}")

    # Attach pipeline
    pipe = None
    try:
        from ..pipeline import get_pipeline
        pipe = get_pipeline()
        if not hasattr(pipe, "add_documents"):
            pipe = None
    except Exception:
        pipe = None
    if pipe is None:
        raise HTTPException(
            status_code=503,
            detail="pipeline has no add_documents() — "
                    "check preset config")

    kinds = set(req.filter_kind) if req.filter_kind else None
    total = 0
    added = 0
    skipped = 0
    bad_lines = 0
    batch: List[Document] = []

    # Monotonic clock: elapsed time must not jump with wall-clock
    # adjustments.
    t0 = _time.monotonic()

    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            total += 1
            try:
                row = json.loads(line)
            except Exception:
                bad_lines += 1
                continue
            if not isinstance(row, dict):
                # Valid JSON but not an object — nothing to index.
                bad_lines += 1
                continue
            text = row.get("text", "")
            doc_id = str(row.get("id") or f"doc-{total}")
            meta = row.get("metadata")
            if not isinstance(meta, dict):
                # Missing / null / malformed metadata.
                meta = {}
            if kinds and meta.get("kind") not in kinds:
                skipped += 1
                continue
            if not text or not isinstance(text, str):
                skipped += 1
                continue
            batch.append(Document(
                id=doc_id, text=text, metadata=dict(meta)))
            # Count pending docs too; checking only `added` would let
            # a limit smaller than batch_size be overshot by up to a
            # whole batch. The remainder flush below indexes them.
            if req.limit and added + len(batch) >= req.limit:
                break
            if len(batch) >= req.batch_size:
                try:
                    pipe.add_documents(batch)
                    added += len(batch)
                except Exception as e:
                    raise HTTPException(
                        status_code=500,
                        detail=f"pipeline.add_documents "
                                f"failed at n={added}: "
                                f"{type(e).__name__}: {e}") from e
                batch = []

    # Flush remainder
    if batch:
        try:
            pipe.add_documents(batch)
            added += len(batch)
        except Exception as e:
            raise HTTPException(
                status_code=500,
                detail=f"pipeline.add_documents failed "
                        f"at final flush: {e}") from e

    elapsed = _time.monotonic() - t0
    return {
        "ok": True,
        "path": path,
        "n_lines_total": total,
        "n_docs_added": added,
        "n_skipped_filter": skipped,
        "n_bad_lines": bad_lines,
        "elapsed_sec": round(elapsed, 2),
        # Denominator is clamped to 1s, so very short runs report the
        # raw document count rather than an extrapolated rate.
        "docs_per_sec": round(added / max(1, elapsed), 1),
    }


# -------- /v1/data/ingest_dir — recursively ingest a host directory --
class IngestDirRequest(BaseModel):
    # Request body for POST /v1/data/ingest_dir.
    path: str                               # server-side directory ("~" expanded, then resolved)
    recursive: bool = True                  # True: walk all subdirectories; False: direct children only
    chunk_size: int = 1500                  # forwarded to the parser as chunk_size
    chunk_overlap: int = 200                # forwarded to the parser as chunk_overlap
    max_files: Optional[int] = None         # stop scanning once this many matching files were collected
    extensions: Optional[List[str]] = None  # whitelist, with or without leading dot; default = all known
    dry_run: bool = False                   # scan only, don't ingest


@app.post("/v1/data/ingest_dir")
def data_ingest_dir(req: IngestDirRequest, request: Request):
    """Ingest every supported file under a server-side directory.

    `req.path` is resolved on the SERVER (not the client), so the
    server process needs read access to it. Files are selected by
    extension, hidden files and directories (any path component below
    the root starting with ".") are ignored, and each selected file
    is parsed, chunked and added to the live pipeline when one is
    available. With `dry_run` only the scan result is returned.

    A failure in one file is recorded in that file's entry and does
    not stop the run. The response lists at most 500 per-file entries
    (200 file names in dry-run mode).

    Raises:
        HTTPException: 404 if the path does not exist, 400 if it is
            not a directory.
    """
    from pathlib import Path
    from ..middleware.corpus_ingestion import (
        get_corpus_ingestion_tracker,
        STAGE_PARSE, STAGE_INDEX,
    )
    from ..core.types import Document

    root = Path(req.path).expanduser().resolve()
    if not root.exists():
        raise HTTPException(
            status_code=404,
            detail=f"path does not exist on server: {root}")
    if not root.is_dir():
        raise HTTPException(
            status_code=400,
            detail=f"not a directory: {root}")

    # Auto-detect by extension; same table as /v1/data/upload + pptx/xlsx
    ext_to_kind = {
        ".parquet": "parquet", ".pq": "parquet",
        ".csv": "csv", ".tsv": "csv",
        ".jsonl": "jsonl", ".ndjson": "jsonl",
        ".json": "jsonl",
        ".pkl": "pkl", ".pickle": "pkl",
        ".txt": "txt", ".text": "txt", ".log": "txt",
        ".md": "md", ".markdown": "md",
        ".yaml": "txt", ".yml": "txt",
        ".pdf": "pdf",
        ".docx": "docx",
        ".pptx": "pptx",
        ".xlsx": "xlsx",
        ".html": "html", ".htm": "html",
    }
    # Normalise the caller's whitelist to lower-case ".ext" form; an
    # empty or missing whitelist means "every known extension".
    requested_exts = {
        ext.lower() if ext.startswith(".") else "." + ext.lower()
        for ext in (req.extensions or [])
    }
    allowed = requested_exts or set(ext_to_kind)

    root_depth = len(root.parts)

    def _is_wanted(candidate) -> bool:
        """Regular file, allowed extension, no hidden path component."""
        if not candidate.is_file():
            return False
        if candidate.suffix.lower() not in allowed:
            return False
        return not any(part.startswith(".")
                       for part in candidate.parts[root_depth:])

    # Walk
    files: List[Path] = []
    walker = root.rglob("*") if req.recursive else root.iterdir()
    for candidate in walker:
        if not _is_wanted(candidate):
            continue
        files.append(candidate)
        if req.max_files and len(files) >= req.max_files:
            break

    files.sort()
    if req.dry_run:
        kinds_found = Counter(
            [ext_to_kind.get(found.suffix.lower(), "other")
             for found in files])
        return {
            "ok": True, "dry_run": True,
            "root": str(root),
            "n_files_found": len(files),
            "by_kind": _count_by(kinds_found),
            "files": [str(found.relative_to(root))
                      for found in files[:200]],
            "truncated": len(files) > 200,
        }

    tracker = get_corpus_ingestion_tracker()
    per_file: List[Dict[str, Any]] = []
    n_total_docs = 0
    n_total_indexed = 0

    # Without a pipeline the files are still parsed and counted, just
    # not indexed.
    pipe = None
    try:
        from ..pipeline import get_pipeline
        pipe = get_pipeline()
    except Exception:
        pipe = None

    for found in files:
        rel_name = str(found.relative_to(root))
        kind = ext_to_kind.get(found.suffix.lower(), "txt")
        rec = {"file": rel_name,
                "kind": kind, "ok": False,
                "n_docs": 0, "n_indexed": 0, "error": None}
        try:
            contents = found.read_bytes()
            docs = _parse_to_docs(
                contents=contents,
                target_path=str(found),
                source_name=rel_name,
                kind=kind,
                text_field="text",
                id_field="id",
                chunk_size=req.chunk_size,
                chunk_overlap=req.chunk_overlap,
            )
            n_parsed = len(docs)
            rec["n_docs"] = n_parsed
            for d in docs:
                tracker.record(d.id, rel_name,
                                   STAGE_PARSE, True, 0)
            if pipe and hasattr(pipe, "add_documents") and docs:
                pipe.add_documents(docs)
                for d in docs:
                    tracker.record(d.id, rel_name,
                                       STAGE_INDEX, True, 0)
                rec["n_indexed"] = n_parsed
                n_total_indexed += n_parsed
            n_total_docs += n_parsed
            rec["ok"] = True
        except Exception as e:
            # Record the failure for this file and move on.
            rec["error"] = f"{type(e).__name__}: {e}"
            tracker.record(
                f"{rel_name}-fail",
                rel_name,
                STAGE_PARSE, False, 0)
        per_file.append(rec)

    n_ok = sum(1 for r in per_file if r["ok"])
    return {
        "ok": True,
        "root": str(root),
        "n_files_found": len(files),
        "n_files_ok": n_ok,
        "n_files_failed": len(per_file) - n_ok,
        "n_total_docs": n_total_docs,
        "n_total_indexed": n_total_indexed,
        "pipeline_attached": pipe is not None,
        "per_file": per_file[:500],
        "truncated": len(per_file) > 500,
    }


def _count_by(counter):
    """Return *counter* converted to a plain ``dict`` (a new object),
    so the tally serialises as an ordinary JSON object."""
    plain_counts = dict(counter)
    return plain_counts


def _parse_to_docs(contents: bytes, target_path: str,
                       source_name: str, kind: str,
                       text_field: str, id_field: str,
                       chunk_size: int, chunk_overlap: int):
    """Parse one file into a list of ``Document`` according to ``kind``.

    Row-oriented kinds (``parquet``, ``csv``, ``jsonl``, ``pkl``) are
    loaded as a list of rows and converted by ``_rows_to_docs`` using
    ``text_field`` / ``id_field``. Text-oriented kinds (``txt``, ``md``,
    ``pdf``, ``docx``, ``pptx``, ``xlsx``, ``html``) are reduced to one
    string, split by ``_chunk_text`` and emitted as one ``Document`` per
    chunk with id ``"{source_name}-{i}"``.

    Args:
        contents: Raw bytes of the file. Used directly for ``jsonl``,
            ``txt``, ``md`` and ``html``; the other kinds re-read
            ``target_path`` from disk.
        target_path: Filesystem path of the file being parsed.
        source_name: Label stored in chunk metadata and used as the id
            prefix for text-oriented kinds.
        kind: File kind selector (see above).
        text_field: Forwarded to ``_rows_to_docs`` for row-oriented kinds.
        id_field: Forwarded to ``_rows_to_docs`` for row-oriented kinds.
        chunk_size: Forwarded to ``_chunk_text`` for text-oriented kinds.
        chunk_overlap: Forwarded to ``_chunk_text`` for text-oriented kinds.

    Returns:
        A list of ``Document``; an empty list when ``kind`` is not
        recognized.
    """
    from ..core.types import Document

    if kind == "parquet":
        try:
            import pyarrow.parquet as pq          # type: ignore
            table = pq.read_table(target_path)
            columns = [c.to_pylist() for c in table.columns]
            rows = [dict(zip(table.column_names, vals))
                    for vals in zip(*columns)]
        except ImportError:
            # pyarrow is optional; fall back to pandas as the reader.
            import pandas as pd                    # type: ignore
            rows = pd.read_parquet(target_path).to_dict("records")
        return _rows_to_docs(rows, source_name,
                                 text_field, id_field)

    if kind == "csv":
        import csv as _csv
        # The "csv" kind also covers tab-separated files; match the
        # extension case-insensitively so "DATA.TSV" is handled too.
        delim = "\t" if target_path.lower().endswith(".tsv") else ","
        # newline="" lets the csv module handle quoted fields that contain
        # line breaks; utf-8-sig strips a leading BOM so it does not end up
        # glued to the first column name (which would break field lookup).
        with open(target_path, encoding="utf-8-sig", newline="") as f:
            rows = list(_csv.DictReader(f, delimiter=delim))
        return _rows_to_docs(rows, source_name,
                                 text_field, id_field)

    if kind == "jsonl":
        rows = []
        decoded = contents.decode("utf-8", errors="replace")
        for line in decoded.splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                rows.append(json.loads(line))
            except (ValueError, RecursionError):
                # Best-effort ingestion: a malformed line is skipped
                # rather than failing the whole file.
                continue
        return _rows_to_docs(rows, source_name,
                                 text_field, id_field)

    if kind == "pkl":
        import pickle
        # SECURITY NOTE(review): unpickling executes arbitrary code embedded
        # in the file. This is only safe for files from a trusted source —
        # confirm that callers never route untrusted uploads here.
        with open(target_path, "rb") as f:
            data = pickle.load(f)
        if isinstance(data, list):
            rows = data
        elif isinstance(data, dict):
            rows = [data]
        else:
            rows = []
        return _rows_to_docs(rows, source_name,
                                 text_field, id_field)

    # Text-oriented kinds: reduce the file to a single string first.
    if kind in ("txt", "md"):
        text = contents.decode("utf-8", errors="replace")
    elif kind == "pdf":
        text = _extract_pdf_text(target_path)
    elif kind == "docx":
        text = _extract_docx_text(target_path)
    elif kind == "pptx":
        text = _extract_pptx_text(target_path)
    elif kind == "xlsx":
        text = _extract_xlsx_text(target_path)
    elif kind == "html":
        text = _extract_html_text(contents)
    else:
        return []

    chunks = _chunk_text(text, chunk_size, chunk_overlap)
    n_chunks = len(chunks)
    return [
        Document(
            id=f"{source_name}-{i}",
            text=ch,
            metadata={"source": source_name,
                      "chunk": i,
                      "n_chunks": n_chunks,
                      "kind": kind})
        for i, ch in enumerate(chunks)
    ]


# -------- /v1/data/load_jsonl — stream a server-side JSONL into pipe -
class LoadJsonlRequest(BaseModel):
    # Request body for POST /v1/data/load_jsonl (server-side JSONL ingestion).
    path: str                          # server-side path; the endpoint expands "~" and resolves it
    batch_size: int = 500              # docs per pipe.add_documents call; the endpoint rejects values < 1
    max_docs: Optional[int] = None     # stop after this many parsed docs; None (or 0) means no limit
    skip_existing: bool = True         # skip ids already seen earlier in the same file


@app.post("/v1/data/load_jsonl")
def data_load_jsonl_endpoint(req: LoadJsonlRequest,
                                       request: Request):
    """Stream a JSONL corpus from the server filesystem into the live
    pipeline.

    Each non-empty line is expected to be a JSON object with ``text`` (or
    ``chunk`` as a fallback), and optionally ``id`` and ``metadata``.
    Lines that are not valid JSON are counted in ``n_parse_errors``;
    non-object rows and rows without text are skipped silently. Rows
    without an ``id`` (missing or null) get the id ``jsonl-<line number>``.

    Documents are handed to ``pipe.add_documents`` in batches of
    ``req.batch_size``, so only one batch of documents is held at a time.
    Note that the set of ids seen so far is kept for the whole run, and
    ``skip_existing`` only de-duplicates ids within this file — it does
    not consult the live index.

    Indexing is best-effort: a failing batch does not abort the load, but
    it is counted in ``n_docs_index_failed`` and recorded in the ingestion
    tracker as a failed index stage.

    Raises:
        HTTPException: 404 if the path does not exist, 400 if it is not a
            regular file or ``batch_size`` is below 1.
    """
    from pathlib import Path
    from ..middleware.corpus_ingestion import (
        get_corpus_ingestion_tracker, STAGE_PARSE, STAGE_INDEX,
    )
    from ..core.types import Document

    # NOTE(review): `req.path` is client-supplied and any readable file is
    # accepted; presumably access to this route is restricted elsewhere
    # (auth / admin scope) — confirm.
    p = Path(req.path).expanduser().resolve()
    if not p.exists():
        raise HTTPException(
            status_code=404,
            detail=f"path does not exist: {p}")
    if not p.is_file():
        raise HTTPException(
            status_code=400,
            detail=f"not a file: {p}")
    if req.batch_size < 1:
        raise HTTPException(
            status_code=400,
            detail="batch_size must be >= 1")

    # The pipeline is optional: without one (or without add_documents)
    # the file is still parsed and tracked, just not indexed.
    pipe = None
    try:
        from ..pipeline import get_pipeline
        pipe = get_pipeline()
        if not hasattr(pipe, "add_documents"):
            pipe = None
    except Exception:
        pipe = None

    tracker = get_corpus_ingestion_tracker()
    n_lines = 0
    n_parsed = 0
    n_indexed = 0
    n_index_failed = 0
    n_skipped_dupe = 0
    parse_errors = 0
    batch: List[Document] = []
    seen_ids: set = set()

    def flush(b):
        """Index one batch; count and track failures instead of hiding them."""
        nonlocal n_indexed, n_index_failed
        if not b or pipe is None:
            return
        try:
            pipe.add_documents(b)
        except Exception:
            # Best-effort: keep loading the rest of the file, but make the
            # failure visible in the response and in the tracker.
            n_index_failed += len(b)
            for d in b:
                tracker.record(d.id,
                               d.metadata.get("source", "?"),
                               STAGE_INDEX, False, 0)
            return
        n_indexed += len(b)
        for d in b:
            tracker.record(d.id,
                           d.metadata.get("source", "?"),
                           STAGE_INDEX, True, 0)

    # errors="replace" keeps a stray undecodable byte from aborting the
    # whole load (same policy as the jsonl branch of _parse_to_docs).
    with open(p, encoding="utf-8", errors="replace") as f:
        for line in f:
            n_lines += 1
            line = line.strip()
            if not line:
                continue
            try:
                row = json.loads(line)
            except Exception:
                parse_errors += 1
                continue
            if not isinstance(row, dict):
                continue
            text = row.get("text") or row.get("chunk") or ""
            if not text:
                continue
            # A null id must not collapse every such row into the single
            # id "None"; fall back to the line-based id instead.
            raw_id = row.get("id")
            doc_id = (str(raw_id) if raw_id is not None
                      else f"jsonl-{n_lines}")
            if req.skip_existing and doc_id in seen_ids:
                n_skipped_dupe += 1
                continue
            seen_ids.add(doc_id)
            # `"metadata": null` (or any non-mapping value) would make
            # dict() raise and abort the load; treat it as empty metadata.
            try:
                metadata = dict(row.get("metadata") or {})
            except (TypeError, ValueError):
                metadata = {}
            d = Document(id=doc_id, text=str(text), metadata=metadata)
            tracker.record(d.id,
                           d.metadata.get("source", "?"),
                           STAGE_PARSE, True, 0)
            batch.append(d)
            n_parsed += 1
            if len(batch) >= req.batch_size:
                flush(batch)
                batch = []
            # max_docs of None or 0 means "no limit".
            if req.max_docs and n_parsed >= req.max_docs:
                break
    flush(batch)

    return {
        "ok": True,
        "path": str(p),
        "size_bytes": p.stat().st_size,
        "n_lines_read": n_lines,
        "n_parse_errors": parse_errors,
        "n_skipped_dupes": n_skipped_dupe,
        "n_docs_parsed": n_parsed,
        "n_docs_indexed": n_indexed,
        "n_docs_index_failed": n_index_failed,
        "pipeline_attached": pipe is not None,
        "batch_size": req.batch_size,
    }


# -------- /v1/stats — read-only stats, no admin scope required -------
def public_corpus_stats():
    """Return the ingestion tracker's analysis as a dict, plus the size of
    the live index under ``live_index_size``.

    ``live_index_size`` comes from ``pipe.doc_count()`` when the pipeline
    has that method, otherwise from ``len(pipe._docs)``; it is ``None``
    when neither is available or anything goes wrong while asking.
    """
    from ..middleware.corpus_ingestion import get_corpus_ingestion_tracker

    stats = get_corpus_ingestion_tracker().analyze().to_dict()

    index_size = None
    try:
        from ..pipeline import get_pipeline
        pipeline = get_pipeline()
        if hasattr(pipeline, "doc_count"):
            index_size = pipeline.doc_count()
        elif hasattr(pipeline, "_docs"):
            index_size = len(pipeline._docs)
    except Exception:
        # The pipeline is optional here; report an unknown size.
        index_size = None

    stats["live_index_size"] = index_size
    return stats


def public_quality_stats():
    """Return the answer-helpfulness tracker's report as a dict."""
    from ..middleware.answer_helpfulness import get_answer_helpfulness_tracker

    helpfulness_tracker = get_answer_helpfulness_tracker()
    quality_report = helpfulness_tracker.report()
    return quality_report.to_dict()


def public_gaps_stats(top_k: int = 10):
    """Return the zero-result tracker's report, limited to ``top_k``
    entries, as a dict."""
    from ..middleware.zero_result_tracker import get_zero_result_tracker

    gaps_tracker = get_zero_result_tracker()
    gaps_report = gaps_tracker.report(top_k=top_k)
    return gaps_report.to_dict()


def _latency_profile_summary(values, with_p95=True):
    """Summarize a non-empty list of millisecond samples.

    Returns a dict with ``min``, ``p50``, optionally ``p95``, ``max``
    (all rounded to one decimal) and the sample count ``n``.
    """
    import statistics

    ordered = sorted(values)
    summary = {
        "min": round(ordered[0], 1),
        "p50": round(statistics.median(ordered), 1),
    }
    if with_p95:
        # Nearest-rank style index; with a single sample it is that sample.
        idx = int(len(ordered) * 0.95) if len(ordered) > 1 else 0
        summary["p95"] = round(ordered[idx], 1)
    summary["max"] = round(ordered[-1], 1)
    summary["n"] = len(ordered)
    return summary


def public_latency_profile(query_text: str = "תום לב במשא ומתן",
                              n_iterations: int = 5,
                              top_k: int = 5) -> Dict[str, Any]:
    """Profile a query end-to-end across N iterations and return per-stage
    latency statistics (min/p50/p95/max).

    The pipeline is run ``max(1, n_iterations)`` times with ``k`` and
    ``rerank_k`` both set to ``top_k``. Before each run the pipeline's
    ``cache`` (when it has one) is cleared so every run re-executes.

    Stage names are whatever the pipeline response exposes in its
    ``timing_ms`` (or ``timings``) mapping; a measured ``total_ms`` is added
    for every run. When the retriever object exposes
    ``_last_per_retriever_ms`` those values are aggregated as well.

    Returns:
        A dict with the aggregated ``stages``, the stage with the largest
        median (``bottleneck_stage``), per-retriever statistics and the
        raw per-iteration timings. On failure a dict with an ``error`` key:
        ``"imports failed: ..."`` when the project imports fail, or
        ``"all iterations failed"`` (plus ``iterations``) when every run
        raised.
    """
    import time as _time
    try:
        from ..pipeline import get_pipeline
        from ..core.types import Query
    except Exception as e:
        return {"error": f"imports failed: {e}"}

    pipe = get_pipeline()

    def _clear_cache():
        # Drop the pipeline cache so the next run is not served from it.
        if hasattr(pipe, "cache"):
            try:
                pipe.cache.clear()
            except Exception:
                pass

    def _is_number(value):
        # bool is an int subclass; a True/False flag is not a timing.
        return (isinstance(value, (int, float))
                and not isinstance(value, bool))

    iterations: List[Dict[str, Any]] = []
    per_retriever_iters: List[Dict[str, float]] = []
    for _ in range(max(1, n_iterations)):
        _clear_cache()
        # perf_counter is monotonic, so the measured duration cannot be
        # distorted by wall-clock adjustments.
        t_start = _time.perf_counter()
        try:
            rag_resp = pipe.run(Query(
                text=query_text,
                k=top_k,
                rerank_k=top_k,
            ))
        except Exception as e:
            iterations.append({
                "error": str(e),
                "total_ms": (_time.perf_counter() - t_start) * 1000,
            })
            continue
        elapsed_ms = (_time.perf_counter() - t_start) * 1000
        reported = (getattr(rag_resp, "timing_ms", None)
                    or getattr(rag_resp, "timings", None) or {})
        # Copy before adding total_ms so the response object's own timing
        # mapping is never mutated.
        try:
            timing = dict(reported)
        except (TypeError, ValueError):
            timing = {}
        timing["total_ms"] = elapsed_ms
        iterations.append(timing)
        # Pull per-retriever timing (set by MultiRetriever side-channel)
        try:
            per_r = getattr(pipe.retrievers, "_last_per_retriever_ms", None)
            if per_r:
                per_retriever_iters.append(dict(per_r))
        except Exception:
            pass

    # Aggregate stats per stage over the successful runs only.
    valid = [it for it in iterations if "error" not in it]
    if not valid:
        return {"error": "all iterations failed",
                 "iterations": iterations}
    all_stages = set()
    for it in valid:
        all_stages.update(it.keys())
    stats = {}
    for stage in sorted(all_stages):
        values = [it[stage] for it in valid
                  if stage in it and _is_number(it[stage])]
        if values:
            stats[stage] = _latency_profile_summary(values)

    # Identify the bottleneck (largest p50), ignoring the overall total.
    non_total = {k: v for k, v in stats.items() if k != "total_ms"}
    bottleneck = (max(non_total.items(), key=lambda x: x[1]["p50"])
                  if non_total else None)

    # Aggregate per-retriever stats (when available)
    per_retriever_stats: Dict[str, Dict[str, float]] = {}
    all_retrievers = set()
    for it in per_retriever_iters:
        all_retrievers.update(it.keys())
    for ret in all_retrievers:
        vals = [it[ret] for it in per_retriever_iters
                if ret in it and _is_number(it[ret])]
        if vals:
            per_retriever_stats[ret] = _latency_profile_summary(
                vals, with_p95=False)

    # Identify the slowest individual retriever
    slowest_retriever = None
    if per_retriever_stats:
        slowest_retriever = max(per_retriever_stats.items(),
                                key=lambda x: x[1]["p50"])
    return {
        "query": query_text,
        "n_iterations": len(valid),
        "stages": stats,
        "bottleneck_stage": bottleneck[0] if bottleneck else None,
        "bottleneck_p50_ms": (bottleneck[1]["p50"] if bottleneck else None),
        "per_retriever": per_retriever_stats,
        "slowest_retriever": slowest_retriever[0] if slowest_retriever else None,
        "slowest_retriever_p50_ms": (slowest_retriever[1]["p50"]
                                        if slowest_retriever else None),
        "raw_iterations": iterations,
    }


def public_system_status() -> Dict[str, Any]:
    """One-stop status endpoint: aggregates the state of every subsystem.

    Returns a dict ``{"version": ..., "subsystems": {...}}`` suitable for a
    sidebar health widget. Each subsystem is probed independently; when a
    probe raises, that subsystem's entry becomes ``{"error": "<message>"}``
    instead of failing the whole call. The one exception is the initial
    pipeline/corpus probe: if it fails, only the ``corpus`` error entry is
    returned, because every later probe needs the pipeline.

    The probes read caches already attached to the pipeline via
    ``getattr``. NOTE(review): the cost of ``fingerprint_corpus`` and
    ``public_build_progress`` is not visible here — confirm they are cheap
    before polling this frequently.
    """
    out: Dict[str, Any] = {
        "version": "v2-2026-04-26",
        "subsystems": {},
    }
    # Pipeline / corpus
    try:
        from ..pipeline import get_pipeline
        from ..storage import (get_default_text_store, get_cache_store,
                                  fingerprint_corpus)
        pipe = get_pipeline()
        docs = (getattr(pipe, "_indexed_docs", None)
                or getattr(pipe, "_docs", None) or [])
        n_docs = len(docs)
        n_lazy = sum(1 for d in docs if (getattr(d, "metadata", None)
                                            or {}).get("_lazy_text"))
        out["subsystems"]["corpus"] = {
            "n_docs": n_docs,
            "n_lazy_text": n_lazy,
            "fingerprint": fingerprint_corpus(pipe),
        }
        # Text store
        try:
            ts_stats = get_default_text_store().stats()
            out["subsystems"]["text_store"] = {
                "n_docs": ts_stats["n_docs"],
                "total_chars": ts_stats["total_chars"],
                "compressed_bytes": ts_stats["compressed_bytes"],
                "db_bytes": ts_stats["db_bytes"],
                "compression_pct": round(
                    100 * (1 - ts_stats["compression_ratio"]), 1),
            }
        except Exception as e:
            out["subsystems"]["text_store"] = {"error": str(e)}
        # Persistent cache
        try:
            cs_stats = get_cache_store().stats()
            out["subsystems"]["cache_store"] = cs_stats
        except Exception as e:
            out["subsystems"]["cache_store"] = {"error": str(e)}
    except Exception as e:
        # Without a pipeline none of the remaining probes can run.
        out["subsystems"]["corpus"] = {"error": str(e)}
        return out

    # Domain coverage
    try:
        from collections import Counter
        domain_counts = Counter()
        for d in docs:
            md = (getattr(d, "metadata", None) or {})
            # Some docs nest their metadata one level down under "metadata".
            inner = md.get("metadata", md) if isinstance(md.get("metadata"), dict) else md
            dom = inner.get("domain") if isinstance(inner, dict) else None
            domain_counts[dom or "uncategorized"] += 1
        classified = sum(v for k, v in domain_counts.items()
                          if k != "uncategorized")
        out["subsystems"]["domain_classifier"] = {
            "n_classified": classified,
            "n_uncategorized": domain_counts.get("uncategorized", 0),
            # max(..., 1) avoids dividing by zero on an empty corpus.
            "coverage_pct": round(100 * classified / max(n_docs, 1), 1),
            "n_distinct_domains": len([k for k in domain_counts
                                          if k != "uncategorized"]),
        }
    except Exception as e:
        out["subsystems"]["domain_classifier"] = {"error": str(e)}

    # Citation network
    try:
        cn = getattr(pipe, "_citation_network_cache", None)
        if cn is not None:
            out["subsystems"]["citation_network"] = {
                "n_docs": cn.n_docs,
                "n_edges": cn.n_edges,
                "n_unique_citations": len(cn.cited_by),
                "n_resolved": len(cn.doc_for_citation),
                "warm": True,
            }
        else:
            out["subsystems"]["citation_network"] = {"warm": False}
    except Exception as e:
        out["subsystems"]["citation_network"] = {"error": str(e)}

    # Precedent analyzer cache
    try:
        oc = getattr(pipe, "_overruled_cache", None)
        out["subsystems"]["precedent_analyzer"] = {
            "warm": oc is not None,
            "building": getattr(pipe, "_overruled_cache_building", False),
            "n_detections": len(oc) if oc else 0,
        }
    except Exception as e:
        out["subsystems"]["precedent_analyzer"] = {"error": str(e)}

    # Outcome stats cache
    try:
        oc = getattr(pipe, "_outcome_stats_cache", None)
        out["subsystems"]["outcome_stats"] = {
            "warm": oc is not None,
            "n_processed": (oc.get("n_processed", 0) if oc else 0),
        }
    except Exception as e:
        out["subsystems"]["outcome_stats"] = {"error": str(e)}

    # Judge stats cache
    try:
        js = getattr(pipe, "_judge_stats_cache", None)
        out["subsystems"]["judge_stats"] = {
            "warm": js is not None,
            "n_judges": len(js) if js else 0,
        }
    except Exception as e:
        out["subsystems"]["judge_stats"] = {"error": str(e)}

    # Hilbert retriever — float16?
    try:
        # Inspect the pipeline's retrievers
        retrs = getattr(pipe, "retrievers", None)
        hilbert_info = {"present": False, "float16": False, "n_vecs": 0}
        if retrs is not None:
            # The pipeline.retrievers is a MultiRetriever; pull its members
            members = getattr(retrs, "_retrievers", []) or \
                       getattr(retrs, "retrievers", [])
            for r in members:
                if getattr(r, "name", "") == "hilbert":
                    # Read the attribute once with a default: a hilbert
                    # retriever without `_vecs_f16` is reported as
                    # present-but-not-float16 instead of raising.
                    vecs_f16 = getattr(r, "_vecs_f16", None)
                    hilbert_info["present"] = True
                    hilbert_info["float16"] = vecs_f16 is not None
                    if vecs_f16 is not None:
                        hilbert_info["n_vecs"] = int(vecs_f16.shape[0])
                        hilbert_info["bytes"] = int(vecs_f16.nbytes)
                    break
        out["subsystems"]["hilbert_retriever"] = hilbert_info
    except Exception as e:
        out["subsystems"]["hilbert_retriever"] = {"error": str(e)}

    # Query result cache (LRU on the MultiRetriever)
    try:
        retrs = getattr(pipe, "retrievers", None)
        if retrs is not None and hasattr(retrs, "cache_stats"):
            out["subsystems"]["query_cache"] = retrs.cache_stats()
        else:
            out["subsystems"]["query_cache"] = {"present": False}
    except Exception as e:
        out["subsystems"]["query_cache"] = {"error": str(e)}

    # Build progress (during long rebuild)
    try:
        out["subsystems"]["build_progress"] = public_build_progress()
    except Exception as e:
        out["subsystems"]["build_progress"] = {"error": str(e)}

    # Memory — process RSS + system pressure (psutil if available)
    try:
        import psutil as _ps_mem
        proc = _ps_mem.Process()
        meminfo = proc.memory_info()
        vmem = _ps_mem.virtual_memory()
        rss = meminfo.rss
        out["subsystems"]["memory"] = {
            "rss_bytes": int(rss),
            "rss_mb": round(rss / 1024 / 1024, 1),
            "rss_gb": round(rss / 1024 / 1024 / 1024, 2),
            "system_total_gb": round(vmem.total / 1024 / 1024 / 1024, 1),
            "system_available_gb": round(vmem.available / 1024 / 1024 / 1024, 1),
            "system_used_pct": round(vmem.percent, 1),
            "process_share_pct": round(100 * rss / vmem.total, 1),
        }
    except ImportError:
        out["subsystems"]["memory"] = {"error": "psutil not installed"}
    except Exception as e:
        out["subsystems"]["memory"] = {"error": str(e)}

    return out


def _trigger_precedent_cache_async(pipe) -> None:
    """Kick off the corpus-wide overruled-detection cache build in a
    background thread so user queries don't block on it.

    The worker first asks the persistent cache store for an entry under
    ``"overruled_v1"`` keyed by the corpus fingerprint. On a hit the stored
    detections become ``pipe._overruled_cache``. Otherwise it runs
    ``detect_overrulings`` over the indexed docs, stores the result on the
    pipeline and tries to persist it for the next restart.

    If the build fails, ``pipe._overruled_cache`` is set to ``[]`` (so
    readers see an empty, non-``None`` cache) and the error is printed.

    Repeated calls are cheap: ``pipe._overruled_cache_building`` is used as
    an "in progress" flag and a call made while it is set returns
    immediately. The flag is cleared when the worker finishes, and also if
    the worker thread cannot be started.
    """
    if getattr(pipe, "_overruled_cache_building", False):
        return
    pipe._overruled_cache_building = True

    def _build():
        try:
            from ..storage import get_cache_store, fingerprint_corpus
            cache_store = get_cache_store()
            fp = fingerprint_corpus(pipe)
            # Try persistent cache first
            cached = cache_store.get("overruled_v1", fp)
            if cached is not None:
                pipe._overruled_cache = cached
                print(f"[tau-rag] overruled cache loaded from disk: "
                      f"{len(cached)} detections (fingerprint match)")
                return
            # Cold build
            import time as _t
            t0 = _t.time()
            from ..precedent_analyzer import detect_overrulings
            indexed = (getattr(pipe, "_indexed_docs", None)
                        or getattr(pipe, "_docs", None) or [])
            dets = detect_overrulings(indexed)
            pipe._overruled_cache = dets
            # Persist for next restart. Persistence is best-effort, but the
            # log line must say what actually happened.
            persisted = True
            try:
                cache_store.set("overruled_v1", fp, dets)
            except Exception as persist_err:
                persisted = False
                print(f"[tau-rag] overruled cache persist failed: "
                      f"{persist_err}")
            outcome = "(persisted)" if persisted else "(not persisted)"
            print(f"[tau-rag] overruled cache built async: "
                  f"{len(dets)} detections in {_t.time()-t0:.1f}s "
                  f"{outcome}")
        except Exception as e:
            pipe._overruled_cache = []
            print(f"[tau-rag] overruled cache async build failed: {e}")
        finally:
            pipe._overruled_cache_building = False

    import threading
    try:
        threading.Thread(target=_build, daemon=True,
                          name="overruled-cache-build").start()
    except Exception:
        # If the thread never started, nothing will clear the flag; reset
        # it here so a later call can retry.
        pipe._overruled_cache_building = False
        raise


def _filter_docs_by_section_type(docs: list, section_type: str) -> list:
    """Keep only docs whose chunk text appears in the given section type
    of the parent judgment (per the structurer).

    Used as a post-retrieval filter when the user passes
    `filters.section_type=discussion` etc.

    For each doc the parent document is looked up by ``doc_id`` in
    ``pipe._indexed_docs``, its text is structured once with
    ``structure_judgment`` (the resulting sections are cached on the
    pipeline in ``_struct_for_filter_cache``), and the doc is kept when the
    first 200 characters of its text occur as a substring of a section
    whose ``id`` equals ``section_type``. Kept docs are tagged in place
    with ``_section_match``.

    Pass-through / drop rules:
      * empty ``section_type`` or ``docs``, or failed imports → ``docs`` is
        returned unchanged;
      * chunks shorter than 30 characters are kept (too short to match);
      * non-dict docs, docs without ``doc_id``, and docs whose parent is
        missing or has no text are dropped;
      * once the 1.5 s budget is spent, the current and all remaining docs
        are appended unfiltered.
    """
    if not section_type or not docs:
        return docs
    try:
        from ..pipeline import get_pipeline
        from ..judgment_structurer import structure_judgment
    except Exception:
        return docs
    pipe = get_pipeline()
    if not hasattr(pipe, "_struct_for_filter_cache"):
        pipe._struct_for_filter_cache = {}
    cache = pipe._struct_for_filter_cache

    # Performance budget: limit total time spent in this filter (so user
    # queries never block forever on cold caches with thousands of parent
    # docs to structure). A monotonic clock keeps the budget immune to
    # wall-clock adjustments.
    import time as _t
    deadline = _t.monotonic() + 1.5    # 1.5 seconds total budget per query

    out = []
    for pos, d in enumerate(docs):
        if _t.monotonic() > deadline:
            # Time's up — pass the not-yet-examined docs through unchanged
            # (better than blocking). Slice by position in `docs`, not by
            # how many docs were kept so far: the two differ as soon as
            # anything has been filtered out.
            out.extend(docs[pos:])
            break
        if not isinstance(d, dict):
            continue
        chunk_text = (d.get("text") or "")[:200]
        if len(chunk_text) < 30:
            out.append(d)  # too short to filter — keep
            continue
        doc_id = d.get("doc_id")
        if not doc_id:
            continue
        # Fetch parent doc text (lazy-aware)
        if doc_id in cache:
            sections = cache[doc_id]
        else:
            try:
                parent = next((x for x in (getattr(pipe, "_indexed_docs",
                                                    None) or [])
                                 if getattr(x, "id", None) == doc_id), None)
                if parent is None:
                    continue
                text = (getattr(parent, "text", None) or "")
                if not text and (getattr(parent, "metadata", None)
                                   or {}).get("_lazy_text"):
                    try:
                        text = pipe.get_text(doc_id) or ""
                    except Exception:
                        text = ""
                if not text:
                    cache[doc_id] = []
                    continue
                struct = structure_judgment(text)
                sections = struct.get("sections", [])
                cache[doc_id] = sections
            except Exception:
                # Cache the failure as "no sections" so the same parent is
                # not re-structured on every query.
                cache[doc_id] = []
                continue

        # Check whether the chunk lives in a section of the requested type
        for s in sections:
            if s.get("id") != section_type:
                continue
            if chunk_text in s.get("text", ""):
                d["_section_match"] = section_type
                out.append(d)
                break
    return out


def public_export_answer_docx(body: Dict[str, Any]) -> bytes:
    """Generate a Hebrew legal memo .docx from an answer payload.

    Body shape: {"question": str, "payload": {answer, docs, confidence, ...}}

    The memo contains a title (the question), a metadata line (date,
    optional domain and confidence), the answer split into paragraphs on
    blank lines, up to ten sources with a short excerpt each, and a
    disclaimer footer. All paragraphs are marked right-to-left.

    Entries of ``docs`` that are not dicts are ignored, and a
    ``confidence`` that cannot be converted to a number is left out of the
    metadata line, so a malformed payload does not abort the export.

    Returns:
        Raw bytes of the docx file.

    Raises:
        RuntimeError: if python-docx is not installed.
    """
    import datetime as _dt
    import re as _re_x
    from io import BytesIO
    try:
        from docx import Document as DocxDocument
        from docx.shared import Pt, Inches, RGBColor
        from docx.enum.text import WD_ALIGN_PARAGRAPH
        from docx.oxml.ns import qn
        from docx.oxml import OxmlElement
    except ImportError as e:
        raise RuntimeError(
            "python-docx not installed; pip install python-docx") from e

    question = (body.get("question") or "").strip()
    payload = body.get("payload") or {}
    answer = (payload.get("answer") or "").strip()
    # Only dict-shaped sources can be rendered; anything else is skipped
    # instead of crashing on `.get`.
    docs = [d for d in (payload.get("docs") or []) if isinstance(d, dict)]
    confidence = payload.get("confidence")
    domain = (payload.get("understanding") or {}).get("domain")

    doc = DocxDocument()
    # Set default font + RTL section properties
    style = doc.styles['Normal']
    style.font.name = 'David'
    style.font.size = Pt(11)
    rPr = style.element.get_or_add_rPr()
    rFonts = rPr.find(qn('w:rFonts'))
    if rFonts is None:
        rFonts = OxmlElement('w:rFonts')
        rPr.append(rFonts)
    rFonts.set(qn('w:cs'), 'David')
    rFonts.set(qn('w:hAnsi'), 'David')

    def _rtl_para(p):
        """Mark paragraph as RTL (Hebrew) and right-align it."""
        pPr = p._p.get_or_add_pPr()
        bidi = OxmlElement('w:bidi')
        bidi.set(qn('w:val'), '1')
        pPr.append(bidi)
        p.alignment = WD_ALIGN_PARAGRAPH.RIGHT

    # Title
    title = doc.add_heading(question or "תשובה משפטית", level=1)
    _rtl_para(title)
    for run in title.runs:
        run.font.color.rgb = RGBColor(0x4F, 0x46, 0xE5)

    # Subtitle / metadata
    sub = doc.add_paragraph()
    _rtl_para(sub)
    r = sub.add_run(f"נוצר על-ידי tau-rag · {_dt.date.today().strftime('%d/%m/%Y')}")
    r.italic = True
    r.font.size = Pt(9)
    r.font.color.rgb = RGBColor(0x94, 0xA3, 0xB8)
    if domain:
        sub.add_run(f"   ·   תחום: {domain}").font.size = Pt(9)
    if confidence is not None:
        try:
            confidence_pct = round(float(confidence) * 100)
        except (TypeError, ValueError, OverflowError):
            # Non-numeric / NaN / infinite confidence: omit it.
            confidence_pct = None
        if confidence_pct is not None:
            sub.add_run(f"   ·   ביטחון: {confidence_pct}%").font.size = Pt(9)

    # Answer body
    h = doc.add_heading("תשובה", level=2)
    _rtl_para(h)
    for para in answer.split("\n\n"):
        if not para.strip():
            continue
        p = doc.add_paragraph(para.strip())
        _rtl_para(p)
        p.paragraph_format.line_spacing = 1.5

    # Sources block
    if docs:
        h2 = doc.add_heading(f"מקורות ({len(docs)})", level=2)
        _rtl_para(h2)
        for d in docs[:10]:
            md = d.get("metadata") or {}
            if not isinstance(md, dict):
                md = {}
            title_text = (md.get("citation") or md.get("title")
                           or d.get("doc_id", "?"))
            p = doc.add_paragraph(style="List Number")
            _rtl_para(p)
            r1 = p.add_run(str(title_text))
            r1.bold = True
            extras = []
            if md.get("court"):
                extras.append(str(md["court"]))
            if md.get("verdict_dt"):
                extras.append(str(md["verdict_dt"])[:10])
            ps = d.get("precedent_status") or {}
            if isinstance(ps, dict) and ps.get("is_overruled"):
                extras.append("⚠ הלכה שבוטלה")
            if extras:
                r2 = p.add_run("   ·   " + "   ·   ".join(extras))
                r2.font.size = Pt(9)
                r2.font.color.rgb = RGBColor(0x47, 0x55, 0x69)
            # Excerpt: drop bracketed tags, then keep the first 240 chars.
            text = str(d.get("text") or "")
            excerpt = _re_x.sub(r"\[[^\]]+\]\s*", "", text).strip()[:240]
            if excerpt:
                ep = doc.add_paragraph()
                _rtl_para(ep)
                ep.paragraph_format.left_indent = Inches(0.25)
                er = ep.add_run("« " + excerpt + " »")
                er.italic = True
                er.font.size = Pt(9)
                er.font.color.rgb = RGBColor(0x47, 0x55, 0x69)

    # Footer note
    fp = doc.add_paragraph()
    _rtl_para(fp)
    fr = fp.add_run("\n— מסמך זה נוצר אוטומטית על ידי tau-rag לצרכי מחקר משפטי. "
                     "אין בו ייעוץ משפטי. אנא אמת מקורות מול גרסה רשמית של פסקי הדין.")
    fr.italic = True
    fr.font.size = Pt(8)
    fr.font.color.rgb = RGBColor(0x94, 0xA3, 0xB8)

    buf = BytesIO()
    doc.save(buf)
    return buf.getvalue()


def _enrich_docs_with_precedent_status(docs: list) -> None:
    """Add `precedent_status` to each dict doc in-place.

    Status shape: {is_overruled: bool, kind: str|None,
                    overruling_doc_id: str|None, overruling_title: str|None,
                    confidence: float}
    plus a `snippet` (at most 200 chars) when a match was found.

    NON-BLOCKING: if the corpus-wide overruled cache on the pipeline is
    cold (``None``), a background build is requested via
    ``_trigger_precedent_cache_async`` and every doc gets the default
    "not overruled" status straight away, rather than waiting for the
    build during a user query. An empty cache (no detections) yields the
    same default status.

    Non-dict entries are left untouched. The whole function is
    best-effort: any exception (including failed imports) is swallowed so
    enrichment can never break a query.
    """
    if not docs:
        return

    def _default_status():
        # A fresh dict per doc, so statuses are never shared between docs.
        return {
            "is_overruled": False, "kind": None,
            "overruling_doc_id": None,
            "overruling_title": None, "confidence": 0.0,
        }

    try:
        from ..pipeline import get_pipeline
        from ..precedent_analyzer import is_overruled
        pipe = get_pipeline()
        cached = getattr(pipe, "_overruled_cache", None)
        if cached is None:
            # Cache cold — kick off background build but don't wait.
            _trigger_precedent_cache_async(pipe)
        if not cached:
            # Cold cache or no detections in this corpus: nothing to match
            # against, so every doc gets the default status.
            for d in docs:
                if isinstance(d, dict):
                    d["precedent_status"] = _default_status()
            return

        for d in docs:
            if not isinstance(d, dict):
                continue
            meta = d.get("metadata") or {}
            citation = meta.get("citation") or d.get("title") or d.get("doc_id", "")
            match = is_overruled(citation, cached)
            if not match:
                d["precedent_status"] = _default_status()
                continue
            kind = match.get("kind")
            d["precedent_status"] = {
                "is_overruled": kind in ("explicit_overrule", "depart_from"),
                "kind": kind,
                "overruling_doc_id": match.get("overruling_doc_id"),
                "overruling_title": match.get("overruling_title"),
                # `or` guards against an explicit None in the match, which
                # would otherwise raise and silently stop enrichment for
                # every remaining doc.
                "snippet": (match.get("snippet") or "")[:200],
                "confidence": float(match.get("confidence") or 0.0),
            }
    except Exception:
        # Deliberate best-effort: enrichment must never fail the query.
        pass


def public_judge_stats(top_k: int = 20):
    """Per-judge analytics: case counts, outcome distribution, top domain.

    Walks the live pipeline's documents once. Every distinct document id
    with at least 200 characters of text is passed to
    ``legal_entities.extract_judges`` and
    ``judgment_structurer.structure_judgment``; the results are aggregated
    per judge, keyed by the judge's normalized name (falling back to the
    raw name).

    Results are cached twice: on the pipeline object
    (``_judge_stats_cache``) and in the persistent cache store under the
    ``judge_stats_v1`` namespace, keyed by the corpus fingerprint.

    Args:
        top_k: Number of judge rows returned in ``judges``.

    Returns:
        ``{"top_k", "judges", "n_total", "cached"}`` on success, or the
        tuple ``({"error": <message>}, 500)`` when anything raises.
    """
    try:
        from ..pipeline import get_pipeline
        from ..legal_entities import extract_judges
        from ..judgment_structurer import structure_judgment
        from ..storage import get_cache_store, fingerprint_corpus
        pipe = get_pipeline()
        # In-memory cache (fastest)
        cached = getattr(pipe, "_judge_stats_cache", None)
        if cached is not None:
            return {"top_k": top_k, "judges": cached[:top_k],
                    "n_total": len(cached), "cached": True}
        # Persistent cache (warm-up after restart). ``store_ready`` records
        # whether both handles were obtained, so the write-back below can be
        # skipped explicitly instead of relying on a swallowed NameError.
        cs = None
        fp = None
        store_ready = False
        try:
            fp = fingerprint_corpus(pipe)
            cs = get_cache_store()
            store_ready = True
            persisted = cs.get("judge_stats_v1", fp)
            if persisted is not None:
                pipe._judge_stats_cache = persisted
                return {"top_k": top_k, "judges": persisted[:top_k],
                        "n_total": len(persisted), "cached": True}
        except Exception:
            # Persistent cache is best-effort; fall through to a full build.
            pass
        docs = (getattr(pipe, "_indexed_docs", None)
                or getattr(pipe, "_docs", None) or [])
        outcomes_init = ("accepted", "rejected", "partial",
                         "remanded", "unknown")
        # judge_normalized → stats dict
        agg: Dict[str, Dict[str, Any]] = {}
        # De-dupe by doc id (as the outcome/domain stats endpoints do) so a
        # document listed more than once is not counted as several cases.
        seen_ids = set()
        for d in docs:
            doc_id = getattr(d, "id", None)
            if not doc_id or doc_id in seen_ids:
                continue
            seen_ids.add(doc_id)
            text = getattr(d, "text", "") or ""
            md = getattr(d, "metadata", None) or {}
            if not text and md.get("_lazy_text"):
                try:
                    text = pipe.get_text(doc_id) or ""
                except Exception:
                    continue
            # Short texts are skipped before any extraction is attempted.
            if len(text) < 200:
                continue
            judges = extract_judges(text)
            if not judges:
                continue
            try:
                struct = structure_judgment(text)
                outcome = struct.get("outcome", "unknown")
            except Exception:
                outcome = "unknown"
            domain = md.get("domain")
            for j in judges:
                key = j["normalized"] or j["name"]
                if key not in agg:
                    agg[key] = {
                        "judge": key,
                        "name": j["name"],
                        "title": j["title"],
                        "surname": j.get("surname"),
                        "n_cases": 0,
                        "outcomes": {o: 0 for o in outcomes_init},
                        "domains": {},
                        "doc_ids": [],
                    }
                row = agg[key]
                row["n_cases"] += 1
                row["outcomes"][outcome] = row["outcomes"].get(outcome, 0) + 1
                if domain:
                    row["domains"][domain] = row["domains"].get(domain, 0) + 1
                # Keep only a small sample of doc ids per judge.
                if len(row["doc_ids"]) < 12:
                    row["doc_ids"].append(doc_id)
        # Sort by n_cases descending
        rows = sorted(agg.values(), key=lambda x: -x["n_cases"])
        # Compute "top domain" per judge for compact display
        for r in rows:
            domains = r["domains"]
            r["top_domain"] = max(domains, key=domains.get) if domains else None
        pipe._judge_stats_cache = rows
        # Persist for next restart (only when the store was reachable).
        if store_ready:
            try:
                cs.set("judge_stats_v1", fp, rows)
            except Exception:
                pass
        return {"top_k": top_k, "judges": rows[:top_k],
                "n_total": len(rows), "cached": False}
    except Exception as e:
        return {"error": str(e)}, 500


def public_judgment_entities(doc_id: str):
    """Run entity extraction over one judgment from the live pipeline.

    The document is located by id among the pipeline's documents: an exact
    id match is preferred, and only when none exists is a suffix match
    (id ending with ``doc_id``) accepted. When the document carries no
    inline text and its metadata has ``_lazy_text`` set, the text is
    fetched through ``pipe.get_text``. The text is then handed to
    ``legal_entities.extract_entities``.

    Args:
        doc_id: Full document id, or a suffix of it.

    Returns:
        The extractor's result with ``doc_id`` (as requested) added;
        ``({"error": ...}, 404)`` when ``doc_id`` is empty or no document
        matches; ``({"error": ..., "doc_id": ...}, 500)`` on any exception.
    """
    if not doc_id:
        return {"error": "missing doc_id"}, 404
    try:
        from ..pipeline import get_pipeline
        from ..legal_entities import extract_entities
        pipe = get_pipeline()
        docs = (getattr(pipe, "_indexed_docs", None)
                or getattr(pipe, "_docs", None) or [])
        # Exact match first: a single combined pass would let an earlier
        # suffix hit (e.g. "112" for "12") shadow the exact document.
        target = next((d for d in docs
                       if getattr(d, "id", None) == doc_id), None)
        if target is None:
            target = next((d for d in docs
                           if (getattr(d, "id", "") or "").endswith(doc_id)),
                          None)
        if target is None:
            return {"error": "doc not found", "doc_id": doc_id}, 404
        text = getattr(target, "text", "") or ""
        if not text:
            md = getattr(target, "metadata", None) or {}
            if md.get("_lazy_text"):
                text = pipe.get_text(target.id) or ""
        result = extract_entities(text)
        result["doc_id"] = doc_id
        return result
    except Exception as e:
        return {"error": str(e), "doc_id": doc_id}, 500


def public_overruled_precedents(top_k: int = 50):
    """List overruling detections for the whole corpus, best first.

    Delegates detection to ``precedent_analyzer.detect_overrulings`` over
    the pipeline's documents, orders the detections by kind priority and
    then by descending confidence, and memoizes the ordered list on the
    pipeline as ``_overruled_cache``.

    Args:
        top_k: Number of detections returned in ``detections``.

    Returns:
        ``{"detections", "n_total", "cached"}`` on success, or
        ``({"error": <message>}, 500)`` when anything raises.
    """
    try:
        from ..pipeline import get_pipeline
        from ..precedent_analyzer import detect_overrulings

        pipeline = get_pipeline()
        memo = getattr(pipeline, "_overruled_cache", None)
        if memo is not None:
            return {
                "detections": memo[:top_k],
                "n_total": len(memo),
                "cached": True,
            }

        corpus = getattr(pipeline, "_indexed_docs", None)
        if not corpus:
            corpus = getattr(pipeline, "_docs", None) or []
        detections = detect_overrulings(corpus)

        # Lower rank sorts first; unknown kinds go last (rank 9).
        rank_by_kind = {
            "explicit_overrule": 0,
            "depart_from": 1,
            "implicit_overrule": 2,
            "distinguish": 3,
            "reaffirm": 4,
        }

        def _priority(det):
            rank = rank_by_kind.get(det.get("kind"), 9)
            return (rank, -det.get("confidence", 0))

        detections.sort(key=_priority)
        pipeline._overruled_cache = detections
        return {
            "detections": detections[:top_k],
            "n_total": len(detections),
            "cached": False,
        }
    except Exception as exc:
        return {"error": str(exc)}, 500


def public_doctrine_evolution(keyword: str, top_k: int = 50):
    """Find judgments that mention a doctrine keyword.

    The search itself is delegated to
    ``precedent_analyzer.find_doctrine_evolution`` over the pipeline's
    documents; this wrapper only validates the keyword and truncates the
    hit list. (The helper is presumably responsible for the chronological
    ordering the endpoint is meant to provide — confirm in that module.)

    Args:
        keyword: Doctrine name or phrase to look for; must be non-empty.
        top_k: Number of hits returned in ``hits``.

    Returns:
        ``{"keyword", "n_total", "hits"}`` on success;
        ``({"error": "missing keyword"}, 400)`` for an empty keyword;
        ``({"error": <message>}, 500)`` when anything raises.
    """
    if not keyword:
        return {"error": "missing keyword"}, 400
    try:
        from ..pipeline import get_pipeline
        from ..precedent_analyzer import find_doctrine_evolution

        pipeline = get_pipeline()
        corpus = getattr(pipeline, "_indexed_docs", None)
        if not corpus:
            corpus = getattr(pipeline, "_docs", None) or []
        matches = find_doctrine_evolution(corpus, keyword)
        payload = {"keyword": keyword, "n_total": len(matches)}
        payload["hits"] = matches[:top_k]
        return payload
    except Exception as exc:
        return {"error": str(exc)}, 500


def public_judgment_network(doc_id: str):
    """Citation network for one judgment: cites + cited_by + cocited.

    Locates the document among the pipeline's documents (an exact id match
    is preferred; a suffix match is used only when no exact match exists),
    obtains the citation index via ``citation_network.get_or_build`` and
    asks ``network_for_doc`` for the document's network. The ``cited_by``
    and ``cocited`` entries are then enriched with light display metadata
    (title, domain, court, verdict date) taken from each referenced
    document's metadata.

    Args:
        doc_id: Full document id, or a suffix of it.

    Returns:
        The enriched network dict on success;
        ``({"error": ...}, 404)`` when ``doc_id`` is empty or unknown;
        ``({"error": ..., "doc_id": ...}, 500)`` on any exception.
    """
    if not doc_id:
        return {"error": "missing doc_id"}, 404
    try:
        from ..pipeline import get_pipeline
        from ..citation_network import get_or_build, network_for_doc
        pipe = get_pipeline()
        # Verify doc exists in the pipeline
        docs = (getattr(pipe, "_indexed_docs", None)
                or getattr(pipe, "_docs", None) or [])
        # Index documents by id once. ``setdefault`` keeps the FIRST document
        # for a repeated id, matching a first-match linear scan.
        by_id: Dict[Any, Any] = {}
        for d in docs:
            by_id.setdefault(getattr(d, "id", None), d)
        # Exact match first so a suffix hit on another id (e.g. "112" for
        # "12") can never shadow the exact document.
        target = by_id.get(doc_id)
        if target is None:
            target = next((d for d in docs
                           if (getattr(d, "id", "") or "").endswith(doc_id)),
                          None)
        if target is None:
            return {"error": "doc not found", "doc_id": doc_id}, 404
        cn = get_or_build(pipe)
        result = network_for_doc(cn, target.id)

        # Enrich cited_by + cocited entries with light metadata for display
        def _enrich(did: str) -> Dict[str, Any]:
            d = by_id.get(did)
            if not d:
                return {"doc_id": did}
            md = getattr(d, "metadata", None) or {}
            return {
                "doc_id": did,
                "title": md.get("citation") or md.get("title") or did,
                "domain": md.get("domain"),
                "court": md.get("court"),
                "verdict_dt": md.get("verdict_dt"),
            }

        result["cited_by"] = [_enrich(d) for d in result["cited_by"]]
        # Metadata keys from ``_enrich`` override same-named keys already on
        # the co-citation entry.
        result["cocited"] = [{**c, **_enrich(c["doc_id"])}
                             for c in result["cocited"]]
        return result
    except Exception as e:
        return {"error": str(e), "doc_id": doc_id}, 500


def public_popular_citations(top_k: int = 25, kind: Optional[str] = None):
    """Return the most-cited references in the corpus.

    Builds (or reuses) the citation index through
    ``citation_network.get_or_build`` and forwards ``top_k`` and ``kind``
    to ``citation_network.popular_citations``.

    Args:
        top_k: Maximum number of rows requested from the ranking helper.
        kind: Optional citation-kind filter, passed through unchanged.

    Returns:
        ``{"top_k", "kind", "rows", "n_total_citations"}`` where the last
        field is ``len(cn.cited_by)``; or ``({"error": <message>}, 500)``
        when anything raises.
    """
    try:
        from ..pipeline import get_pipeline
        from ..citation_network import get_or_build, popular_citations

        network = get_or_build(get_pipeline())
        ranked = popular_citations(network, top_k=top_k, kind=kind)
        return {
            "top_k": top_k,
            "kind": kind,
            "rows": ranked,
            "n_total_citations": len(network.cited_by),
        }
    except Exception as exc:
        return {"error": str(exc)}, 500


def public_outcome_stats():
    """Per-domain breakdown of judgment outcomes.

    Walks every distinct document id in the pipeline, skips documents with
    fewer than 200 characters of text, runs
    ``judgment_structurer.structure_judgment`` on the rest and tallies the
    reported outcome by domain. Documents without a domain are counted
    under ``__uncategorized__``; domains missing from the catalog get a
    bucket created on first sight.

    The result is cached on the pipeline (``_outcome_stats_cache``) and in
    the persistent cache store under ``outcome_stats_v1``, keyed by the
    corpus fingerprint.

    Returns:
        ``{"n_processed", "totals", "domains"}`` where ``domains`` holds
        only non-empty buckets sorted by total descending; or
        ``({"error": <message>}, 500)`` when anything raises.
    """
    try:
        from ..pipeline import get_pipeline
        from ..judgment_structurer import structure_judgment
        from ..domain_classifier import all_domains_meta
        pipe = get_pipeline()
        # Cache the heavy walk: in-memory first, then persistent
        cached = getattr(pipe, "_outcome_stats_cache", None)
        if cached is not None:
            return cached
        # ``store_ready`` records whether both persistent-cache handles were
        # obtained, so the write-back below is skipped explicitly rather than
        # failing on an unbound name inside a blanket ``except``.
        cs = None
        fp = None
        store_ready = False
        try:
            from ..storage import get_cache_store, fingerprint_corpus
            cs = get_cache_store()
            fp = fingerprint_corpus(pipe)
            store_ready = True
            persisted = cs.get("outcome_stats_v1", fp)
            if persisted is not None:
                pipe._outcome_stats_cache = persisted
                return persisted
        except Exception:
            # Persistent cache is best-effort; fall through to a full build.
            pass
        docs = (getattr(pipe, "_indexed_docs", None)
                or getattr(pipe, "_docs", None) or [])
        # Initialize per-domain bucket
        outcomes = ("accepted", "rejected", "partial",
                    "remanded", "unknown")
        domain_meta = {d["id"]: d for d in all_domains_meta()}
        per_domain: Dict[str, Dict[str, int]] = {
            did: {o: 0 for o in outcomes} for did in domain_meta
        }
        per_domain["__uncategorized__"] = {o: 0 for o in outcomes}
        totals: Dict[str, int] = {o: 0 for o in outcomes}
        n_processed = 0
        seen_ids: set = set()
        for d in docs:
            doc_id = getattr(d, "id", None)
            if not doc_id or doc_id in seen_ids:
                continue
            seen_ids.add(doc_id)
            text = getattr(d, "text", "") or ""
            md = getattr(d, "metadata", None) or {}
            # Lazy-text: fetch from the pipeline on demand; a failed fetch
            # leaves the text empty, so the length check below skips the doc.
            if not text and md.get("_lazy_text"):
                try:
                    text = pipe.get_text(doc_id) or ""
                except Exception:
                    text = ""
            # Skip short docs (under 200 characters).
            if len(text) < 200:
                continue
            # Metadata may be nested one level under a "metadata" key.
            nested = md.get("metadata")
            inner = nested if isinstance(nested, dict) else md
            domain = inner.get("domain") if isinstance(inner, dict) else None
            try:
                struct = structure_judgment(text)
                outcome = struct.get("outcome", "unknown")
            except Exception:
                outcome = "unknown"
            bucket_key = domain or "__uncategorized__"
            bucket = per_domain.get(bucket_key)
            if not bucket:
                # Domain not in the catalog: create its bucket lazily.
                bucket = per_domain.setdefault(
                    bucket_key, {o: 0 for o in outcomes})
            bucket[outcome] = bucket.get(outcome, 0) + 1
            totals[outcome] = totals.get(outcome, 0) + 1
            n_processed += 1
        # Build response
        rows = []
        for did, counts in per_domain.items():
            total = sum(counts.values())
            if total == 0:
                continue
            # Buckets without catalog metadata get a placeholder entry.
            meta = domain_meta.get(did) or {
                "id": did, "label_he": "לא מסווג",
                "icon": "❓", "color": "#94a3b8",
            }
            rows.append({**meta, "counts": counts, "total": total})
        rows.sort(key=lambda r: -r["total"])
        result = {
            "n_processed": n_processed,
            "totals": totals,
            "domains": rows,
        }
        pipe._outcome_stats_cache = result
        # Persist for next restart (only when the store was reachable).
        if store_ready:
            try:
                cs.set("outcome_stats_v1", fp, result)
            except Exception:
                pass
        return result
    except Exception as e:
        return {"error": str(e)}, 500


def public_judgment_structured(doc_id: str):
    """Return the structured-judgment view for a single corpus doc.

    Finds the requested document among the pipeline's documents (an exact
    id match is preferred; a suffix match is used only when no exact match
    exists), runs ``judgment_structurer.structure_judgment`` over its text
    and returns the structurer's payload augmented with:

    * ``coverage``  — section text length divided by full text length;
    * ``full_text`` — the original text, as a frontend fallback;
    * ``doc_id`` and a ``metadata`` summary from the corpus record;
    * header fields (court / case_number / date) filled from the corpus
      metadata when the structurer left them empty;
    * ``catalog``   — the section catalog.

    Results are cached per process, keyed by the requested ``doc_id`` plus
    a hash of the full text, and the cache is trimmed once it exceeds 500
    entries.

    Args:
        doc_id: Full document id, or a suffix of it.

    Returns:
        The payload dict on success; ``({"error": ...}, 404)`` when
        ``doc_id`` is empty or not in the index;
        ``({"error": ..., "doc_id": ...}, 500)`` on any exception.
    """
    if not doc_id:
        return {"error": "missing doc_id"}, 404

    # Lazy module-level cache
    global _JUDGMENT_STRUCT_CACHE
    try:
        _JUDGMENT_STRUCT_CACHE
    except NameError:
        _JUDGMENT_STRUCT_CACHE = {}

    try:
        import hashlib as _hl
        from ..pipeline import get_pipeline
        from ..judgment_structurer import structure_judgment, section_catalog
        pipe = get_pipeline()
        docs = (getattr(pipe, "_indexed_docs", None)
                or getattr(pipe, "_docs", None)
                or [])

        def _id_of(d):
            # Documents may expose their id as ``id`` or ``doc_id``.
            return getattr(d, "id", None) or getattr(d, "doc_id", None)

        # Exact id first; suffix match only as a fallback (resilience to
        # path-like prefixes), so a longer id ending in the same characters
        # can never shadow the exact document.
        target = next((d for d in docs if _id_of(d) == doc_id), None)
        if target is None:
            target = next((d for d in docs
                           if (_id_of(d) or "").endswith(doc_id)), None)
        if target is None:
            return {"error": "doc not found", "doc_id": doc_id}, 404

        text = getattr(target, "text", "") or ""
        meta = getattr(target, "metadata", None) or {}
        # Lazy-text fallback: when the record carries no inline text, fetch
        # it from the pipeline on demand. Use the resolved id so records that
        # only expose ``doc_id`` are fetched too.
        if not text and meta.get("_lazy_text"):
            try:
                text = pipe.get_text(_id_of(target)) or ""
            except Exception:
                pass
        # Cache key: requested id + hash of the FULL text, so an edit
        # anywhere in the document invalidates the cached payload.
        digest = _hl.md5(text.encode("utf-8", "replace")).hexdigest()[:16]
        key = f"{doc_id}:{digest}"
        cached = _JUDGMENT_STRUCT_CACHE.get(key)
        if cached is not None:
            return cached

        result = structure_judgment(text)
        # Full-text safety net: ALWAYS attach the original text so the
        # frontend has a fallback if structuring loses anything. Plus a
        # coverage ratio so the UI can warn when sections don't reconstruct
        # the source.
        section_chars = sum(len((s or {}).get("text", ""))
                            for s in result.get("sections", []))
        full_len = max(len(text), 1)  # avoid division by zero on empty text
        result["coverage"] = round(section_chars / full_len, 3)
        result["full_text"] = text
        # Layer in supplementary metadata from the corpus record so the
        # frontend doesn't need to hit a second endpoint.
        result["doc_id"] = doc_id
        result["metadata"] = {
            "title": meta.get("title") or meta.get("citation"),
            "court": meta.get("court"),
            "verdict_dt": meta.get("verdict_dt"),
            "domain": meta.get("domain"),
            "type": meta.get("type"),
            "source": meta.get("source"),
            "kind": meta.get("kind"),
        }
        # Fill header values the structurer did NOT extract from the corpus
        # metadata; values sniffed from the text are left untouched.
        header = result.setdefault("header", {})
        if meta.get("court") and not header.get("court"):
            header["court"] = meta["court"]
        if meta.get("citation") and not header.get("case_number"):
            header["case_number"] = meta["citation"]
        if meta.get("verdict_dt") and not header.get("date"):
            header["date"] = str(meta["verdict_dt"])[:10]

        # Section catalog included so frontend can render an "Unknown"
        # tab for sections we recognize but didn't find.
        result["catalog"] = section_catalog()

        _JUDGMENT_STRUCT_CACHE[key] = result
        # Cap cache to ~500 entries: drop the 50 oldest by insertion order.
        if len(_JUDGMENT_STRUCT_CACHE) > 500:
            for k in list(_JUDGMENT_STRUCT_CACHE.keys())[:50]:
                _JUDGMENT_STRUCT_CACHE.pop(k, None)
        return result
    except Exception as e:
        return {"error": str(e), "doc_id": doc_id}, 500


def public_classify_query(text: str):
    """Classify a free-text legal query into legal domain(s).

    Delegates to ``domain_classifier.classify`` and decorates each ranked
    entry with that domain's display metadata, so the frontend can render
    filter chips without a second lookup. The original author notes the
    classifier is rule-based and cheap enough to call while the user types
    (with frontend debouncing).

    Args:
        text: The raw query; empty or whitespace-only input short-circuits.

    Returns:
        For blank input: ``{"top": None, "ranked": [], "scores": {},
        "catalog": [...]}``. Otherwise the classifier's result dict, with
        each ``ranked`` entry updated in place with its domain metadata and
        a ``catalog`` key holding all domain metadata.
    """
    from ..domain_classifier import classify, domain_meta, all_domains_meta

    if not (text and text.strip()):
        return {
            "top": None,
            "ranked": [],
            "scores": {},
            "catalog": all_domains_meta(),
        }

    verdict = classify(text)
    # Merge display metadata into every ranked entry that has metadata.
    for entry in verdict.get("ranked", []):
        display = domain_meta(entry["domain"])
        if not display:
            continue
        entry.update(display)
    verdict["catalog"] = all_domains_meta()
    return verdict


def public_domain_stats():
    """Count indexed docs per legal domain, alongside the domain catalog.

    Each distinct document id in the pipeline is counted once. A document
    whose metadata names a catalog domain increments that domain's count
    and, depending on its ``type``/``kind`` value, its caselaw or statute
    sub-count; every other document is counted as uncategorized. Metadata
    may be nested one level under a ``metadata`` key.

    Returns:
        ``{"total", "uncategorized", "domains"}`` where ``domains`` is the
        full catalog (including empty domains) with ``count``, ``caselaw``
        and ``statute`` added, sorted by count descending and then by
        ``label_he``. If walking the pipeline raises, the same shape is
        returned with all counts zeroed plus an ``error`` message.
    """
    from ..domain_classifier import all_domains_meta as _meta

    catalog = _meta()
    domain_ids = [entry["id"] for entry in catalog]
    by_domain: Dict[str, int] = dict.fromkeys(domain_ids, 0)
    by_domain_caselaw: Dict[str, int] = dict.fromkeys(domain_ids, 0)
    by_domain_statute: Dict[str, int] = dict.fromkeys(domain_ids, 0)
    caselaw_kinds = ("caselaw", "psak", "פסיקה")
    statute_kinds = ("statute", "law", "חוק", "חקיקה")
    uncategorized = 0
    total = 0

    try:
        from ..pipeline import get_pipeline

        pipeline = get_pipeline()
        # Prefer ``_indexed_docs``; fall back to ``_docs``, then to nothing.
        corpus = getattr(pipeline, "_indexed_docs", None)
        if not corpus:
            corpus = getattr(pipeline, "_docs", None) or []

        counted = set()
        for doc in corpus:
            ident = getattr(doc, "id", None) or getattr(doc, "doc_id", None)
            if not ident or ident in counted:
                continue
            counted.add(ident)
            total += 1

            domain, kind = None, ""
            md = getattr(doc, "metadata", None) or {}
            if isinstance(md, dict):
                nested = md.get("metadata")
                inner = nested if isinstance(nested, dict) else md
                domain = inner.get("domain")
                kind = (inner.get("type") or inner.get("kind") or "").lower()

            if not (domain and domain in by_domain):
                uncategorized += 1
                continue
            by_domain[domain] += 1
            if kind in caselaw_kinds:
                by_domain_caselaw[domain] += 1
            elif kind in statute_kinds:
                by_domain_statute[domain] += 1
    except Exception as _e:
        # Graceful degradation: catalog only, zero counts, error attached.
        zeroed = [{**entry, "count": 0, "caselaw": 0, "statute": 0}
                  for entry in catalog]
        return {
            "total": 0,
            "uncategorized": 0,
            "domains": zeroed,
            "error": str(_e),
        }

    domains_out = []
    for entry in catalog:
        did = entry["id"]
        domains_out.append({
            **entry,
            "count": by_domain.get(did, 0),
            "caselaw": by_domain_caselaw.get(did, 0),
            "statute": by_domain_statute.get(did, 0),
        })
    # Sort: highest counts first, ties broken by label.
    domains_out.sort(key=lambda row: (-row["count"], row["label_he"]))
    return {
        "total": total,
        "uncategorized": uncategorized,
        "domains": domains_out,
    }


def feedback_endpoint(req: FeedbackRequest, request: Request):
    """Record end-user feedback on an answer.

    Forwards the vote, reasons and free text to the answer-helpfulness
    tracker, using the session id as a pseudonymous user id. Per the
    original author, this endpoint needs only user identity, not admin
    scope (enforced elsewhere, not in this function).

    Returns:
        ``{"ok": True, "feedback_id": ...}`` for an accepted submission.

    Raises:
        HTTPException: 400 when the tracker rejects the submission with a
            ``ValueError``.
    """
    from ..middleware.answer_helpfulness import \
        get_answer_helpfulness_tracker
    try:
        tracker = get_answer_helpfulness_tracker()
        record = tracker.submit(
            request_id=req.request_id,
            vote=req.vote,
            reasons=req.reasons or [],
            free_text=req.free_text or "",
            # The session id doubles as a pseudonymous user id.
            user_id=req.session_id,
        )
    except ValueError as invalid:
        raise HTTPException(status_code=400, detail=str(invalid))
    return {"ok": True, "feedback_id": record.feedback_id}


def platform_changelog(
    format: str = "json",
    category: Optional[str] = None,
    version_prefix: Optional[str] = None,
    ported_only: bool = False,
):
    """Render the platform changelog in the requested format.

    Args:
        format: One of ``json``, ``markdown``, ``text`` or ``compact``.
        category: Optional filter; only applied for ``format="json"``.
        version_prefix: Optional filter; only applied for ``format="json"``.
        ported_only: Optional filter; only applied for ``format="json"``.

    Returns:
        For ``json``: the generator's full JSON rendering, or
        ``{"entries": [...]}`` when any filter is set. For ``markdown`` and
        ``text``: a ``PlainTextResponse``. For ``compact``: the generator's
        compact summary.

    Raises:
        HTTPException: 400 for any other ``format`` value.
    """
    from ..middleware.changelog_gen import get_changelog_generator

    generator = get_changelog_generator()

    if format == "json":
        wants_filter = category or version_prefix or ported_only
        if not wants_filter:
            return generator.render_json()
        entries = generator.list(category=category,
                                 version_prefix=version_prefix,
                                 ported_only=ported_only)
        return {"entries": entries}

    if format == "markdown":
        body = generator.render_markdown()
        return PlainTextResponse(body)

    if format == "text":
        body = generator.render_text()
        return PlainTextResponse(body)

    if format == "compact":
        return generator.render_compact_summary()

    problem = {
        "error": "invalid format",
        "valid": ["json", "markdown", "text", "compact"],
    }
    raise HTTPException(status_code=400, detail=problem)


def latest_signals():
    """Return the signals of the last entry in the pipeline cache.

    Returns:
        ``{"empty": True}`` when the cache is empty; otherwise the
        ``signals.to_dict()`` of the final value in the cache's iteration
        order.
    """
    if _pipeline.cache:
        *_, newest = _pipeline.cache.values()
        return newest.signals.to_dict()
    return {"empty": True}


# ---- Chat / multi-turn ---------------------------------------------------
def chat(req: ChatRequest, request: Request):
    """Non-streaming conversational endpoint with session memory.

    Validates the query text, runs one conversation turn through the
    pipeline, then returns the response dict with the session id and the
    session's current turn count added.

    Raises:
        HTTPException: 413 when validation raises ``OverflowError``;
            422 when it raises ``ValueError``.
    """
    try:
        validate_query_text(req.query)
    except OverflowError as too_long:
        raise HTTPException(status_code=413, detail=str(too_long))
    except ValueError as invalid:
        raise HTTPException(status_code=422, detail=str(invalid))

    reply = _pipeline.run_conversation(req.query, req.session_id, lang=req.lang)

    from ..memory import get_store
    conversation = get_store().get_or_create(req.session_id)

    payload = reply.to_dict()
    payload.update(
        session_id=req.session_id,
        n_turns=len(conversation.turns),
    )
    return payload


def session_info(session_id: str):
    """Return the stored session as a dict.

    Raises:
        HTTPException: 404 when the session store has no such session.
    """
    from ..memory import get_store

    found = get_store().get(session_id)
    if found is not None:
        return found.to_dict()
    raise HTTPException(status_code=404, detail="session not found")


def session_drop(session_id: str):
    """Drop one session from the store and report the store's result.

    Returns:
        ``{"session_id": ..., "dropped": <value returned by the store>}``.
    """
    from ..memory import get_store

    outcome = get_store().drop(session_id)
    return {
        "session_id": session_id,
        "dropped": outcome,
    }


def sessions_list(details: int = 0, min_turns: int = 0):
    """List stored sessions, optionally filtered by turn count.

    Args:
        details: When truthy, return the store's summary rows instead of
            bare session ids.
        min_turns: When positive, keep only sessions with at least this
            many turns.

    Returns:
        ``{"count": <n>, "sessions": [...]}`` where ``sessions`` holds
        summary rows (``details`` truthy) or session ids.
    """
    from ..memory import get_store
    store = get_store()
    if details:
        rows = store.summaries()
        if min_turns > 0:
            # ``or 0`` tolerates a summary row whose n_turns is None.
            rows = [row for row in rows
                    if int(row.get("n_turns") or 0) >= min_turns]
        return {"count": len(rows), "sessions": rows}
    ids = store.list_ids()
    if min_turns > 0:
        kept = []
        for sid in ids:
            # Look the session up ONCE: a second lookup could return None if
            # the session is dropped in between, crashing on ``.turns``.
            session = store.get(sid)
            if session and len(session.turns) >= min_turns:
                kept.append(sid)
        ids = kept
    return {"count": len(ids), "sessions": ids}


def sessions_gc(request: Request):
    """Run session garbage collection now and return the store's summary.

    An audit record (``sessions.gc``) carrying the request id and the
    summary fields is attempted afterwards; any failure while auditing is
    ignored so it cannot affect the response.
    """
    from ..memory import get_store

    gc_report = get_store().gc_now()
    try:
        observer = get_obs()
        observer.audit("sessions.gc",
                       request_id=_rid_from(request),
                       **gc_report)
    except Exception:
        # Auditing is best-effort.
        pass
    return gc_report


def sessions_drop_all(request: Request):
    """Drop every stored session and report what the store returned.

    An audit record (``sessions.drop_all``) carrying the request id and the
    response body is attempted afterwards; any failure while auditing is
    ignored so it cannot affect the response.

    Returns:
        ``{"dropped": <value returned by the store>}``.
    """
    from ..memory import get_store

    response = {"dropped": get_store().drop_all()}
    try:
        observer = get_obs()
        observer.audit("sessions.drop_all",
                       request_id=_rid_from(request),
                       **response)
    except Exception:
        # Auditing is best-effort.
        pass
    return response


def chat_stream(req: ChatRequest, request: Request):
    """Server-Sent Events variant of the chat endpoint.

    Validates the query, then streams one conversation turn as SSE frames.
    The turn is executed with a single ``run_conversation`` call; the
    answer is then re-emitted word by word.

    Event order:
        event: followup   data: {"is_followup": bool, "expanded_query": "..."}
        event: retrieved  data: {"doc_ids": [...], "count": N}
        event: answer     data: {"chunk": "word "}                      (repeated)
        event: done       data: {session_id, n_turns, answer, sources,
                                 omega, passed, verification}
        event: error      data: {"code","message"}                       (on failure)

    Raises:
        HTTPException: 413 when validation raises ``OverflowError``;
            422 when it raises ``ValueError``. Failures after streaming
            starts are reported as an ``error`` event instead.
    """
    try:
        validate_query_text(req.query)
    except OverflowError as too_long:
        raise HTTPException(status_code=413, detail=str(too_long))
    except ValueError as invalid:
        raise HTTPException(status_code=422, detail=str(invalid))

    from fastapi.responses import StreamingResponse
    import json as _json

    def _sse(event: str, data: Any) -> str:
        # One SSE frame: event name, JSON payload, blank-line terminator.
        payload = _json.dumps(data, ensure_ascii=False)
        return f"event: {event}\ndata: {payload}\n\n"

    def _event_gen():
        try:
            # Detect follow-up + expand before running the turn.
            from ..memory import expand_followup, get_store, is_followup
            store = get_store()
            convo = store.get_or_create(req.session_id)
            followup = (bool(is_followup(req.query, lang=req.lang))
                        and bool(convo.turns))
            expanded_query = None
            if followup:
                expanded_query = expand_followup(req.query, convo)
            yield _sse("followup", {
                "is_followup": followup,
                "expanded_query": expanded_query,
            })

            # Run the pipeline for this turn.
            resp = _pipeline.run_conversation(req.query, req.session_id,
                                              lang=req.lang)

            # Stage 1: distinct doc ids of the retrieved chunks, in order.
            candidates = getattr(resp, "retrieved", []) or []
            chunk_doc_ids = (
                getattr(getattr(cand, "chunk", None), "doc_id", None)
                for cand in candidates
            )
            doc_ids = list(dict.fromkeys(d for d in chunk_doc_ids if d))
            yield _sse("retrieved",
                       {"doc_ids": doc_ids, "count": len(doc_ids)})

            # Stage 2: answer streamed word-by-word (empty pieces skipped).
            answer = resp.answer or ""
            for word in filter(None, answer.split(" ")):
                yield _sse("answer", {"chunk": word + " "})

            # Stage 3: final envelope — includes session_id + n_turns.
            try:
                omega = float(resp.signals.omega) if resp.signals else None
            except Exception:
                omega = None
            verif = getattr(resp, "verification", None)
            # Re-read the session so the turn count reflects this turn.
            convo = store.get_or_create(req.session_id)
            n_turns = len(convo.turns)
            sources = list(resp.sources or [])
            if verif:
                passed = bool(getattr(verif, "passed", False))
            else:
                passed = None
            if hasattr(verif, "to_dict"):
                verification = verif.to_dict()
            else:
                verification = getattr(verif, "__dict__", None)
            yield _sse("done", {
                "session_id": req.session_id,
                "n_turns":    n_turns,
                "answer":     answer,
                "sources":    sources,
                "omega":      omega,
                "passed":     passed,
                "verification": verification,
            })
        except Exception as exc:
            # Any failure mid-stream becomes a final, truncated error event.
            yield _sse("error", {
                "code":    "pipeline_error",
                "message": f"{type(exc).__name__}: {exc}"[:240],
            })

    stream_headers = {
        "Cache-Control": "no-cache",
        "X-Accel-Buffering": "no",
    }
    return StreamingResponse(_event_gen(),
                             media_type="text/event-stream",
                             headers=stream_headers)


# ──────────────────────────────────────────────────────────────────
# v4.x — Document Upload + Community Moderation
# ──────────────────────────────────────────────────────────────────
from fastapi import UploadFile, File, Form, HTTPException as _HTTPExc

@app.post("/v1/upload/session")
async def upload_session_doc(
    file: UploadFile = File(...),
    session_id: str = Form(...),
    uploader: str = Form("anonymous"),
):
    """Upload a document scoped to the current session.

    The file is parsed to text, chunked (1200 characters with 200
    overlap) and saved through the upload store's ``save_session_upload``
    under ``session_id``. The original author documents these uploads as
    session-only with a 24h TTL and never part of the public corpus; that
    policy lives in the upload store, not in this function.

    Returns:
        ``{"doc_id", "session_id", "char_count", "chunk_count",
        "metadata"}``.

    Raises:
        HTTPException: 413 when the file exceeds 8MB; 400 when parsing
            fails or fewer than 50 characters of text are extracted.
    """
    from ..upload import parser as _up_parser
    from ..upload import store as _up_store
    max_bytes = 8 * 1024 * 1024
    # Read at most one byte past the limit: enough to detect an oversized
    # upload without pulling an arbitrarily large file into memory.
    content = await file.read(max_bytes + 1)
    if len(content) > max_bytes:
        raise _HTTPExc(413, "File too large (max 8MB)")
    filename = file.filename or ""
    try:
        text = _up_parser.parse_text(content, filename)
    except Exception as e:
        # Chain the cause so the parser's traceback is kept for server logs.
        raise _HTTPExc(400, f"Parse error: {e}") from e
    # ``not text`` also covers a parser that returns None.
    if not text or len(text) < 50:
        raise _HTTPExc(400, "Extracted text too short")
    chunks = _up_parser.chunk_text(text, chunk_chars=1200, overlap=200)
    metadata = _up_parser.extract_metadata(text)
    metadata["filename"] = filename
    doc_id = _up_store.save_session_upload(
        session_id=session_id,
        filename=filename,
        text=text,
        metadata=metadata,
        chunks=chunks,
        uploader=uploader,
    )
    return {
        "doc_id": doc_id,
        "session_id": session_id,
        "char_count": len(text),
        "chunk_count": len(chunks),
        "metadata": metadata,
    }


@app.get("/v1/upload/session/{session_id}")
async def list_session_docs(session_id: str):
    """Return a summary of every document uploaded under *session_id*.

    Only per-document summary fields are returned (id, filename,
    metadata, chunk count, upload timestamp) — not the stored text.
    """
    from ..upload import store as _up_store

    uploads = _up_store.get_session_uploads(session_id)
    summaries = []
    for entry in uploads:
        summaries.append({
            "doc_id": entry["doc_id"],
            "filename": entry["filename"],
            "metadata": entry["metadata"],
            "chunk_count": len(entry["chunks"]),
            "uploaded_ts": entry["uploaded_ts"],
        })
    return {
        "session_id": session_id,
        "n_docs": len(uploads),
        "docs": summaries,
    }


@app.post("/v1/upload/submit")
async def submit_to_corpus(
    file: UploadFile = File(...),
    uploader: str = Form(...),
    uploader_email: str = Form(""),
    kind: str = Form("caselaw"),
):
    """Submit a doc to the community moderation queue.

    The submission stays pending until a moderator approves it; only
    then is it added to the public corpus.

    Args:
        file: the uploaded document (multipart form field).
        uploader: submitter's name; must be non-blank.
        uploader_email: optional contact address.
        kind: submission kind passed through to the queue.

    Returns:
        dict with ``sub_id``, ``state`` ("pending"), ``char_count``,
        ``chunk_count`` and the extracted ``metadata``.

    Raises:
        HTTPException 413: the upload exceeds 8MB.
        HTTPException 400: blank uploader, parser failure, or fewer
            than 200 characters of extracted text.
    """
    from ..upload import parser as _up_parser
    from ..upload import store as _up_store

    max_bytes = 8 * 1024 * 1024
    # Read at most one byte past the cap: that is enough to detect an
    # oversized upload without copying an arbitrarily large file into a
    # single in-memory bytes object.
    content = await file.read(max_bytes + 1)
    if len(content) > max_bytes:
        raise _HTTPExc(413, "File too large (max 8MB)")
    if not uploader.strip():
        raise _HTTPExc(400, "uploader (your name) is required")

    filename = file.filename or ""
    try:
        text = _up_parser.parse_text(content, filename)
    except Exception as e:
        # Keep the parser's exception chained for server-side tracebacks.
        raise _HTTPExc(400, f"Parse error: {e}") from e
    if len(text) < 200:
        raise _HTTPExc(400, "Document too short to submit (min 200 chars)")

    chunks = _up_parser.chunk_text(text, chunk_chars=1200, overlap=200)
    metadata = _up_parser.extract_metadata(text)
    metadata["filename"] = filename
    metadata["uploader"] = uploader
    sub_id = _up_store.submit_to_queue(
        uploader=uploader,
        uploader_email=uploader_email,
        filename=filename,
        text=text,
        metadata=metadata,
        chunks=chunks,
        kind=kind,
    )
    return {
        "sub_id": sub_id,
        "state": "pending",
        "char_count": len(text),
        "chunk_count": len(chunks),
        "metadata": metadata,
    }


@app.get("/v1/moderation/queue")
async def list_moderation_queue(state: str = "pending", limit: int = 50):
    """Return up to *limit* queued submissions that are in *state*."""
    from ..upload import store as _up_store

    queued = _up_store.list_queue(state=state, limit=limit)
    return {"state": state, "items": queued}


@app.get("/v1/moderation/submission/{sub_id}")
async def get_submission_detail(sub_id: str):
    """Return one submission, with long text replaced by a preview.

    When the stored text is longer than 4000 characters the response
    carries ``text_preview`` (first 4000 chars + "…") and
    ``text_full_length`` instead of ``text``.

    Raises:
        HTTPException 404: no submission with this id.
    """
    from ..upload import store as _up_store

    sub = _up_store.get_submission(sub_id)
    if not sub:
        raise _HTTPExc(404, "submission not found")

    # A missing or None text is treated as empty rather than crashing.
    text = sub.get("text") or ""
    if len(text) > 4000:
        # Work on a shallow copy so the truncation never alters the
        # object handed back by the store.
        sub = dict(sub)
        sub["text_preview"] = text[:4000] + "…"
        sub["text_full_length"] = len(text)
        del sub["text"]
    return sub


class _ModerationDecideRequest(BaseModel):
    """Request body for POST /v1/moderation/decide."""
    sub_id: str             # id of the queued submission being decided
    decision: str           # 'approve' or 'reject'
    actor: str              # moderator name; the endpoint rejects blank values
    note: Optional[str] = ""  # optional free-text note passed to the store


@app.post("/v1/moderation/decide")
async def decide_submission(req: _ModerationDecideRequest):
    """Record a moderator's approve/reject decision for a submission.

    An approval additionally tries to promote the document into the
    live corpus. A promotion failure does not fail the request: the
    decision has already been stored, so the error is reported under
    ``promotion_warning`` instead.

    Raises:
        HTTPException 400: unknown decision or blank actor.
        HTTPException 409: the store refused the decision (submission
            not pending or not found).
    """
    from ..upload import store as _up_store

    verdict = req.decision
    if verdict != "approve" and verdict != "reject":
        raise _HTTPExc(400, "decision must be 'approve' or 'reject'")
    actor_text = req.actor or ""
    if not actor_text.strip():
        raise _HTTPExc(400, "actor (your name) is required")

    note_text = req.note or ""
    accepted = _up_store.decide(req.sub_id, verdict, req.actor, note_text)
    if not accepted:
        raise _HTTPExc(409, "submission not pending or not found")

    response: Dict[str, Any] = {
        "sub_id": req.sub_id,
        "decision": verdict,
        "actor": req.actor,
    }
    if verdict != "approve":
        return response

    # Promotion targets the pipeline returned by get_pipeline(); per the
    # original author's note, that is the instance the query endpoints
    # read from. The imports stay inside the try so that an import
    # failure is also reported as a warning rather than an error.
    try:
        from ..upload import promote as _up_promote
        from ..pipeline import get_pipeline as _get_pipe
        response["promotion"] = _up_promote.promote_to_corpus(
            _get_pipe(), req.sub_id)
    except Exception as e:
        response["promotion_warning"] = f"{type(e).__name__}: {e}"
    return response


@app.get("/v1/moderation/log")
async def get_mod_log(sub_id: Optional[str] = None, limit: int = 50):
    """Return moderation log entries, optionally filtered by *sub_id*."""
    from ..upload import store as _up_store

    entries = _up_store.get_moderation_log(sub_id=sub_id, limit=limit)
    return {"log": entries}


@app.post("/v1/moderation/bulk_promote")
async def moderation_bulk_promote():
    """Promote all approved submissions that were never promoted.

    Intended for backfill / disaster recovery; returns whatever the
    promote module reports.
    """
    from ..upload import promote as _up_promote
    from ..pipeline import get_pipeline as _get_pipe

    live_pipeline = _get_pipe()
    return _up_promote.bulk_promote_unpromoted(live_pipeline)


@app.post("/v1/moderation/replay_corpus")
async def moderation_replay_corpus():
    """Reload runtime/community_corpus.jsonl into the running pipeline.

    Per the original note this does not re-stamp promoted_ts; it is
    meant for restoring community docs after a fresh process start
    that did not autoload them.
    """
    from ..upload import promote as _up_promote
    from ..pipeline import get_pipeline as _get_pipe

    live_pipeline = _get_pipe()
    return _up_promote.replay_community_corpus(live_pipeline)


# ──────────────────────────────────────────────────────────────────────
# v5 — WhatsApp live ingest endpoint.
# Receives thread payloads from the Node sidecar
# (tau_rag/scrapers/whatsapp_sidecar/index.js). The sidecar handles:
# session, message buffering, thread segmentation, and anonymization.
# Here we just authenticate, validate Hebrew quality, write to the
# scraped corpus JSONL, and inject into the live pipeline so the new
# thread is immediately searchable without a server restart.
# ──────────────────────────────────────────────────────────────────────
class _WhatsappLivePayload(BaseModel):  # type: ignore
    """JSON body accepted by POST /v1/scrapers/whatsapp/ingest."""
    id: str                        # record id; also the idempotency key
    text: str                      # thread text (quality-gated and PII-redacted before storage)
    metadata: Dict[str, Any] = {}  # copied into the stored record's metadata


def _whatsapp_live_tokens() -> List[str]:
    """Active HMAC keys. Returns a list so we can support TWO valid
    secrets simultaneously during rotation — TAU_RAG_WA_TOKEN (primary)
    + TAU_RAG_WA_TOKEN_OLD (the previous one, kept valid for ~24h while
    sidecars roll over). Either matches → request is accepted.

    Empty list = endpoint disabled (safe default for unconfigured boxes)."""
    out = []
    for env_name in ("TAU_RAG_WA_TOKEN", "TAU_RAG_WA_TOKEN_OLD"):
        v = _os.environ.get(env_name, "").strip()
        if v:
            out.append(v)
    return out


def _whatsapp_live_token() -> str:
    """Return the first active secret, or "" when none is configured.

    Kept for callers that only need to know whether auth is set up.
    """
    active = _whatsapp_live_tokens()
    if not active:
        return ""
    return active[0]


# In-memory replay cache + idempotency cache.
# Key = (timestamp, nonce) → seen | record_id → first_seen_ts.
# Both are bounded LRU-ish (capped at 10K entries, drop oldest 10% on
# overflow). Cheap, no extra deps, survives a single process lifetime —
# which is the threat window we care about for replay.
# Both caches start as None and are created together by _wa_caches().
# _WA_REPLAY maps "<ts>.<nonce>" -> time the request was accepted.
_WA_REPLAY: "OrderedDict[str, int]" = None  # type: ignore  # init below
# _WA_IDEMPOTENT maps payload id -> time the id was first seen.
_WA_IDEMPOTENT: "OrderedDict[str, int]" = None  # type: ignore  # init below
# Size threshold checked by _wa_cache_evict().
_WA_MAX_CACHE = 10_000
_WA_CLOCK_SKEW_S = 300  # accept timestamps within ±5min of now


def _wa_caches():
    """Return ``(replay_cache, idempotency_cache)``, creating both on first use."""
    global _WA_REPLAY, _WA_IDEMPOTENT
    if _WA_REPLAY is not None:
        return _WA_REPLAY, _WA_IDEMPOTENT
    from collections import OrderedDict
    _WA_REPLAY, _WA_IDEMPOTENT = OrderedDict(), OrderedDict()
    return _WA_REPLAY, _WA_IDEMPOTENT


def _wa_cache_evict(d):
    """Trim *d* once it holds more than _WA_MAX_CACHE entries.

    Removes the oldest entries (front of the ordered dict), a tenth of
    the cap at a time and never fewer than one. Stops quietly if an
    entry cannot be popped.
    """
    if len(d) <= _WA_MAX_CACHE:
        return
    remaining = max(1, _WA_MAX_CACHE // 10)
    while remaining > 0:
        try:
            d.popitem(last=False)
        except Exception:
            return
        remaining -= 1


def _wa_verify_hmac(
    body_bytes: bytes,
    ts_header: str,
    nonce_header: str,
    sig_header: str,
) -> Tuple[bool, str]:
    """Verify the HMAC + timestamp + nonce. Returns (ok, reason).

    The signature is HMAC-SHA256 (hex) over `<ts>.<nonce>.<body>` keyed
    by a shared token. A captured request cannot be replayed (the
    (ts, nonce) pair is rejected on second use) and a new one cannot
    be forged without the key.

    Args:
        body_bytes: raw request body exactly as received.
        ts_header: X-TAU-Timestamp value (unix seconds).
        nonce_header: X-TAU-Nonce value.
        sig_header: X-TAU-Signature value (hex digest).

    Returns:
        ``(True, "ok")`` on success, otherwise ``(False, reason)``.
        On success the (ts, nonce) pair is recorded in the replay cache.
    """
    import hashlib as _hl
    import hmac as _hm
    # Imported locally so the function does not depend on a
    # module-level `time` name being in scope.
    import time as _tm

    secrets = _whatsapp_live_tokens()
    if not secrets:
        return False, "ingest_disabled — set TAU_RAG_WA_TOKEN"
    if not (ts_header and nonce_header and sig_header):
        return False, "missing_auth_headers"

    # 1. Timestamp window check
    try:
        ts = int(ts_header)
    except (TypeError, ValueError):
        return False, "bad_timestamp"
    now = int(_tm.time())
    if abs(now - ts) > _WA_CLOCK_SKEW_S:
        return False, f"timestamp_skew_{now - ts}s"

    # 2. Replay check — (ts, nonce) must be unique
    replay_cache, _ = _wa_caches()
    key = f"{ts}.{nonce_header}"
    if key in replay_cache:
        return False, "replay_detected"

    # 3. HMAC check — accept either active token. Every secret is
    # checked (no early exit) so timing does not reveal which matched.
    msg = f"{ts}.{nonce_header}.".encode("utf-8") + body_bytes
    # Compare as bytes: compare_digest() raises TypeError for str
    # arguments containing non-ASCII characters, which would turn a
    # malformed signature header into an unhandled error.
    provided_sig = sig_header.encode("utf-8", "replace")
    matched = False
    for secret in secrets:
        expected_sig = _hm.new(
            secret.encode("utf-8"), msg, _hl.sha256
        ).hexdigest().encode("ascii")
        if _hm.compare_digest(expected_sig, provided_sig):
            matched = True
    if not matched:
        return False, "hmac_mismatch"

    # All checks passed — record nonce so it can't be replayed
    replay_cache[key] = now
    _wa_cache_evict(replay_cache)
    return True, "ok"


# Persistent replay/idempotency cache — survives uvicorn restarts so a
# 5-minute restart window can't be exploited to replay an old (ts,nonce).
# Format: JSONL where each line is
# {"kind": "replay" | "idem", "k": "<key>", "ts": <int>}. Rotation
# rewrites the file keeping only entries from the last 24h.
# (Aliased import — `Path` isn't imported at module level in this file;
# every other path-using function imports it locally.)
from pathlib import Path as _PathWA
# Location of the on-disk replay/idempotency log:
# <package root>/runtime/scraped/wa_replay.jsonl, where the package
# root is the parent of this file's directory.
_WA_REPLAY_DISK = (_PathWA(__file__).resolve().parent.parent
                    / "runtime" / "scraped" / "wa_replay.jsonl")


# wa_replay.jsonl rotation — append-only files grow forever. Once the
# file passes _WA_REPLAY_MAX_BYTES, we rewrite it keeping ONLY entries
# from the last 24h (which is also the load-cutoff window). Cheap —
# happens every ~10K writes for most deployments.
_WA_REPLAY_MAX_BYTES = 5 * 1024 * 1024   # 5 MB cap
# Count of successful appends made by _wa_disk_persist(); the file-size
# check runs on every 1000th write. Kept in a dict so it can be updated
# in place without a `global` statement.
_WA_REPLAY_WRITE_COUNTER = {"n": 0}


def _wa_rotate_replay_if_needed() -> None:
    """Compact wa_replay.jsonl by dropping entries older than 24h.

    Does nothing when the file is missing or still smaller than
    _WA_REPLAY_MAX_BYTES. Otherwise the surviving lines are written to
    a sibling ``.tmp`` file which then replaces the original. Lines
    that fail to parse are dropped. Any failure is printed, never
    raised.
    """
    import json as _ej
    # Imported locally so the cutoff computation does not depend on a
    # module-level `time` name; an unresolved name here would be
    # swallowed below and rotation would silently never happen.
    import time as _tm

    try:
        if not _WA_REPLAY_DISK.exists():
            return
        if _WA_REPLAY_DISK.stat().st_size < _WA_REPLAY_MAX_BYTES:
            return
        cutoff = int(_tm.time()) - 24 * 3600
        kept_lines = []
        with _WA_REPLAY_DISK.open("r", encoding="utf-8") as f:
            for line in f:
                try:
                    rec = _ej.loads(line)
                    if rec.get("ts", 0) >= cutoff:
                        kept_lines.append(line)
                except Exception:
                    # Blank, corrupt or non-object line — drop it.
                    continue
        tmp = _WA_REPLAY_DISK.with_suffix(".tmp")
        with tmp.open("w", encoding="utf-8") as f:
            f.writelines(kept_lines)
        tmp.replace(_WA_REPLAY_DISK)
        print(f"[tau-rag] wa_replay.jsonl rotated → {len(kept_lines)} "
              f"entries kept from last 24h")
    except Exception as e:
        print(f"[tau-rag] wa_replay rotation failed: {e}")


def _wa_disk_persist(kind: str, key: str, ts: int) -> None:
    """Append one cache entry to wa_replay.jsonl (best-effort).

    Args:
        kind: "replay" or "idem" — which cache the entry belongs to.
        key: cache key ("<ts>.<nonce>" or a payload id).
        ts: unix timestamp stored with the entry.

    Never raises: a failed disk write is ignored because the in-memory
    cache still protects the running process.
    """
    import json as _ej

    try:
        # Serialize with json.dumps rather than string formatting: keys
        # come from request data (nonce header, payload id) and a quote
        # or backslash would otherwise produce a line that the loader
        # cannot parse, losing that entry after a restart.
        line = _ej.dumps({"kind": kind, "k": key, "ts": ts},
                         separators=(",", ":"))
        _WA_REPLAY_DISK.parent.mkdir(parents=True, exist_ok=True)
        with _WA_REPLAY_DISK.open("a", encoding="utf-8") as f:
            f.write(line + "\n")
        _WA_REPLAY_WRITE_COUNTER["n"] += 1
        # Check size every 1000 writes — file ops aren't free
        if _WA_REPLAY_WRITE_COUNTER["n"] % 1000 == 0:
            _wa_rotate_replay_if_needed()
    except Exception:
        pass  # disk-write is best-effort — in-memory cache still works


def _wa_disk_load() -> None:
    """Repopulate the in-memory caches from wa_replay.jsonl.

    Called on the first ingest request so that a restart does not open
    a replay window. Only entries from the last 24h are loaded; blank,
    unparseable or incomplete lines are skipped. Never raises for a
    missing or unreadable file.
    """
    import json as _ej
    # Imported locally so the cutoff computation (which sits outside
    # any try block) does not depend on a module-level `time` name.
    import time as _tm

    if not _WA_REPLAY_DISK.exists():
        return
    replay, idem = _wa_caches()
    cutoff = int(_tm.time()) - 24 * 3600  # only entries from last 24h
    try:
        with _WA_REPLAY_DISK.open("r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = _ej.loads(line)
                    if rec.get("ts", 0) < cutoff:
                        continue
                    if rec["kind"] == "replay":
                        replay[rec["k"]] = rec["ts"]
                    elif rec["kind"] == "idem":
                        idem[rec["k"]] = rec["ts"]
                except Exception:
                    # Corrupt line or missing field — skip this entry.
                    continue
    except Exception:
        # Best-effort: an unreadable file leaves the caches as they are.
        pass


# Stats — visible via /v1/scrapers/whatsapp/stats. Pure counters,
# in-memory; cheap reset on process restart.
# (Local time import — `time` isn't imported at module level here.)
import time as _wa_time
# Counters updated in place by whatsapp_live_ingest and copied out by
# whatsapp_live_stats. "started_ts" is the unix time at module import.
# "rejected_auth" also counts rate-limited requests; "rejected_pii"
# counts payloads that fail the quality gate after PII redaction.
_WA_STATS = {
    "started_ts": int(_wa_time.time()),
    "received": 0, "kept": 0, "duplicates": 0,
    "rejected_auth": 0, "rejected_quality": 0, "rejected_pii": 0,
    "redacted_pii_count": 0,
    "live_inject_failed": 0,
}


@app.get("/v1/scrapers/whatsapp/stats")
def whatsapp_live_stats():
    """Return a copy of the ingest counters plus ``uptime_s``.

    ``uptime_s`` is the number of seconds since ``started_ts``.
    """
    # Imported locally so the handler does not depend on a module-level
    # `time` name being in scope.
    import time as _tm

    out = dict(_WA_STATS)
    out["uptime_s"] = int(_tm.time()) - out["started_ts"]
    return out


# Token-bucket rate limiter — per (source_ip, sidecar token-prefix).
# A sidecar that's malfunctioning (or a malicious caller with a leaked
# token) can't flood the pipeline. Default: 60 requests/minute, burst 20.
# Way above any reasonable WhatsApp group activity (real groups produce
# 0-5 threads/minute), but well below DoS territory.
# Rate-limit key -> {"tokens": remaining tokens, "ts": last refill time}.
_WA_RATE_BUCKETS: Dict[str, Dict[str, float]] = {}
_WA_RATE_PER_MIN = 60   # refill rate, in tokens per minute
_WA_RATE_BURST = 20     # bucket capacity, and the initial token count


def _wa_rate_limit_key(request) -> str:  # type: ignore
    """Rate-limit by (IP, first 8 chars of signature). Distinct sidecars
    get distinct buckets even from the same IP (e.g. localhost)."""
    ip = (request.headers.get("x-forwarded-for") or
          (request.client.host if request.client else "unknown"))
    sig_prefix = (request.headers.get("x-tau-signature", "") or "")[:8]
    return f"{ip}:{sig_prefix}"


def _wa_check_rate_limit(request) -> Tuple[bool, str]:  # type: ignore
    """Token-bucket rate limit check for one request.

    Each key from _wa_rate_limit_key() owns a bucket holding up to
    _WA_RATE_BURST tokens, refilled at _WA_RATE_PER_MIN tokens per
    minute. A request consumes one token.

    Returns:
        ``(True, "ok")`` when a token was available, otherwise
        ``(False, reason)``.
    """
    # Imported locally so the function does not depend on a
    # module-level `time` name being in scope.
    import time as _tm

    key = _wa_rate_limit_key(request)
    now = _tm.time()
    bucket = _WA_RATE_BUCKETS.get(key)
    if bucket is None:
        bucket = {"tokens": float(_WA_RATE_BURST), "ts": now}
        _WA_RATE_BUCKETS[key] = bucket

    # Refill since last check. Clamp at zero so a backwards wall-clock
    # step cannot take tokens away.
    elapsed = max(0.0, now - bucket["ts"])
    bucket["tokens"] = min(
        float(_WA_RATE_BURST),
        bucket["tokens"] + elapsed * (_WA_RATE_PER_MIN / 60.0),
    )
    bucket["ts"] = now
    if bucket["tokens"] < 1.0:
        return False, f"rate_limit ({_WA_RATE_PER_MIN}/min, burst {_WA_RATE_BURST})"
    bucket["tokens"] -= 1.0

    # Cheap occasional cleanup — drop buckets idle for >10 min
    if len(_WA_RATE_BUCKETS) > 100:
        cutoff = now - 600
        stale = [k for k, b in _WA_RATE_BUCKETS.items() if b["ts"] < cutoff]
        for k in stale:
            del _WA_RATE_BUCKETS[k]
    return True, "ok"


@app.post("/v1/scrapers/whatsapp/ingest")
async def whatsapp_live_ingest(
    payload: _WhatsappLivePayload,
    request: Request,  # type: ignore  # imported earlier in fastapi_app
):
    """Receive a single thread from the WhatsApp sidecar.

    Auth: HMAC-SHA256 over '<ts>.<nonce>.<body_json>' keyed by
    TAU_RAG_WA_TOKEN. Headers required:
        X-TAU-Timestamp:  unix seconds (must be within ±5min)
        X-TAU-Nonce:      random per-request (any string; replay-checked)
        X-TAU-Signature:  hex(HMAC)

    Pipeline applied to the payload:
        rate limit  →  HMAC verify  →  duplicate check  →  Hebrew-quality
        →  PII redaction  →  post-redaction quality  →  JSONL append
        →  mark id as seen  →  live inject (in threadpool, non-blocking)

    The id is marked as seen only AFTER the JSONL append succeeds, so a
    sidecar retry following a failed write (or a rejected payload) is
    processed again instead of being answered as a duplicate.

    Returns:
        A dict whose "ok" field reports the outcome. Failures are
        reported in the body ("error" / "reason"), not by raising.
    """
    import json as _wj
    # Imported locally so the handler does not depend on a module-level
    # `time` name being in scope.
    import time as _tm
    from pathlib import Path as _PWA

    _WA_STATS["received"] += 1
    # First-ever call: rehydrate caches from disk so a restart doesn't
    # open a replay window
    if _WA_STATS["received"] == 1:
        _wa_disk_load()

    # Rate limit BEFORE HMAC verify, so rejected callers never reach
    # the crypto work.
    rl_ok, rl_reason = _wa_check_rate_limit(request)
    if not rl_ok:
        _WA_STATS["rejected_auth"] += 1   # bucketed with auth rejects
        return {"ok": False, "error": "rate_limited", "reason": rl_reason}

    # Read raw body so we HMAC the exact bytes the sidecar signed
    raw = await request.body()
    ts_header = request.headers.get("x-tau-timestamp", "")
    nonce_header = request.headers.get("x-tau-nonce", "")
    ok, reason = _wa_verify_hmac(
        raw,
        ts_header,
        nonce_header,
        request.headers.get("x-tau-signature", ""),
    )
    if not ok:
        _WA_STATS["rejected_auth"] += 1
        return {"ok": False, "error": "unauthorized", "reason": reason}
    # Persist the accepted nonce so a restart can't replay it
    try:
        ts_h = int(ts_header or "0")
        if ts_h and nonce_header:
            _wa_disk_persist("replay", f"{ts_h}.{nonce_header}", ts_h)
    except Exception:
        pass  # best-effort: the in-memory replay cache already has it

    # Idempotency — an id that was already persisted is a no-op. The
    # sidecar retries on transient errors and we don't want
    # double-injection.
    _, idem_cache = _wa_caches()
    if payload.id in idem_cache:
        _WA_STATS["duplicates"] += 1
        return {"ok": True, "duplicate": True, "id": payload.id,
                "first_seen_ts": idem_cache[payload.id]}

    # Validate Hebrew quality — same gate the file-based scraper uses
    try:
        from ..scrapers.core import hebrew_quality_score
        q = hebrew_quality_score(payload.text or "")
        if not q.get("ok"):
            _WA_STATS["rejected_quality"] += 1
            return {"ok": False, "error": "quality_gate_failed",
                    "diagnose": q}
    except Exception:
        pass  # fail-open: scraper module not loadable shouldn't break ingest

    # PII redaction — runs BEFORE anything is persisted. Counts are
    # added to metadata + global stats so we can monitor what the
    # redactor is catching.
    # NOTE(review): this is fail-open by design — if the redactor cannot
    # be imported or raises, the UNREDACTED text is persisted. Confirm
    # that this is acceptable for the deployment.
    clean_text = payload.text or ""
    pii_counts: Dict[str, int] = {}
    try:
        from ..scrapers.pii_redactor import redact_pii
        clean_text, pii_counts = redact_pii(clean_text)
        if pii_counts:
            _WA_STATS["redacted_pii_count"] += sum(pii_counts.values())
    except Exception:
        pass  # fail-open: redactor unavailable shouldn't drop the message

    # Re-validate quality AFTER redaction — if the redactor stripped so
    # much the remaining text is mostly tokens, drop it.
    try:
        from ..scrapers.core import hebrew_quality_score as _hq2
        q2 = _hq2(clean_text)
        if not q2.get("ok"):
            _WA_STATS["rejected_pii"] += 1
            return {"ok": False, "error": "post_redaction_quality_failed",
                    "diagnose": q2, "pii": pii_counts}
    except Exception:
        pass

    # Append to the same JSONL the file-based scraper writes — autoload
    # on next restart picks it up; live inject below makes it usable now.
    from ..core.types import Document as _DocW
    from ..pipeline import get_pipeline as _pipe_w

    here = _PWA(__file__).resolve().parent.parent
    out_path = here / "runtime" / "scraped" / "scraped_corpus.jsonl"
    out_path.parent.mkdir(parents=True, exist_ok=True)
    now_ts = int(_tm.time())
    rec = {
        "id": payload.id,
        "text": clean_text,
        "metadata": dict(payload.metadata or {}),
    }
    rec["metadata"]["source"] = rec["metadata"].get("source", "whatsapp_live")
    rec["metadata"]["ingested_at"] = now_ts
    if pii_counts:
        rec["metadata"]["pii_redacted"] = pii_counts

    try:
        with out_path.open("a", encoding="utf-8") as f:
            f.write(_wj.dumps(rec, ensure_ascii=False) + "\n")
    except Exception as e:
        # The id has NOT been marked as seen, so a retry is processed.
        return {"ok": False, "error": f"write_failed: {type(e).__name__}: {e}"}

    # The record is durable — only now mark the id as seen. There is no
    # await between the duplicate check above and this point, so two
    # requests on the event loop cannot both pass the check for one id.
    idem_cache[payload.id] = now_ts
    _wa_cache_evict(idem_cache)
    _wa_disk_persist("idem", payload.id, now_ts)

    # Live inject — so the thread is searchable immediately. Runs in a
    # threadpool so chunking + indexing don't block the event loop.
    # If the inject fails, the JSONL persistence still ensures the
    # record is loaded on the next server restart.
    from starlette.concurrency import run_in_threadpool

    def _do_inject():
        pipe = _pipe_w()
        # Domain classify, mirroring the autoload behavior
        try:
            from ..domain_classifier import classify as _cls_wa
            res = _cls_wa(clean_text)
            if res.get("top"):
                rec["metadata"]["domain"] = res["top"]
                rec["metadata"]["domain_scores"] = res.get("scores", {})
        except Exception:
            pass
        doc = _DocW(id=payload.id, text=clean_text,
                     metadata=rec["metadata"])
        chunker = _os.environ.get("TAU_RAG_AUTOLOAD_CHUNKER", "legal_hebrew")
        return pipe.add_documents([doc], chunker=chunker)

    try:
        n_chunks = await run_in_threadpool(_do_inject)
        _WA_STATS["kept"] += 1
        return {"ok": True, "persisted": True, "n_chunks": n_chunks,
                "id": payload.id, "pii_redacted": pii_counts}
    except Exception as e:
        _WA_STATS["live_inject_failed"] += 1
        return {"ok": True, "persisted": True, "live_inject_failed":
                f"{type(e).__name__}: {e}", "pii_redacted": pii_counts}


# ──────────────────────────────────────────────────────────────────────
# v5 — Corpus inventory + existence lookup
# Used by the sidebar widget to show "X פסקי דין • Y חוקים" and a
# search-as-you-type "do we have …?" lookup.
# ──────────────────────────────────────────────────────────────────────
def _corpus_inventory_snapshot() -> Dict[str, Any]:
    """Build (or reuse) the corpus inventory shown by the counter widget.

    Tier A documents (the pipeline's ``_indexed_docs``) are bucketed by
    their ``case_type`` / ``source`` metadata. Tier B sizes are read
    from each shard's ``retriever_state/manifest.json``. The result is
    stored on the function object and reused for 60 seconds.
    """
    import time as _ti

    cached = getattr(_corpus_inventory_snapshot, "_cache", None)
    if cached and _ti.time() - cached["ts"] < 60:
        return cached["data"]

    from ..pipeline import get_pipeline
    indexed = list(getattr(get_pipeline(), "_indexed_docs", []) or [])

    # Tier A: exact case_type values map straight to a bucket; anything
    # else is a judgment (by case_type, by source, or because it has no
    # source at all) or falls into "other".
    tally = {"judgments": 0, "laws": 0, "dover": 0, "whatsapp": 0, "other": 0}
    exact_type_bucket = {
        "חוק": "laws",
        "הודעת דוברות": "dover",
        "שאלת ייעוץ": "whatsapp",
    }
    court_counts: Dict[str, int] = {}
    source_counts: Dict[str, int] = {}
    for doc in indexed:
        meta = getattr(doc, "metadata", {}) or {}
        case_type = (meta.get("case_type") or "").strip()
        source = (meta.get("source") or "").strip()
        court = (meta.get("court") or "").strip()

        bucket = exact_type_bucket.get(case_type)
        if bucket is None:
            looks_like_judgment = (
                case_type in ("פסק דין", "החלטה")
                or source == "court_verdicts"
                or not source
            )
            bucket = "judgments" if looks_like_judgment else "other"
        tally[bucket] += 1

        if court:
            court_counts[court] = court_counts.get(court, 0) + 1
        if source:
            source_counts[source] = source_counts.get(source, 0) + 1

    # Tier B: sum the n_docs recorded in every available shard manifest.
    # Problems are collected (and logged) rather than raised so the
    # Tier A numbers are still returned.
    shard_sizes: Dict[str, int] = {}
    shard_total = 0
    shard_errors: list = []
    try:
        import json as _json
        from ..retrieve.shard_router import get_shard_router
        from pathlib import Path as _IP
        router = get_shard_router()
        for shard_name in (router.available_shards or []):
            try:
                # The stored location may be a str; normalise to Path.
                root = router._shard_dir.get(shard_name)
                root = _IP(router.shards_dir if root is None else root)
                manifest_path = (root / shard_name
                                 / "retriever_state" / "manifest.json")
                if not manifest_path.exists():
                    shard_errors.append(
                        f"{shard_name}: manifest missing at {manifest_path}")
                    continue
                manifest = _json.loads(
                    manifest_path.read_text(encoding="utf-8"))
                size = int(manifest.get("n_docs", 0))
                shard_sizes[shard_name] = size
                shard_total += size
            except Exception as ie:
                shard_errors.append(
                    f"{shard_name}: {type(ie).__name__}: {ie}")
    except Exception as e:
        shard_errors.append(f"router init: {type(e).__name__}: {e}")
    if shard_errors:
        print(f"[corpus_inventory] tier_b errors: {shard_errors[:3]}")

    def _largest_first(counts):
        # Order a {name: count} mapping by descending count.
        return dict(sorted(counts.items(), key=lambda kv: -kv[1]))

    tier_a_total = len(indexed)
    data = {
        # `total` is Tier A + Tier B combined.
        "total":     tier_a_total + shard_total,
        "tier_a":    tier_a_total,
        "tier_b":    shard_total,
        "tier_b_breakdown": _largest_first(shard_sizes),
        "tier_b_errors":   shard_errors[:5],
        # Every Tier B document is counted as a judgment.
        "judgments": tally["judgments"] + shard_total,
        "laws":      tally["laws"],
        "dover":     tally["dover"],
        "whatsapp":  tally["whatsapp"],
        "other":     tally["other"],
        "by_court":  _largest_first(court_counts),
        "by_source": _largest_first(source_counts),
        "fetched_at": int(_ti.time()),
    }
    _corpus_inventory_snapshot._cache = {"ts": _ti.time(), "data": data}
    return data


@app.get("/v1/corpus/inventory")
def corpus_inventory():
    """Serve the corpus inventory snapshot for the counter widget.

    Any failure while building the snapshot is returned as
    ``{"ok": False, "error": ...}`` instead of being raised.
    """
    try:
        snapshot = _corpus_inventory_snapshot()
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}
    return snapshot


# ──────────────────────────────────────────────────────────────────────
# Inverted-trigram index for /v1/corpus/exists.
# Prior implementation did O(n_docs) SequenceMatcher across 134K docs
# on every keystroke — ~50-100ms each. The widget runs this on each
# keystroke (with debounce) so the UX was sluggish.
#
# New approach: build a trigram index ONCE per indexed-docs fingerprint
# {trigram → set(doc_idx)}. Lookup intersects the query's trigrams to
# get a short candidate list, then SequenceMatcher only those (typically
# <50). Rebuilds lazily when the doc count changes.
# ──────────────────────────────────────────────────────────────────────
# Module-level cache for the /v1/corpus/exists trigram index. It is
# rebuilt by _exists_index_get() whenever "fingerprint" differs from
# the current number of indexed docs.
_EXISTS_INDEX: Dict[str, Any] = {
    "fingerprint": None,           # cache key (n_docs)
    "trigrams": {},                # str → set[int] (doc indices)
    "haystack": [],                # per-doc dicts: idx, id, title, case_type, url, surface
}


def _trigrams(s: str) -> set:
    s = (s or "").lower()
    if len(s) < 3:
        return {s} if s else set()
    return {s[i:i+3] for i in range(len(s) - 2)}


def _build_exists_index(docs):
    """O(n_docs) one-time pass — builds trigram → doc_idx mapping over
    the searchable surface (id + title). Skips empty entries."""
    tri: Dict[str, set] = {}
    haystack: List[Dict[str, Any]] = []
    for i, d in enumerate(docs):
        md = getattr(d, "metadata", {}) or {}
        did = str(getattr(d, "id", ""))
        title = str(md.get("title") or "")
        case_type = str(md.get("case_type") or "")
        url = md.get("url")
        # Searchable surface — concat id + title, lowercased
        surface = f"{did} {title}".lower()
        haystack.append({
            "idx": i, "id": did, "title": title or did,
            "case_type": case_type, "url": url,
            "surface": surface,
        })
        for tg in _trigrams(surface):
            tri.setdefault(tg, set()).add(i)
    return tri, haystack


def _exists_index_get(docs):
    """Return ``(trigrams, haystack)`` for *docs*, rebuilding if stale.

    The cached index is keyed only on ``len(docs)``: it is rebuilt when
    the document count differs from the one it was built for.
    """
    current_fp = len(docs)
    if _EXISTS_INDEX["fingerprint"] == current_fp:
        return _EXISTS_INDEX["trigrams"], _EXISTS_INDEX["haystack"]

    fresh_trigrams, fresh_haystack = _build_exists_index(docs)
    _EXISTS_INDEX["fingerprint"] = current_fp
    _EXISTS_INDEX["trigrams"] = fresh_trigrams
    _EXISTS_INDEX["haystack"] = fresh_haystack
    return _EXISTS_INDEX["trigrams"], _EXISTS_INDEX["haystack"]


@app.get("/v1/corpus/exists")
def corpus_exists(q: str = "", kind: str = "any", limit: int = 10):
    """Look up a case ID or law name in the indexed corpus.

    kind:
        "law"   — report only matches classified as laws
        "case"  — report only matches whose doc id contains the query
        "any"   — report both

    Candidate selection: every document sharing at least one trigram
    with the query is a candidate; when that yields fewer than 1000
    candidates, documents whose "id title" surface contains the query
    verbatim are added by a direct scan.

    Scoring per candidate:
        * query is a substring of the surface → 0.95, classified "case"
          when the id itself contains the query, otherwise "law";
        * for kind "law"/"any", documents with case_type 'חוק' and a
          title are also compared with SequenceMatcher; a higher ratio
          replaces the score and classifies the match as "law".
    Only scores >= 0.6 are reported, best first, at most
    ``max(1, limit)`` of them.

    Returns:
        {"found": bool, "n_total_searched": int, "n_candidates": int,
         "matches": [{"id", "title", "kind", "score", "url"} ...]}
        A blank query returns {"found": False, "matches": [],
        "error": "missing query"}; an empty index returns
        {"found": False, "matches": [], "n_total_searched": 0}.
    """
    needle = (q or "").strip()
    if not needle:
        return {"found": False, "matches": [],
                "error": "missing query"}

    from difflib import SequenceMatcher
    from ..pipeline import get_pipeline

    needle_lc = needle.lower()
    docs = list(getattr(get_pipeline(), "_indexed_docs", []) or [])
    if not docs:
        return {"found": False, "matches": [], "n_total_searched": 0}

    tri, haystack = _exists_index_get(docs)

    # Union of the postings of every query trigram: sharing a single
    # trigram is enough to become a candidate.
    candidates: set = set()
    for gram in _trigrams(needle_lc):
        candidates.update(tri.get(gram, set()))

    # Direct substring scan covers queries too short to have a useful
    # trigram; skipped once the candidate pool is already large.
    if len(candidates) < 1000:
        for rec in haystack:
            if needle_lc in rec["surface"]:
                candidates.add(rec["idx"])

    fuzzy_enabled = kind in ("law", "any")
    matches: List[Dict[str, Any]] = []
    for idx in candidates:
        rec = haystack[idx]
        rec_title = rec["title"]
        rec_id = rec["id"]

        if needle_lc in rec["surface"]:
            # Verbatim hit → very high confidence.
            score = 0.95
            match_kind = "case" if needle_lc in rec_id.lower() else "law"
        else:
            score = 0.0
            match_kind = None

        if fuzzy_enabled and rec["case_type"] == "חוק" and rec_title:
            ratio = SequenceMatcher(None, needle_lc, rec_title.lower()).ratio()
            if ratio > score:
                score = ratio
                match_kind = "law"

        if score < 0.6 or not match_kind:
            continue
        if kind != "any" and match_kind != kind:
            continue
        matches.append({
            "id": rec_id, "title": rec_title or rec_id,
            "kind": match_kind, "score": round(score, 3),
            "url": rec["url"],
        })

    ranked = sorted(matches, key=lambda m: m["score"], reverse=True)
    return {
        "found": bool(ranked),
        "n_total_searched": len(docs),
        "n_candidates": len(candidates),
        "matches": ranked[:max(1, limit)],
    }


# ──────────────────────────────────────────────────────────────────────
# v5 — Lawyer Q&A endpoint with relevance gate
# Wraps the existing strategy synthesizer with a guard: if the question
# isn't in our covered legal domains OR retrieval confidence is too low,
# we return a clear "this is outside our corpus" response instead of a
# misleading low-quality answer. Saves users from acting on bad info.
# ──────────────────────────────────────────────────────────────────────
class _LawyerQARequest(BaseModel):  # type: ignore  # BaseModel imported earlier in file
    # Request body for the lawyer Q&A endpoint described in the banner
    # above. Documented with `#` comments on purpose: a class docstring
    # on a Pydantic model would surface in the generated schema.
    #
    # question — the lawyer's free-text question (required).
    # side     — defaults to "plaintiff"; NOTE(review): presumably
    #            "plaintiff" or "defendant" — confirm against the handler.
    # top_k    — defaults to 10; NOTE(review): presumably the number of
    #            retrieval hits to consider — confirm against the handler.
    question: str
    side: Optional[str] = "plaintiff"
    top_k: int = 10
    # Three-tier thresholds (was a single `min_relevance` cliff). Below
    # `out_of_scope_below` we refuse outright; between that and
    # `borderline_below` we answer WITH a clear caveat; above that we
    # answer confidently.
    out_of_scope_below: float = 0.20
    borderline_below: float = 0.35
    # Two-phase rendering: when `early=true`, the endpoint returns just
    # the cheap signals (domain classification + retrieval probe + top
    # hits) and SKIPS the synthesizer. The frontend uses this to show
    # an immediate "we found these cases, generating arguments..."
    # screen, then re-queries with early=false for the full result.
    # Cuts perceived latency from 5-10s to ~1s for the first paint.
    early: bool = False


# ──────────────────────────────────────────────────────────────────────
# Permalink store for /v1/lawyer/ask answers.
#
# Why we need it:
#   localStorage keeps history per-browser, but lawyers want to SHARE an
#   answer with a colleague. Without server-side storage there's nothing
#   to share. This adds a small SQLite table with content-hashed IDs,
#   no auth (read-only by ID), and a 90-day retention policy.
#
# IDs are the first 12 base32 chars of sha256(q | side | first 200 chars
# of the response JSON | random 8-byte nonce), so they're unguessable but
# compact (e.g. "k7m4n6q2p3r5"; the alphabet is a-z and 2-7). A leaked
# ID gives the leaker access to that ONE answer — no auth scope creep.
# ──────────────────────────────────────────────────────────────────────
from pathlib import Path as _PathQA
_LAWYER_QA_DB = (_PathQA(__file__).resolve().parent.parent
                  / "runtime" / "lawyer_qa.db")


def _lawyer_qa_db():
    """Return the process-wide SQLite connection for the permalink store.

    The connection is created on first call, cached on the function
    object (``_lawyer_qa_db._conn``) and reused afterwards. It is opened
    with ``check_same_thread=False`` so request handlers running on
    different threads can share it.

    First-call setup, in this order:
      1. create the ``lawyer_qa`` table if missing;
      2. create the ``created_at`` index;
      3. migrate older databases by adding ``view_count`` /
         ``last_viewed_at`` when absent;
      4. create the popularity index over those two columns.

    Step 3 must precede step 4: building the popularity index on a
    database that predates ``view_count`` fails with "no such column",
    which would abort setup before the migration could run.

    Raises:
        sqlite3.Error: when the database cannot be opened or set up. The
            half-initialised connection is closed and nothing is cached,
            so the next call retries from scratch.
    """
    import sqlite3

    cached = getattr(_lawyer_qa_db, "_conn", None)
    if cached is not None:
        return cached

    _LAWYER_QA_DB.parent.mkdir(parents=True, exist_ok=True)
    c = sqlite3.connect(str(_LAWYER_QA_DB), check_same_thread=False)
    try:
        c.execute("""
            CREATE TABLE IF NOT EXISTS lawyer_qa(
              id              TEXT PRIMARY KEY,
              question        TEXT NOT NULL,
              side            TEXT,
              response        TEXT NOT NULL,
              confidence      TEXT,
              relevance       REAL,
              created_at      INTEGER NOT NULL,
              ip_hash         TEXT,
              view_count      INTEGER DEFAULT 0,
              last_viewed_at  INTEGER
            )
        """)
        c.execute("""
            CREATE INDEX IF NOT EXISTS idx_lawyer_qa_created
            ON lawyer_qa(created_at)
        """)
        # In-place migration for DBs created before view_count existed.
        # SQLite has no "ADD COLUMN IF NOT EXISTS", so inspect the table
        # (column name is field 1 of each table_info row) and add only
        # what is missing.
        present = {row[1] for row in c.execute("PRAGMA table_info(lawyer_qa)")}
        for col, ddl in (
            ("view_count", "ALTER TABLE lawyer_qa ADD COLUMN view_count INTEGER DEFAULT 0"),
            ("last_viewed_at", "ALTER TABLE lawyer_qa ADD COLUMN last_viewed_at INTEGER"),
        ):
            if col in present:
                continue
            try:
                c.execute(ddl)
            except sqlite3.OperationalError as exc:
                # Another process may have added the column between the
                # PRAGMA and the ALTER; anything else is a real failure.
                if "duplicate column name" not in str(exc).lower():
                    raise
        # Index for popular ranking — needs the columns added above.
        c.execute("""
            CREATE INDEX IF NOT EXISTS idx_lawyer_qa_popular
            ON lawyer_qa(view_count DESC, last_viewed_at DESC)
        """)
        c.commit()
    except Exception:
        c.close()
        raise
    _lawyer_qa_db._conn = c
    return c


def _gen_qa_id(q: str, side: str, response: dict) -> str:
    """Content-hashed ID (12 chars base32) — collision-resistant for
    realistic volumes (~10^7 answers before 50% collision probability)."""
    import hashlib, base64, os, json as _ej
    payload = f"{q}|{side}|{_ej.dumps(response, sort_keys=True, ensure_ascii=False)[:200]}|{os.urandom(8).hex()}"
    digest = hashlib.sha256(payload.encode("utf-8")).digest()
    # base32 lower-case, no padding, alphanumeric-ish (good for URLs)
    return base64.b32encode(digest)[:12].decode("ascii").lower()


def _save_lawyer_answer(q: str, side: str, response: dict,
                         request) -> Optional[str]:
    """Persist + return permalink ID. Best-effort — failure to save
    shouldn't block the answer from being returned to the user."""
    import json as _ej
    import hashlib
    if not response or not response.get("answered"):
        return None  # don't save out_of_scope; nothing useful to share
    try:
        conn = _lawyer_qa_db()
        qa_id = _gen_qa_id(q, side, response)
        # IP hash — privacy-preserving "who saved this" for abuse tracking,
        # not user identification. Salted with a process-life secret.
        ip = (request.client.host if request and request.client else "")
        ip_h = hashlib.sha256(f"tau-rag-qa:{ip}".encode("utf-8")).hexdigest()[:16]
        conn.execute(
            "INSERT INTO lawyer_qa(id, question, side, response, "
            "confidence, relevance, created_at, ip_hash) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?)",
            (qa_id, q, side, _ej.dumps(response, ensure_ascii=False),
             response.get("confidence"),
             float(response.get("relevance") or 0.0),
             int(time.time()), ip_h),
        )
        conn.commit()
        return qa_id
    except Exception as e:
        print(f"[lawyer-qa] save failed: {e}")
        return None


@app.get("/v1/lawyer/answer/{qa_id}")
def lawyer_answer_lookup(qa_id: str):
    """Retrieve a previously saved Q&A by permalink ID. No auth — the
    ID itself is the secret.

    Failures are reported in the body, not via the HTTP status:
    ``{"ok": False, "error": ...}`` with ``bad_id`` (ID is not
    alphanumeric apart from ``-``/``_``), ``not_found``, ``expired``
    (older than 90 days) or ``"<ExceptionType>: <message>"``.

    Side effect: bumps view_count and last_viewed_at so the popular
    endpoint can rank answers users actually return to. The bump is
    best-effort; the returned ``view_count`` is the value read before
    the bump plus one.
    """
    if not qa_id or not qa_id.replace("-", "").replace("_", "").isalnum():
        return {"ok": False, "error": "bad_id"}
    import json as _ej
    try:
        conn = _lawyer_qa_db()
        row = conn.execute(
            "SELECT question, side, response, created_at, "
            "view_count, last_viewed_at FROM lawyer_qa "
            "WHERE id = ?",
            (qa_id,),
        ).fetchone()
        if not row:
            return {"ok": False, "error": "not_found"}
        q, side, response_json, created_at, view_count, _prev_viewed_at = row
        now_ts = int(time.time())
        if created_at < now_ts - 90 * 86400:
            return {"ok": False, "error": "expired"}
        # Bump view counters — best-effort, don't block the response on
        # a write failure. The connection context manager rolls back on
        # error so the shared connection is not left mid-transaction.
        try:
            with conn:
                conn.execute(
                    "UPDATE lawyer_qa SET view_count = COALESCE(view_count,0)+1, "
                    "last_viewed_at = ? WHERE id = ?",
                    (now_ts, qa_id),
                )
        except Exception as bump_err:
            print(f"[lawyer-qa] view bump failed: {bump_err}")
        return {
            "ok": True,
            "id": qa_id,
            "question": q,
            "side": side,
            "created_at": created_at,
            "view_count": (view_count or 0) + 1,
            "last_viewed_at": now_ts,
            "response": _ej.loads(response_json),
        }
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}


@app.get("/v1/lawyer/popular")
def lawyer_popular(limit: int = 10, days: int = 30):
    """List the most-viewed saved Q&As created in the last `days` days.

    `limit` is clamped to 1..50 and `days` to 1..90 (a falsy value
    selects the default of 10 / 30). Rows need view_count >= 1 and are
    ordered by view_count DESC, last_viewed_at DESC. Because `days`
    never exceeds 90, entries past the 90-day retention are never
    listed.

    Returns {"ok": True, "items": [...], "window_days": days}, or
    {"ok": False, "error": "<ExceptionType>: <message>"} when the query
    fails.
    """
    limit = min(50, int(limit or 10))
    if limit < 1:
        limit = 1
    days = min(90, int(days or 30))
    if days < 1:
        days = 1
    cutoff = int(time.time()) - days * 86400

    columns = ("id", "question", "side", "confidence", "relevance",
               "created_at", "view_count", "last_viewed_at")
    try:
        rows = _lawyer_qa_db().execute(
            "SELECT id, question, side, confidence, relevance, "
            "created_at, view_count, last_viewed_at "
            "FROM lawyer_qa "
            "WHERE created_at >= ? AND view_count >= 1 "
            "ORDER BY view_count DESC, last_viewed_at DESC "
            "LIMIT ?",
            (cutoff, limit),
        ).fetchall()
        items = []
        for record in rows:
            entry = dict(zip(columns, record))
            # NULL view_count (possible on migrated rows) reads as 0.
            entry["view_count"] = entry["view_count"] or 0
            items.append(entry)
        return {"ok": True, "items": items, "window_days": days}
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}


def _lawyer_qa_prune() -> int:
    """Cron-like cleanup — runs on first ingest after 24h. Removes
    answers older than 90 days. Returns count removed."""
    try:
        conn = _lawyer_qa_db()
        cutoff = int(time.time()) - 90 * 86400
        cur = conn.execute(
            "DELETE FROM lawyer_qa WHERE created_at < ?", (cutoff,)
        )
        conn.commit()
        return cur.rowcount or 0
    except Exception:
        return 0


# ──────────────────────────────────────────────────────────────────────
# Doctrine-cluster endpoints — Level 6 of the hierarchical legal graph.
#
# Builds doctrine clusters on demand from the citation network.
# `cluster_for_query` is the routing primitive used by the future
# GraphArgumentBuilder — given a question's retrieved hits, find which
# cluster they fall in.
# ──────────────────────────────────────────────────────────────────────
@app.get("/v1/clusters")
def list_doctrine_clusters(limit: int = 20, domain: Optional[str] = None):
    """List doctrine clusters, optionally restricted to one `domain`.

    Returns the first ``max(1, limit)`` clusters in the order produced
    by ``get_or_build_clusters`` (NOTE(review): presumably ranked by
    anchor in-degree — confirm in the clustering module). ``n_total``
    counts the clusters after the domain filter, before truncation.
    """
    try:
        from ..pipeline import get_pipeline
        from ..clustering import get_or_build_clusters, cluster_summary

        selected = get_or_build_clusters(get_pipeline())
        if domain:
            selected = [cl for cl in selected if cl.domain == domain]
        total = len(selected)
        summaries = []
        for cl in selected[:max(1, limit)]:
            summaries.append(cluster_summary(cl))
        return {"ok": True, "n_total": total, "items": summaries}
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}


@app.get("/v1/clusters/{cluster_id}")
def get_doctrine_cluster(cluster_id: str, max_members: int = 100):
    """Detail view of one doctrine cluster.

    The cluster summary is extended with its members in two shapes, for
    backward compatibility: bare ID lists (`applications`, `origins`,
    each cut to `max_members`) and resolved lists from
    `resolve_cluster_members` (`applications_meta`, `origins_meta`).
    The frontend uses `_meta` when present and falls back to the bare
    list otherwise.

    Returns {"ok": True, "cluster": {...}}, {"ok": False, "error":
    "not_found"} for an unknown ID, or an error string on failure.
    """
    try:
        from ..pipeline import get_pipeline
        from ..clustering import (get_or_build_clusters, cluster_summary,
                                  resolve_cluster_members)

        pipe = get_pipeline()
        target = None
        for candidate in get_or_build_clusters(pipe):
            if candidate.cluster_id == cluster_id:
                target = candidate
                break
        if not target:
            return {"ok": False, "error": "not_found"}

        detail = cluster_summary(target)
        application_ids = target.applications[:max_members]
        origin_ids = target.origins[:max_members]
        detail["applications"] = application_ids
        detail["origins"] = origin_ids

        # Prefer the indexed docs; fall back to `_docs`, then to nothing.
        docs = getattr(pipe, "_indexed_docs", None)
        if not docs:
            docs = getattr(pipe, "_docs", None) or []
        extra = target.extra or {}
        co_cite_added = extra.get("co_cite_added") or []

        detail["applications_meta"] = resolve_cluster_members(
            docs, application_ids, co_cite_added=co_cite_added)
        detail["origins_meta"] = resolve_cluster_members(
            docs, origin_ids, co_cite_added=co_cite_added)
        return {"ok": True, "cluster": detail}
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}


@app.get("/v1/clusters/by-doc/{doc_id:path}")
def get_clusters_for_doc(doc_id: str):
    """Inverse routing: list every doctrine cluster containing `doc_id`,
    each annotated with the role the document plays in it.

    Handy when a lawyer starts from a specific case and wants to see
    which doctrines it sits inside ("apropim is both an anchor of
    `parshanut-tachlitit` and an origin of `om-lev`"). The `:path`
    converter lets the document ID contain slashes.
    """
    try:
        from ..pipeline import get_pipeline
        from ..clustering import (get_or_build_clusters, cluster_summary,
                                  find_clusters_for_doc)

        all_clusters = get_or_build_clusters(get_pipeline())
        memberships = find_clusters_for_doc(all_clusters, doc_id)
        items = []
        for cluster, role in memberships:
            items.append({**cluster_summary(cluster), "role": role})
        return {
            "ok": True,
            "doc_id": doc_id,
            "n_clusters": len(memberships),
            "items": items,
        }
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}


# ──────────────────────────────────────────────────────────────────────
# Hierarchical graph — unified API across all 9 levels.
# Wraps tau_rag.hierarchical_graph.HierarchicalLegalGraph for the web.
# ──────────────────────────────────────────────────────────────────────
@app.get("/v1/hgraph/stats")
def hgraph_stats():
    """Return the hierarchical graph's ``stats()`` mapping merged into an
    ``{"ok": True, ...}`` payload, or an error payload on failure."""
    try:
        from ..pipeline import get_pipeline
        from ..hierarchical_graph import get_or_build_hgraph

        graph = get_or_build_hgraph(get_pipeline())
        graph_stats = graph.stats()
        return {"ok": True, **graph_stats}
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}


@app.post("/v1/hgraph/retrieve")
def hgraph_retrieve(body: Dict[str, Any]):
    """Multi-resolution retrieval. Body:
        {"query": str, "level": str|int, "k": int, "parent_id": str?}

    `level` defaults to "document" and `k` to 10. `level` is one of:
    paragraph, section, document, cluster, domain.

    Returns {"ok": True, "level", "n", "items"}; a blank query gives
    {"ok": False, "error": "empty_query"}.
    """
    try:
        query_text = (body.get("query") or "").strip()
        if not query_text:
            return {"ok": False, "error": "empty_query"}
        level = body.get("level", "document")
        k = int(body.get("k", 10))
        parent_id = body.get("parent_id")

        from ..pipeline import get_pipeline
        from ..hierarchical_graph import get_or_build_hgraph

        graph = get_or_build_hgraph(get_pipeline())
        hits = graph.retrieve(query_text, level=level, k=k,
                              parent_id=parent_id)
        return {"ok": True, "level": level, "n": len(hits), "items": hits}
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}


@app.get("/v1/hgraph/path")
def hgraph_path(node_id: str, level: str = "document"):
    """Return the graph's path for `node_id` at `level` as a list of
    {"level_index", "level", "node_id"} steps (the level name is looked
    up in ``LEVELS``). NOTE(review): presumably the walk goes from the
    node up to the system root — confirm in ``hierarchical_graph``."""
    try:
        from ..pipeline import get_pipeline
        from ..hierarchical_graph import (get_or_build_hgraph, LEVELS)

        graph = get_or_build_hgraph(get_pipeline())
        steps = []
        for level_index, step_id in graph.path(node_id, level=level):
            steps.append({
                "level_index": level_index,
                "level": LEVELS[level_index],
                "node_id": step_id,
            })
        return {"ok": True, "path": steps}
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}


@app.post("/v1/hgraph/argument")
def hgraph_build_argument(body: Dict[str, Any]):
    """End-to-end argument assembly. Body:
        {"user_facts": str, "side": "plaintiff"|"defendant",
         "retrieval_k": int}

    `question` is accepted as a fallback for `user_facts`; `side`
    defaults to "plaintiff" and `retrieval_k` to 20. The response wraps
    ``bundle.to_dict()`` from the graph's ``build_argument``; blank
    facts give {"ok": False, "error": "empty_facts"}.
    NOTE(review): the original docstring states that no generative model
    is invoked and all text is verbatim from the corpus — that is a
    property of ``build_argument`` and cannot be verified from here.
    """
    try:
        raw_facts = body.get("user_facts") or body.get("question") or ""
        facts = raw_facts.strip()
        if not facts:
            return {"ok": False, "error": "empty_facts"}
        side = body.get("side") or "plaintiff"
        retrieval_k = int(body.get("retrieval_k", 20))

        from ..pipeline import get_pipeline
        from ..hierarchical_graph import get_or_build_hgraph

        graph = get_or_build_hgraph(get_pipeline())
        bundle = graph.build_argument(facts, side=side,
                                      retrieval_k=retrieval_k)
        return {"ok": True, "bundle": bundle.to_dict()}
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}


@app.post("/v1/clusters/route")
def route_query_to_cluster(body: Dict[str, Any]):
    """Given a query, return the top-K doctrine clusters it belongs to.

    Body: {"question": str, "top_k": int = 3, "retrieval_k": int = 20}

    Pipeline:
      1. Run retrieval on the question.
      2. Collect the distinct doc_ids of the hits, in rank order.
      3. Rank clusters against those ids with ``cluster_for_query``.
      4. Return the ranked clusters with scores + the role of each
         retrieved doc that is a member.

    Returns {"ok": True, "n_retrieved", "results"}; a blank question
    gives {"ok": False, "error": "empty_question"}; any failure gives
    {"ok": False, "error": "<ExceptionType>: <message>"}.
    """
    try:
        q = (body.get("question") or "").strip()
        if not q:
            return {"ok": False, "error": "empty_question"}
        top_k = int(body.get("top_k", 3))
        retrieval_k = int(body.get("retrieval_k", 20))

        from ..pipeline import get_pipeline
        from ..core.types import Query as _Q
        from ..clustering import (get_or_build_clusters, cluster_for_query,
                                   cluster_summary, classify_role)

        pipe = get_pipeline()
        retrieved = pipe.retrievers.search(_Q(text=q), k=retrieval_k)

        # Order-preserving de-duplication of the hits' document ids. A
        # hit either wraps a chunk carrying `doc_id` or carries it itself.
        retrieved_ids = []
        seen: set = set()
        for r in retrieved:
            chunk = getattr(r, "chunk", None) or r
            did = (getattr(chunk, "doc_id", None) or
                   getattr(r, "doc_id", None))
            if did and did not in seen:
                seen.add(did)
                retrieved_ids.append(did)

        clusters = get_or_build_clusters(pipe)
        ranked = cluster_for_query(clusters, retrieved_ids, top_k=top_k)

        results = []
        for c, score in ranked:
            # Build the membership set once per cluster rather than once
            # per retrieved id.
            member_ids = set(c.members())
            results.append({
                **cluster_summary(c),
                "score": round(score, 4),
                "retrieved_in_cluster": [
                    {"doc_id": did, "role": classify_role(c, did)}
                    for did in retrieved_ids
                    if did in member_ids
                ],
            })
        return {
            "ok": True,
            "n_retrieved": len(retrieved_ids),
            "results": results,
        }
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}


# ──────────────────────────────────────────────────────────────────────
# Aggregated system dashboard — single endpoint pulls signals from
# every sub-system (corpus, sidecar, Q&A store, pipeline) so the
# frontend can render a one-pane operational view without firing 6
# separate requests.
#
# Each subsystem block is fault-isolated: if `/v1/scrapers/whatsapp/stats`
# would error, only that block returns {"error": ...}; the others still
# populate. Lets the dashboard render partial rather than spinning.
# ──────────────────────────────────────────────────────────────────────
@app.get("/v1/system/dashboard")
def system_dashboard():
    """One-shot operational snapshot across sub-systems.

    Each section (`corpus`, `whatsapp`, `lawyer_qa`, `pipeline`,
    `build_progress`, `scrapes`) is computed in its own try block, so a
    failing section reports {"error": ...} (or {"phase": "unknown"} for
    build progress) while the others still populate. The `scrapes`
    section re-reads scraped_corpus.jsonl on every call.

    `status` is "degraded" when any section dict has an "error" key,
    else "ok" when the pipeline has indexed docs, else "starting".
    """
    import time as _time_dash

    def _failure(exc):
        return {"error": f"{type(exc).__name__}: {exc}"}

    out: Dict[str, Any] = {"ts": int(_time_dash.time())}

    # ── Corpus inventory (reuses the cached snapshot) ──
    try:
        out["corpus"] = _corpus_inventory_snapshot()
    except Exception as e:
        out["corpus"] = _failure(e)

    # ── WhatsApp sidecar stats (server side — what we received) ──
    try:
        wa_block = {
            "uptime_s": int(_time_dash.time()) - _WA_STATS["started_ts"],
        }
        for counter in ("received", "kept", "duplicates", "rejected_auth",
                        "rejected_quality", "rejected_pii",
                        "redacted_pii_count", "live_inject_failed"):
            wa_block[counter] = _WA_STATS[counter]
        wa_block["ingest_configured"] = bool(_whatsapp_live_token())
        out["whatsapp"] = wa_block
    except Exception as e:
        out["whatsapp"] = _failure(e)

    # ── Lawyer Q&A store: counts by confidence + total views ──
    try:
        aggregate = _lawyer_qa_db().execute("""
            SELECT
              COUNT(*) AS total,
              SUM(CASE WHEN confidence='high' THEN 1 ELSE 0 END) AS n_high,
              SUM(CASE WHEN confidence='borderline' THEN 1 ELSE 0 END) AS n_border,
              SUM(COALESCE(view_count, 0)) AS total_views,
              COUNT(DISTINCT ip_hash) AS distinct_ips,
              MAX(created_at) AS last_saved_at
            FROM lawyer_qa
        """).fetchone()
        # SUM() over zero rows is NULL, hence the `or 0` fallbacks.
        out["lawyer_qa"] = {
            "total":         aggregate[0] or 0,
            "n_high":        aggregate[1] or 0,
            "n_borderline":  aggregate[2] or 0,
            "total_views":   aggregate[3] or 0,
            "distinct_ips":  aggregate[4] or 0,
            "last_saved_at": aggregate[5],
        }
    except Exception as e:
        out["lawyer_qa"] = _failure(e)

    # ── Pipeline / retriever health: minimal indicators ──
    try:
        from ..pipeline import get_pipeline
        pipe = get_pipeline()
        indexed = len(getattr(pipe, "_indexed_docs", []) or [])
        registry = getattr(pipe.retrievers, "_retrievers", None)
        if isinstance(registry, dict):
            retriever_count = len(registry)
        elif pipe.retrievers:
            retriever_count = 1
        else:
            retriever_count = 0
        out["pipeline"] = {
            "indexed_docs": indexed,
            "n_retrievers": retriever_count,
            "ready": indexed > 0,
        }
    except Exception as e:
        out["pipeline"] = _failure(e)

    # ── Build progress (set during corpus autoload, so a caller hitting
    #    this endpoint mid-startup can see "still indexing") ──
    try:
        out["build_progress"] = public_build_progress()
    except Exception:
        out["build_progress"] = {"phase": "unknown"}

    # ── Recent scrapes: records in scraped_corpus.jsonl by source ──
    try:
        from pathlib import Path as _PSC
        import json as _scj
        scrape_file = (_PSC(__file__).resolve().parent.parent
                       / "runtime" / "scraped" / "scraped_corpus.jsonl")
        if not scrape_file.exists():
            out["scrapes"] = {"total": 0, "by_source": {}, "file_bytes": 0}
        else:
            per_source: Dict[str, int] = {}
            with scrape_file.open("r", encoding="utf-8") as fh:
                for raw in fh:
                    text = raw.strip()
                    if not text:
                        continue
                    try:
                        record = _scj.loads(text)
                    except Exception:
                        continue  # skip unparseable lines
                    source = (record.get("metadata") or {}).get("source") or "unknown"
                    per_source[source] = per_source.get(source, 0) + 1
            out["scrapes"] = {
                "total": sum(per_source.values()),
                "by_source": per_source,
                "file_bytes": scrape_file.stat().st_size,
            }
    except Exception as e:
        out["scrapes"] = _failure(e)

    # ── Overall health: degraded if ANY section reports an error ──
    sections = [value for value in out.values() if isinstance(value, dict)]
    pipeline_ready = bool((out.get("pipeline") or {}).get("ready"))
    if any("error" in section for section in sections):
        out["status"] = "degraded"
    elif pipeline_ready:
        out["status"] = "ok"
    else:
        out["status"] = "starting"
    return out


# ──────────────────────────────────────────────────────────────────────
# Multi-Q&A docx export — pack multiple saved answers into one memo.
#
# Use case: lawyer prepared 5 Q&As during research, wants to send a
# combined memo to the client. Frontend selects N qa_ids and POSTs
# them; backend assembles a single .docx with a TOC and per-Q&A
# sections.
# ──────────────────────────────────────────────────────────────────────
class _LawyerQAExportRequest(BaseModel):  # type: ignore
    # Request body for POST /v1/lawyer/export-pack.
    # qa_ids — permalink IDs of the saved answers to pack, in the order
    #          they should appear; the handler rejects empty lists and
    #          lists longer than 50.
    qa_ids: List[str]
    title: Optional[str] = None   # cover page title; defaults to date


@app.post("/v1/lawyer/export-pack")
def lawyer_export_pack(body: _LawyerQAExportRequest):
    """Return a Hebrew RTL .docx packing multiple Q&As into one memo.

    The document has a cover heading, a table of contents (questions cut
    to 80 characters) and one section per Q&A with metadata, disclaimer,
    up to 5 arguments, up to 10 citations and up to 10 statutes.

    IDs that fail the alphanumeric check or are not in the store are
    skipped silently; entries are included regardless of age (this
    handler applies no 90-day expiry check).

    Returns:
        A binary .docx ``Response`` with an attachment filename, or a
        JSON dict ``{"ok": False, "error": ...}`` with ``no_ids``,
        ``too_many``, ``python-docx_not_installed``, ``db: ...`` or
        ``none_found``.
    """
    from io import BytesIO
    from fastapi.responses import Response
    import datetime as _dt

    if not body.qa_ids:
        return {"ok": False, "error": "no_ids"}
    # Cap — assembling more than 50 Q&As into a docx is rarely useful
    # and python-docx is synchronous. NOTE(review): this is a plain
    # `def` endpoint, so the cost is presumably a busy worker thread
    # rather than a blocked event loop — confirm.
    if len(body.qa_ids) > 50:
        return {"ok": False, "error": "too_many", "max": 50}

    # python-docx is optional: report its absence instead of crashing.
    try:
        from docx import Document as DocxDocument
        from docx.shared import Pt, RGBColor
        from docx.enum.text import WD_ALIGN_PARAGRAPH
        from docx.oxml.ns import qn
        from docx.oxml import OxmlElement
    except ImportError:
        return {"ok": False, "error": "python-docx_not_installed"}

    # Fetch all requested Q&As, preserving the request order
    import json as _ej
    fetched: List[Dict[str, Any]] = []
    try:
        conn = _lawyer_qa_db()
        for qa_id in body.qa_ids:
            # Same ID shape check as the lookup endpoint; bad IDs are
            # skipped rather than failing the whole export.
            if not qa_id.replace("-", "").replace("_", "").isalnum():
                continue
            row = conn.execute(
                "SELECT id, question, side, response, created_at "
                "FROM lawyer_qa WHERE id = ?",
                (qa_id,),
            ).fetchone()
            if not row:
                continue
            fetched.append({
                "id": row[0], "question": row[1], "side": row[2],
                "response": _ej.loads(row[3]),
                "created_at": row[4],
            })
    except Exception as e:
        return {"ok": False, "error": f"db: {type(e).__name__}: {e}"}

    if not fetched:
        return {"ok": False, "error": "none_found"}

    # ── Build the docx ──────────────────────────────────────────
    doc = DocxDocument()
    # Default font: 'David' 11pt, also set explicitly for the
    # complex-script (w:cs) and high-ANSI (w:hAnsi) font slots.
    style = doc.styles['Normal']
    style.font.name = 'David'
    style.font.size = Pt(11)
    rPr = style.element.get_or_add_rPr()
    rFonts = rPr.find(qn('w:rFonts'))
    if rFonts is None:
        rFonts = OxmlElement('w:rFonts')
        rPr.append(rFonts)
    rFonts.set(qn('w:cs'), 'David')
    rFonts.set(qn('w:hAnsi'), 'David')

    def _rtl(p):
        # Mark paragraph `p` as bidirectional (w:bidi = 1) and
        # right-align it.
        pPr = p._p.get_or_add_pPr()
        bidi = OxmlElement('w:bidi')
        bidi.set(qn('w:val'), '1')
        pPr.append(bidi)
        p.alignment = WD_ALIGN_PARAGRAPH.RIGHT

    # Cover page
    title_text = body.title or f"חוברת תשובות משפטיות · {_dt.date.today().strftime('%d/%m/%Y')}"
    title = doc.add_heading(title_text, level=0)
    _rtl(title)
    for run in title.runs:
        run.font.color.rgb = RGBColor(0x4F, 0x46, 0xE5)
    sub = doc.add_paragraph()
    _rtl(sub)
    r = sub.add_run(f"מכיל {len(fetched)} תשובות · נוצר על-ידי tau-rag")
    r.italic = True; r.font.size = Pt(10)
    r.font.color.rgb = RGBColor(0x94, 0xA3, 0xB8)

    # Table of contents — plain numbered lines, one per Q&A
    toc_h = doc.add_heading("תוכן עניינים", level=2); _rtl(toc_h)
    for i, qa in enumerate(fetched, 1):
        q = (qa["question"] or "")[:80]
        p = doc.add_paragraph(); _rtl(p)
        p.add_run(f"{i}. {q}").font.size = Pt(11)
    doc.add_page_break()

    # Per-Q&A section
    for i, qa in enumerate(fetched, 1):
        resp = qa["response"] or {}
        # Q heading
        q_h = doc.add_heading(f"{i}. {qa['question'] or 'שאלה ללא כותרת'}",
                               level=1)
        _rtl(q_h)
        # Metadata line: date · side · confidence tier · domain
        md_p = doc.add_paragraph(); _rtl(md_p)
        meta_parts = []
        if qa.get("created_at"):
            meta_parts.append(_dt.date.fromtimestamp(qa["created_at"]).strftime("%d/%m/%Y"))
        if qa.get("side"):
            meta_parts.append(f"צד: {qa['side']}")
        if resp.get("confidence"):
            # Hebrew label per confidence tier; unknown tiers are omitted
            tier_he = {"high": "ביטחון גבוה", "borderline": "ביטחון בינוני",
                       "out_of_scope": "מחוץ לקורפוס"}.get(resp["confidence"], "")
            if tier_he:
                meta_parts.append(tier_he)
        if resp.get("domain"):
            meta_parts.append(f"תחום: {resp['domain']}")
        r = md_p.add_run(" · ".join(meta_parts))
        r.italic = True; r.font.size = Pt(9)
        r.font.color.rgb = RGBColor(0x94, 0xA3, 0xB8)

        # Disclaimer — indented on both sides, italic
        if resp.get("disclaimer_he"):
            disc_p = doc.add_paragraph(); _rtl(disc_p)
            disc_p.paragraph_format.left_indent = Pt(12)
            disc_p.paragraph_format.right_indent = Pt(12)
            r = disc_p.add_run(resp["disclaimer_he"])
            r.italic = True; r.font.size = Pt(10)

        # Arguments — first 5 only; assumes each item is a dict with an
        # "argument" key (TODO confirm against the synthesizer output)
        args = resp.get("arguments") or []
        if args:
            ah = doc.add_heading("טיעונים", level=3); _rtl(ah)
            for j, a in enumerate(args[:5], 1):
                p = doc.add_paragraph(); _rtl(p)
                p.paragraph_format.line_spacing = 1.5
                p.add_run(f"({j}) ").bold = True
                p.add_run(a.get("argument", ""))

        # Citations — first 10 only; the title is printed only when it
        # differs from the case id
        cits = resp.get("citations") or []
        if cits:
            ch = doc.add_heading("ציטוטים", level=3); _rtl(ch)
            for c in cits[:10]:
                p = doc.add_paragraph(style="List Bullet"); _rtl(p)
                r1 = p.add_run(c.get("case_id", ""))
                r1.bold = True
                if c.get("title") and c["title"] != c.get("case_id"):
                    p.add_run(f" · {c['title']}")
                if c.get("outcome"):
                    p.add_run(f" [{c['outcome']}]").font.size = Pt(9)

        # Statutes — first 10, each either a plain string or a dict
        # with a "name" key, joined on one line
        stats = resp.get("statutes") or []
        if stats:
            sh = doc.add_heading("חוקים מצוטטים", level=3); _rtl(sh)
            p = doc.add_paragraph(); _rtl(p)
            p.add_run(" · ".join(
                str(s) if isinstance(s, str) else s.get("name", "")
                for s in stats[:10]
            ))

        # Page break between Q&As (skip last)
        if i < len(fetched):
            doc.add_page_break()

    # Serialize to memory and return as a download
    buf = BytesIO()
    doc.save(buf)
    fname = f"tau-rag-qa-pack-{_dt.date.today().strftime('%Y%m%d')}.docx"
    return Response(
        content=buf.getvalue(),
        media_type=("application/vnd.openxmlformats-officedocument."
                    "wordprocessingml.document"),
        headers={"Content-Disposition": f'attachment; filename="{fname}"'},
    )


@app.delete("/v1/lawyer/answer/{qa_id}")
def lawyer_answer_delete(qa_id: str, request: Request = None):  # type: ignore
    """Hard-delete a saved Q&A. Required for GDPR right-to-erasure.

    Auth model: a single admin token in TAU_RAG_ADMIN_TOKEN. Without it
    the endpoint refuses (returns "not_configured" so a casual visitor
    can't enumerate). When configured, requires X-TAU-Admin-Token
    header to match. This is intentional minimum auth — the GDPR
    delete flow is rare; full RBAC isn't justified here.

    Args:
        qa_id: identifier of the saved Q&A row; only letters, digits,
            "-" and "_" are accepted.
        request: incoming request, used only to read the
            ``x-tau-admin-token`` header.

    Returns:
        ``{"ok": True, "deleted": <rowcount>, "id": qa_id}`` on success,
        otherwise ``{"ok": False, "error": <reason>}`` where the reason is
        one of "not_configured", "unauthorized", "bad_id", or the
        ``"<ExceptionType>: <message>"`` of a database failure.
    """
    import hmac

    admin_token = _os.environ.get("TAU_RAG_ADMIN_TOKEN", "")
    if not admin_token:
        return {"ok": False, "error": "not_configured"}
    got = (request.headers.get("x-tau-admin-token") or "") if request else ""
    # Constant-time comparison: a plain `!=` returns as soon as the first
    # differing character is found, which leaks how much of the token was
    # guessed correctly. Compare as bytes because hmac.compare_digest
    # rejects non-ASCII `str` arguments with a TypeError.
    if not hmac.compare_digest(got.encode("utf-8"),
                               admin_token.encode("utf-8")):
        return {"ok": False, "error": "unauthorized"}
    # Reject anything that is not alphanumeric once "-" and "_" are removed.
    if not qa_id or not qa_id.replace("-", "").replace("_", "").isalnum():
        return {"ok": False, "error": "bad_id"}
    try:
        conn = _lawyer_qa_db()
        # Parameterized statement — qa_id never gets interpolated into SQL.
        cur = conn.execute("DELETE FROM lawyer_qa WHERE id = ?", (qa_id,))
        conn.commit()
        return {
            "ok": True,
            "deleted": cur.rowcount,
            "id": qa_id,
        }
    except Exception as e:
        # Best-effort endpoint: report the failure in the body, don't raise.
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}


def _format_lawyer_response(
    *,
    confidence: str,           # "high" / "borderline" / "out_of_scope"
    relevance: float,
    domain_top: Optional[str],
    domain_score: float,
    n_hits: int,
    top_hits: Optional[List[Dict[str, Any]]] = None,
    syn_result: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Shape the response into a structure the frontend can render
    directly without re-parsing the synthesizer dump.

    Surface fields:
        answered, confidence, headline_he, disclaimer_he,
        arguments[], citations[{case_id, title, url, outcome}],
        statutes[], outcome_summary, raw (full synthesizer dict).

    Frontend renders headline/disclaimer at top, lists arguments and
    citations explicitly. Old response shape (`result` dict) preserved
    in `raw` for any caller that depended on it.
    """
    base = {
        "answered": confidence != "out_of_scope",
        "confidence": confidence,
        "relevance": round(relevance, 3),
        "domain": domain_top,
        "domain_score": round(domain_score, 3),
        "n_corpus_hits": n_hits,
        # Always include top hits so the frontend can show "this is what
        # we found" even for out_of_scope/borderline tiers — the user
        # can judge relevance themselves.
        "top_hits": top_hits or [],
    }
    if confidence == "out_of_scope":
        base["headline_he"] = "השאלה אינה מכוסה בקורפוס"
        if top_hits:
            base["disclaimer_he"] = (
                "השאלה רחוקה מכל פסיקה בקורפוס שלנו. "
                "התוצאות הקרובות ביותר שמצאנו (להלן) מקבלות ציון דמיון נמוך — "
                "כנראה לא רלוונטיות. כדאי לבדוק במקור אחר."
            )
        else:
            base["disclaimer_he"] = (
                "לא נמצאו תוצאות. ייתכן שהשאלה לא בעברית, או שאין כיסוי לתחום הזה. "
                "כדאי לבדוק במקור אחר (נבו, פסק דין, או מערכת המשפט)."
            )
        return base

    # Extract arguments + citations from the synthesizer result for
    # easy frontend rendering.
    cba = ((syn_result or {}).get("case_based_arguments") or {})
    drafts = cba.get("drafted_arguments_for_user") or []
    arguments = []
    citations: List[Dict[str, Any]] = []
    seen_case_ids: set = set()
    for d in drafts[:5]:  # cap — frontend renders top 5
        arg_text = d.get("argument", "") or ""
        arguments.append({
            "argument": arg_text,
            "polish_method": d.get("polish_method", "verbatim"),
            "n_sources": d.get("n_sources", 0),
            # v2.89.2 — judgment section the supporting paragraph came
            # from (arguments_plaintiff / discussion / holding / etc.).
            # Frontend renders this as a small chip per argument.
            "section_origin": d.get("section_origin"),
        })
        # Pull case citations out of the draft metadata
        for src in (d.get("source_cases") or []):
            cid = src.get("case_id") or src.get("id")
            if not cid or cid in seen_case_ids:
                continue
            seen_case_ids.add(cid)
            citations.append({
                "case_id": cid,
                "title": src.get("title") or cid,
                "url": src.get("url"),
                "outcome": src.get("outcome"),
                "score": src.get("score"),
            })
    statutes = cba.get("statute_references") or []
    outcome_stats = cba.get("outcome_distribution") or {}

    if confidence == "high":
        base["headline_he"] = "תשובה מבוססת פסיקה"
        base["disclaimer_he"] = (
            "התשובה נבנית מתוך פסיקה דומה עובדתית. אין בה ייעוץ משפטי — "
            "החלטה סופית טעונה בחינה נקודתית של עו\"ד מוסמך."
        )
    else:  # borderline
        base["headline_he"] = "תשובה משוערת — דמיון נמוך לקורפוס"
        base["disclaimer_he"] = (
            f"רמת הדמיון של השאלה לפסיקה הקיימת ({round(relevance, 2)}) "
            "נמוכה מהרגיל. הציטוטים שלהלן עשויים להיות פחות רלוונטיים — "
            "כדאי להצליב עם מקור משפטי נוסף לפני שימוש בתוכן."
        )

    base.update({
        "arguments": arguments,
        "citations": citations[:10],
        "statutes": statutes[:10],
        "outcome_summary": outcome_stats,
        "raw": syn_result,   # full synthesizer dump for power users
    })
    return base


# ──────────────────────────────────────────────────────────────────────
# SSE-streamed Q&A endpoint.
#
# Why this exists alongside POST /v1/lawyer/ask:
#   The two-phase rendering (early=true → early=false) we already have
#   gives a fast time-to-first-paint (~1s for signals + top_hits). But
#   once the synthesizer lands, all 5 arguments + 10 citations + 10
#   statutes appear at once — a sudden "wall of text" jump.
#
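#   This endpoint yields events progressively:
#     1. event:phase1         — signals + top_hits   (~1s)
#     2. event:synthesizing   — heartbeat roughly once per second while
#                               the synthesizer is running
#     3. event:argument_start / argument_chunk / argument_end
#                             — each argument, streamed as text chunks
#     4. event:meta           — citations + statutes + outcomes
#     5. event:done           — permalink_id, completion marker
#   (event:error is sent instead when a phase fails.)
#
#   This is "pseudo-streaming" — we don't stream the model itself
#   (synthesizer is sync). The backend pushes the argument chunks as
#   fast as it can, without sleeps; the client is expected to queue
#   them and render at its own cadence, which gives the UX feel of
#   "the answer is being typed in" that users expect from modern AI
#   surfaces.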
# ──────────────────────────────────────────────────────────────────────
@app.post("/v1/lawyer/ask-stream")
async def lawyer_ask_stream(body: _LawyerQARequest, request: Request = None):  # type: ignore
    """Stream a lawyer Q&A answer as Server-Sent Events.

    Events emitted, in order:
      * ``error``        — empty question, or a phase-1 / synthesizer
                           failure (terminates the stream)
      * ``phase1``       — confidence signals + top hits
      * ``done``         — sent right after ``phase1`` when the question
                           is out of scope (terminates the stream)
      * ``synthesizing`` — heartbeat, about once per second while the
                           synthesizer runs in the threadpool
      * ``argument_start`` / ``argument_chunk`` / ``argument_end``
                         — each argument, split into text chunks
      * ``meta``         — citations, statutes, outcome summary
      * ``done``         — confidence, permalink fields, answered flag

    Args:
        body: request payload; the fields read here are ``question``,
            ``side``, ``top_k``, ``borderline_below`` and
            ``out_of_scope_below``.
        request: incoming request, forwarded to ``_save_lawyer_answer``.

    Returns:
        A ``StreamingResponse`` with media type ``text/event-stream``.
    """
    import asyncio
    import json as _sj
    from fastapi.responses import StreamingResponse
    from starlette.concurrency import run_in_threadpool

    def _evt(event: str, data: Dict[str, Any]) -> str:
        """Format a single SSE event (event line, data line, blank line)."""
        # json.dumps escapes newlines, so the payload stays on one data line.
        return f"event: {event}\ndata: {_sj.dumps(data, ensure_ascii=False)}\n\n"

    async def generate():
        q = (body.question or "").strip()
        if not q:
            yield _evt("error", {"reason": "empty_question"})
            return

        # ── Phase 1: signals (in threadpool because retrieval is sync)
        def _do_signals():
            domain_top = None
            domain_score = 0.0
            try:
                from ..domain_classifier import classify
                d = classify(q)
                domain_top = d.get("top")
                domain_score = float((d.get("scores") or {}).get(domain_top, 0.0))
            except Exception:
                # Best-effort: a classifier failure leaves the defaults.
                pass

            from ..pipeline import get_pipeline
            from ..core.types import Query as _Query
            pipe = get_pipeline()
            relevance = 0.0
            n_hits = 0
            top_hits: List[Dict[str, Any]] = []
            try:
                qry = _Query(text=q)
                retrieved = pipe.retrievers.search(qry, k=body.top_k)
                ranked = sorted(
                    retrieved,
                    key=lambda r: float(getattr(r, "score", 0.0)),
                    reverse=True,
                )
                scored = [float(getattr(r, "score", 0.0)) for r in ranked]
                n_hits = len(scored)
                if scored:
                    # Relevance = mean score of the (up to) three best hits.
                    relevance = sum(scored[:3]) / min(3, len(scored))
                for r in ranked[:3]:
                    chunk = getattr(r, "chunk", None) or r
                    # Same fallback chain as POST /v1/lawyer/ask, including
                    # the final `chunk.id`, so both endpoints report the
                    # same case_id for the same hit.
                    cid = (getattr(chunk, "doc_id", None) or
                           getattr(r, "doc_id", None) or
                           getattr(chunk, "id", None))
                    md = getattr(chunk, "metadata", {}) or {}
                    top_hits.append({
                        "case_id": str(cid or ""),
                        "title": md.get("title") or str(cid or ""),
                        "score": round(float(getattr(r, "score", 0.0)), 3),
                        "snippet": (getattr(chunk, "text", "") or "")[:200],
                    })
            except Exception:
                # Best-effort: a retrieval failure leaves relevance at 0.0,
                # which the tiering below maps to borderline/out_of_scope.
                pass

            # Three-tier confidence; a domain score >= 0.6 can lift an
            # otherwise out-of-scope question to "borderline".
            if relevance >= body.borderline_below:
                conf = "high"
            elif relevance >= body.out_of_scope_below or domain_score >= 0.6:
                conf = "borderline"
            else:
                conf = "out_of_scope"

            return {
                "confidence": conf, "relevance": relevance,
                "domain_top": domain_top, "domain_score": domain_score,
                "n_hits": n_hits, "top_hits": top_hits, "pipe": pipe,
            }

        try:
            sig = await run_in_threadpool(_do_signals)
        except Exception as e:
            yield _evt("error", {"reason": "phase1_failed",
                                  "error": f"{type(e).__name__}: {e}"})
            return

        # First event — even out_of_scope gets a phase1 with top_hits
        early = _format_lawyer_response(
            confidence=sig["confidence"], relevance=sig["relevance"],
            domain_top=sig["domain_top"], domain_score=sig["domain_score"],
            n_hits=sig["n_hits"], top_hits=sig["top_hits"],
        )
        early["phase"] = "early"
        yield _evt("phase1", early)

        # Out of scope — done after phase1
        if sig["confidence"] == "out_of_scope":
            yield _evt("done", {"answered": False, "confidence": "out_of_scope"})
            return

        # ── Phase 2: full synthesizer (heavy)
        def _do_synth():
            from ..intelligence import StrategySynthesizer  # type: ignore
            pipe = sig["pipe"]
            named = getattr(pipe.retrievers, "_retrievers", {}) or {}
            # Use the "hebrew_encoder" retriever for CBR when it exists,
            # otherwise fall back to the combined retriever object.
            cbr_r = named.get("hebrew_encoder") or pipe.retrievers
            syn = StrategySynthesizer(
                retriever=pipe.retrievers, cbr_retriever=cbr_r,
                full_text_loader=lambda did: pipe.get_text(did) or "",
                pipeline=pipe, polish_with_tau_llm=False,
            )
            result = syn.synthesize(
                user_facts=q, side=body.side or "plaintiff", top_k=body.top_k,
            )
            return (result.to_dict() if hasattr(result, "to_dict")
                    else dict(result))

        # Day 42 — heartbeat during synth.
        # Background: _do_synth() is a single blocking call (StrategySynthesizer
        # does CBR retrieval → doctrine classify → argument building → polish).
        # It runs in a threadpool, so the async loop is free — but emits NOTHING
        # for 5-15s. From the user's perspective: phase1 lands, then silence,
        # then everything in a burst. That's why "no streaming feel" even though
        # it's technically SSE.
        # Fix: race _do_synth against a heartbeat loop that emits "synthesizing"
        # events every ~1s with rotating Hebrew status messages, so the UI keeps
        # visibly updating during the silent middle.
        synth_task = asyncio.create_task(run_in_threadpool(_do_synth))
        _HEARTBEAT_MSGS = [
            "מאתר פסיקה רלוונטית מהקורפוס...",
            "מסווג דוקטרינות משפטיות...",
            "בונה טיעון מהפסיקה (verbatim)...",
            "מחבר ציטוטים לחקיקה...",
            "מוודא דיוק הציטוטים...",
            "מסכם תוצאות מהפסיקה...",
            "מארגן את התשובה...",
        ]
        _hb_i = 0
        while not synth_task.done():
            try:
                # Wait for either the task to finish or 1s heartbeat tick.
                # shield() keeps the timeout from cancelling the synth task.
                await asyncio.wait_for(asyncio.shield(synth_task), timeout=1.0)
            except asyncio.TimeoutError:
                msg = _HEARTBEAT_MSGS[_hb_i % len(_HEARTBEAT_MSGS)]
                yield _evt("synthesizing", {
                    "step": _hb_i + 1,
                    "message_he": msg,
                })
                _hb_i += 1
                continue
            except Exception:
                # The synth task itself failed; .result() below re-raises
                # and turns it into an "error" event.
                break
        try:
            syn_dict = synth_task.result()
        except Exception as e:
            yield _evt("error", {"reason": "synth_failed",
                                  "error": f"{type(e).__name__}: {e}"})
            return

        # Format the final response so we know what to stream
        final = _format_lawyer_response(
            confidence=sig["confidence"], relevance=sig["relevance"],
            domain_top=sig["domain_top"], domain_score=sig["domain_score"],
            n_hits=sig["n_hits"], top_hits=sig["top_hits"],
            syn_result=syn_dict,
        )

        # Persist for permalink — best-effort, never blocks the stream.
        try:
            qa_id = _save_lawyer_answer(q, body.side or "plaintiff",
                                         final, request)
            if qa_id:
                final["permalink_id"] = qa_id
                final["permalink_url"] = f"/?qa={qa_id}"
        except Exception:
            pass

        # Day 44 — sentence/clause-aware streaming for "processual" feel.
        # User feedback after Day 43: ~28-char word chunks at 150ms felt
        # too rapid — "everything appears in fractions of a second", not
        # processual. Pivot to LARGER chunks (sentences/clauses, ~80-150
        # chars), so each chunk feels like a deliberate step, not a
        # stream of letters. (Pacing itself moved to the client on
        # Day 45 — see below.)
        def _chunk_text(text: str, target: int = 100):
            """Yield chunks of ~target chars, preferring the FIRST
            sentence-end (.!?\\n) found in window [target-40, target+60],
            then the first clause-end (,;:), then any word boundary at
            ~target. Forward search keeps chunks close to target — backward
            search wrongly favored the trailing punctuation of the whole
            text and yielded one mega-chunk."""
            if not text:
                return
            n = len(text)
            i = 0
            while i < n:
                # Tail shorter than target+30 → yield it whole
                if n - i <= target + 30:
                    yield text[i:]
                    return
                # Search window, as absolute indexes into `text`.
                lo = i + max(40, target - 40)
                hi = min(n - 1, i + target + 60)
                # Pass 1: sentence end followed by whitespace (or end).
                strong = -1
                for j in range(lo, hi + 1):
                    c = text[j]
                    if c in ".!?\n" and (j + 1 >= n or text[j + 1] in " \n\t"):
                        strong = j + 1
                        break
                if strong > 0:
                    yield text[i:strong]
                    i = strong
                    continue
                # Pass 2: clause end followed by a space (or end).
                weak = -1
                for j in range(lo, hi + 1):
                    c = text[j]
                    if c in ",;:" and (j + 1 >= n or text[j + 1] == " "):
                        weak = j + 1
                        break
                if weak > 0:
                    yield text[i:weak]
                    i = weak
                    continue
                # Pass 3: extend to the next whitespace after `target`
                # chars and include that whitespace in the chunk.
                end = min(i + target, n)
                while end < n and text[end] not in " \n\t":
                    end += 1
                if end < n:
                    end += 1
                yield text[i:end]
                i = end

        # Day 45 — queue-based streaming.
        # Backend pushes ALL chunks as fast as possible (no sleeps).
        # Client buffers them in a local queue and renders at a fixed
        # cadence. Decouples backend wall-clock from user-facing pace:
        # backend stays fast, user sees smooth controlled stream
        # regardless of network jitter or server load.
        for i, arg in enumerate(final.get("arguments") or []):
            arg_text = arg.get("argument") or ""
            # Everything except the text itself travels in argument_start.
            head_meta = {k: v for k, v in arg.items() if k != "argument"}
            yield _evt("argument_start", {"index": i, **head_meta})
            for chunk in _chunk_text(arg_text, target=100):
                yield _evt("argument_chunk", {
                    "index": i,
                    "text": chunk,
                })
            yield _evt("argument_end", {"index": i})

        # Citations + statutes + outcomes — single batch event
        yield _evt("meta", {
            "citations": final.get("citations") or [],
            "statutes": final.get("statutes") or [],
            "outcome_summary": final.get("outcome_summary") or {},
        })

        # Done — frontend can stop listening, save to history
        yield _evt("done", {
            "confidence": final.get("confidence"),
            "permalink_id": final.get("permalink_id"),
            "permalink_url": final.get("permalink_url"),
            "answered": final.get("answered"),
        })

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",  # disable nginx buffering for SSE
        },
    )


@app.post("/v1/lawyer/ask")
def lawyer_ask(body: _LawyerQARequest, request: Request = None):  # type: ignore
    """Answer a lawyer's question — with three-tier confidence.

    Pipeline:
      1. Domain classifier → top legal domain + score
      2. Quick retrieval probe → top-k similarity scores → relevance
         (mean of the three best scores) + top-3 hits
      3. Determine confidence tier from relevance + domain_score:
           • relevance >= borderline_below             → "high"
           • relevance >= out_of_scope_below
             OR domain_score >= 0.6                    → "borderline"
                (still answers, but with a clear caveat)
           • else                                       → "out_of_scope"
                (refuses, returns hint)
      4. For out_of_scope: return the hint, don't run the synthesizer.
      5. With body.early set: return the signals only, tagged
         phase="early", without running the synthesizer.
      6. Otherwise run the synthesizer and structure the response with
         explicit arguments + citations + disclaimer, then enrich it
         (all best-effort): hierarchical-graph bundle, Tier B shard
         results, doctrine catalog match, permalink persistence.

    The middle tier ("borderline") is the key change. Lawyers often
    ask questions that are *partially* covered — with the old binary
    gate they got either a confident answer or a flat refusal. Now they
    get an answer with explicit "low confidence" framing.

    Returns:
        The dict built by `_format_lawyer_response` (plus any enrichment
        fields), or a small dict with answered=False and a "reason" of
        "empty_question" / "synthesizer_error".
    """
    q = (body.question or "").strip()
    if not q:
        return {"answered": False, "confidence": "out_of_scope",
                "reason": "empty_question"}

    # 1. Domain classification — best-effort; on any failure the
    # defaults (None / 0.0) are kept.
    domain_top = None
    domain_score = 0.0
    try:
        from ..domain_classifier import classify
        d = classify(q)
        domain_top = d.get("top")
        domain_score = float((d.get("scores") or {}).get(domain_top, 0.0))
    except Exception:
        pass

    # 2. Retrieval relevance probe — also captures top-3 hits with their
    # case IDs/scores so the response can show the user WHAT was found
    # when relevance is borderline (instead of a bare 0.27 number).
    from ..pipeline import get_pipeline
    from ..core.types import Query as _Query
    pipe = get_pipeline()
    relevance = 0.0
    n_hits = 0
    top_hits: List[Dict[str, Any]] = []
    try:
        query = _Query(text=q)
        retrieved = pipe.retrievers.search(query, k=body.top_k)
        # Sort once, keep both scores and case info
        ranked = sorted(
            retrieved,
            key=lambda r: float(getattr(r, "score", 0.0)),
            reverse=True,
        )
        scored = [float(getattr(r, "score", 0.0)) for r in ranked]
        n_hits = len(scored)
        if scored:
            # Mean score of the (up to) three best hits.
            relevance = sum(scored[:3]) / min(3, len(scored))
        # Capture top-3 for the borderline explainer
        for r in ranked[:3]:
            # A result may wrap its chunk or be the chunk itself.
            chunk = getattr(r, "chunk", None) or r
            cid = (getattr(chunk, "doc_id", None) or
                   getattr(r, "doc_id", None) or
                   getattr(chunk, "id", None))
            md = getattr(chunk, "metadata", {}) or {}
            top_hits.append({
                "case_id": str(cid or ""),
                "title": md.get("title") or str(cid or ""),
                "score": round(float(getattr(r, "score", 0.0)), 3),
                "snippet": (getattr(chunk, "text", "") or "")[:200],
            })
    except Exception:
        # Best-effort: a retrieval failure leaves relevance at 0.0.
        pass

    # 3. Determine confidence tier. Domain score acts as a tie-breaker:
    # a high-confidence domain match can rescue borderline relevance
    # (the question IS clearly legal, our corpus just doesn't have a
    # close fact pattern — answer with caveat).
    if relevance >= body.borderline_below:
        confidence = "high"
    elif relevance >= body.out_of_scope_below or domain_score >= 0.6:
        confidence = "borderline"
    else:
        confidence = "out_of_scope"

    if confidence == "out_of_scope":
        # Don't persist out-of-scope answers — nothing useful to share.
        return _format_lawyer_response(
            confidence="out_of_scope",
            relevance=relevance,
            domain_top=domain_top, domain_score=domain_score,
            n_hits=n_hits, top_hits=top_hits,
        )

    # Early return — `early=true` means "give me just the signals, skip
    # the synthesizer". The frontend will render a placeholder screen
    # with these signals + top_hits, then re-query with early=false
    # for the full arguments. Cuts time-to-first-paint from 5-10s to
    # ~1-2s (just the retrieval probe).
    if body.early:
        early_resp = _format_lawyer_response(
            confidence=confidence,
            relevance=relevance,
            domain_top=domain_top, domain_score=domain_score,
            n_hits=n_hits, top_hits=top_hits,
            syn_result=None,  # skipped — frontend knows it's early
        )
        early_resp["phase"] = "early"
        return early_resp

    # 4. Run synthesizer. Any exception raised directly in this outer
    # `try` becomes a "synthesizer_error" response; the enrichment steps
    # inside have their own handlers and never fail the request.
    try:
        from ..intelligence import StrategySynthesizer  # type: ignore
        named = getattr(pipe.retrievers, "_retrievers", {}) or {}
        # Use the "hebrew_encoder" retriever for CBR when it exists,
        # otherwise fall back to the combined retriever object.
        cbr_r = named.get("hebrew_encoder") or pipe.retrievers
        syn = StrategySynthesizer(
            retriever=pipe.retrievers, cbr_retriever=cbr_r,
            full_text_loader=lambda did: pipe.get_text(did) or "",
            pipeline=pipe,
            polish_with_tau_llm=False,
        )
        result = syn.synthesize(
            user_facts=q, side=body.side or "plaintiff", top_k=body.top_k,
        )
        syn_dict = (result.to_dict() if hasattr(result, "to_dict")
                    else dict(result))
        formatted = _format_lawyer_response(
            confidence=confidence,
            relevance=relevance,
            domain_top=domain_top, domain_score=domain_score,
            n_hits=n_hits, top_hits=top_hits,
            syn_result=syn_dict,
        )

        # Enrich with hierarchical-graph bundle when available. Adds the
        # structured doctrine view (anchor + applications + origins +
        # statute_refs + fact_mapping) alongside the existing flat
        # arguments. Frontend can render either based on which fields
        # are populated. Failure is non-fatal — bundle is purely
        # additive.
        try:
            from ..hierarchical_graph import get_or_build_hgraph
            hg = get_or_build_hgraph(pipe)
            bundle = hg.build_argument(q, side=body.side or "plaintiff")
            bundle_dict = bundle.to_dict()
            # Skip empty bundle (no cluster found) so frontend can fall
            # back to flat arguments rendering
            if bundle_dict.get("cluster_id"):
                formatted["bundle"] = bundle_dict

                # Promote the bundle's anchor_quote to arguments[0] when
                # the bundle is confident. The case_based_arguments
                # pipeline does semantic clustering over top-K hits and
                # can surface the dominant CORPUS theme rather than the
                # cluster the query routes to. The graph bundle is the
                # doctrine-aware source of truth.
                # NOTE(review): despite its name, to_argument_text() is
                # used as a dict below (setdefault, list element) —
                # confirm its return type.
                graph_argument = bundle.to_argument_text()
                if graph_argument:
                    # v2.89.3 — surface section_origin on the graph_bundle
                    # entry too (the bundle's anchor_section from the
                    # diagnostic dict).
                    if bundle_dict.get("diagnostic"):
                        graph_argument.setdefault(
                            "section_origin",
                            bundle_dict["diagnostic"].get("anchor_section"))
                    # Mark legacy entries as supplementary so the UI can
                    # de-emphasize them when graph_bundle is leading.
                    legacy = formatted.get("arguments") or []
                    for la in legacy:
                        if la.get("polish_method") != "graph_bundle":
                            la["is_supplementary"] = True
                    # Graph argument first, then legacy ones; cap at 6.
                    formatted["arguments"] = ([graph_argument] + legacy)[:6]
            # Confidence quality gate. The relevance-based confidence
            # set in step 3 above only checks the retrieval scores —
            # which return *something* even for OOS queries. The bundle
            # is the doctrine-aware source of truth: if `can_promote()`
            # is False, the cluster routing didn't find a strong-enough
            # anchor, so we shouldn't claim "high" confidence regardless
            # of how relevance scored. Downgrade to "borderline" so the
            # UI shows the disclaimer prominently.
            # NOTE(review): only the `confidence` field changes here;
            # headline_he / disclaimer_he keep the "high" wording that
            # _format_lawyer_response chose.
            if not bundle.can_promote() and formatted.get("confidence") == "high":
                formatted["confidence"] = "borderline"
                formatted["confidence_downgraded"] = "weak_bundle"
        except Exception as _bundle_e:
            # Log but don't fail the request
            print(f"[lawyer_ask] hgraph bundle skipped: {_bundle_e}")

        # v2.96 (Day 7) — Tier B fallback. When Tier A's bundle is weak
        # (borderline confidence) we query the per-domain shard router
        # for additional verbatim citations from the 525K-doc corpus.
        # Surfaced as a separate `tier_b_results` field so the UI can
        # render them under an "additional citations" header — distinct
        # quality bar from the main Tier A answer.
        # NOTE(review): "out_of_scope" in the tuple below looks
        # unreachable — that tier already returned before the synthesizer.
        confidence_now = formatted.get("confidence", "")
        if confidence_now in ("borderline", "out_of_scope"):
            try:
                from ..retrieve.shard_router import get_shard_router
                router = get_shard_router()
                if router.is_available():
                    tier_b = router.search(q, k=5)
                    if tier_b:
                        formatted["tier_b_results"] = tier_b
            except Exception as _tb_e:
                print(f"[lawyer_ask] tier_b fallback skipped: {_tb_e}")

        # v2.96.4 (Day 7) — Curated doctrine catalog match. Same
        # pattern as /v1/argument/analyze: run classify_doctrines on
        # the question and surface the best match as doctrine_match,
        # with up to two alternatives.
        # NOTE(review): an earlier version of this comment said
        # "score>=1.5", but the call below passes min_score=1.0 —
        # confirm which threshold is intended.
        try:
            from ..intelligence.doctrine_classifier import classify_doctrines
            doc_matches = classify_doctrines(q, k=3, min_score=1.0)
            if doc_matches:
                top = doc_matches[0]
                dm = top.to_dict()
                if len(doc_matches) > 1:
                    dm["alternatives"] = [m.to_dict()
                                          for m in doc_matches[1:3]]
                formatted["doctrine_match"] = dm
        except Exception as _dc_e:
            print(f"[lawyer_ask] doctrine_classifier skipped: {_dc_e}")

        # Persist for permalink — best-effort, never blocks the response.
        try:
            qa_id = _save_lawyer_answer(q, body.side or "plaintiff",
                                         formatted, request)
            if qa_id:
                formatted["permalink_id"] = qa_id
                formatted["permalink_url"] = f"/?qa={qa_id}"
        except Exception:
            pass
        # Cheap periodic cleanup — intended to run once per ~1000 answers,
        # keyed on the "kept" counter in _WA_STATS.
        # NOTE(review): while that counter is 0 or missing, `0 % 1000 == 0`
        # is true, so the prune runs on every request — confirm that
        # "kept" is the right counter for this endpoint.
        try:
            if (sum(_WA_STATS.get(k, 0) for k in ("kept",)) % 1000) == 0:
                _lawyer_qa_prune()
        except Exception:
            pass
        return formatted
    except Exception as e:
        return {
            "answered": False,
            "reason": "synthesizer_error",
            "error": f"{type(e).__name__}: {e}",
        }


# ──────────────────────────────────────────────────────────────────────
# v2.93.0 (Day 4) — Argument Classifier
#
# /v1/argument/analyze takes a single legal claim + side and returns
# bilateral analysis: pro_arguments (supporting), con_arguments
# (opposing), strength_score, doctrine bundle, and missing facts.
#
# Implementation: calls the existing synthesizer twice (plaintiff +
# defendant frames) to get both sides, de-dups by case ID, classifies
# each retrieved paragraph by lexical heuristic into a section hint,
# and scores strength as the pro/(pro+con) ratio of retrieval scores.
#
# No external LLM. No corpus reindex. Honest about its heuristic nature
# in `meta.method`.
# ──────────────────────────────────────────────────────────────────────
import re as _argclf_re  # local alias — module-level `re` isn't imported


class _ArgumentAnalyzeRequest(BaseModel):  # type: ignore
    """Request model carrying a legal claim, a side, and a top_k count.

    NOTE(review): presumably the body of POST /v1/argument/analyze — the
    route that uses it is not visible from here; confirm.
    """
    # The legal claim text to analyze (required).
    claim: str
    # Which side's perspective to take; the default is the Hebrew word
    # for "objective".
    side: Optional[str] = "אובייקטיבי"
    # presumably the number of results retrieved per synthesizer call —
    # TODO confirm against the handler.
    top_k: int = 10


# Heuristic section classifier — runs on each retrieved paragraph text.
# Order matters: ruling/holding markers checked BEFORE party-claim markers
# (some rulings quote the parties before deciding).
# Ruling / holding vocabulary (e.g. "therefore", "I rule", "dismissed",
# "granted", "the court held").
_SECTION_RULING_RX = _argclf_re.compile(
    r"(הכרעה|סבורני|לפיכך|אני קובע|אני פוסק|מסקנת|המסקנה|"
    r"בית[\s\-]?המשפט קבע|נדחית|נדחתה|מתקבלת|מתקבל)"
)
# Phrases attributing a claim to the plaintiff, the applicant or the
# petitioner; the definite-article prefix is optional (`ה?`).
_SECTION_PLAINTIFF_RX = _argclf_re.compile(
    r"(טוען\s+ה?תובע|לטענת\s+ה?תובע|טענת\s+ה?תובע|"
    r"טוען\s+ה?מבקש|לטענת\s+ה?מבקש|טענת\s+ה?מבקש|"
    r"לדבריו\s+של\s+ה?תובע|העותר\s+טוען|טענות\s+ה?עותר)"
)
# Phrases attributing a claim to the defendant or the respondent.
_SECTION_DEFENDANT_RX = _argclf_re.compile(
    r"(טוען\s+ה?נתבע|לטענת\s+ה?נתבע|טענת\s+ה?נתבע|"
    r"טוען\s+ה?משיב|לטענת\s+ה?משיב|טענת\s+ה?משיב|"
    r"לדבריו\s+של\s+ה?נתבע|המשיבה\s+טוענת|טענות\s+ה?משיב)"
)
# Legal-discussion vocabulary (e.g. "as is well known", "the test is",
# "settled case law", "it was held that").
_SECTION_DISCUSSION_RX = _argclf_re.compile(
    r"(לעניין\s+זה|כידוע|מן\s+הראוי|המבחן\s+הוא|"
    r"מבחן\s+ה|הלכה\s+פסוקה|נפסק\s+כי|הלכת)"
)


def _classify_paragraph_section(text: str) -> str:
    """Label a paragraph by the first lexical marker family it contains.

    Only the first 500 characters are inspected — scanning the full text
    would over-match. Marker families are tried in priority order
    (ruling → plaintiff → defendant → discussion) and the first one that
    matches wins.

    Returns one of:
        'ruling', 'plaintiff_claim', 'defendant_claim', 'discussion',
        'unlabeled'
    """
    head = text[:500]
    # Order matters: ruling markers outrank party-claim markers.
    prioritized_rules = (
        (_SECTION_RULING_RX, "ruling"),
        (_SECTION_PLAINTIFF_RX, "plaintiff_claim"),
        (_SECTION_DEFENDANT_RX, "defendant_claim"),
        (_SECTION_DISCUSSION_RX, "discussion"),
    )
    for pattern, label in prioritized_rules:
        if pattern.search(head):
            return label
    return "unlabeled"


def _strength_caption(score: int) -> str:
    """Translate a 0-100 strength score into its Hebrew caption.

    The caption bands start at 90, 75, 50 and 25; anything below 25 gets
    the weakest caption.
    """
    bands = (
        (90, "טענה חזקה — פסיקה תומכת חזקה ומגוונת"),
        (75, "טענה חזקה יחסית — פסיקה תומכת מספקת"),
        (50, "טענה בינונית — פסיקה מעורבת"),
        (25, "טענה חלשה — פסיקה ברובה סותרת"),
    )
    # Bands are listed from the highest floor down; the first floor the
    # score reaches wins.
    for floor, caption in bands:
        if score >= floor:
            return caption
    return "טענה חלשה מאוד או חסרה פסיקה רלוונטית"


# v2.99 (Day 9) — Extract case citation from argument text when the
# `source_cases` metadata is empty. The synthesizer often embeds the
# citation in-line like `[CL-בגץ7585_01]` or as a Hebrew citation
# `בג"ץ 7585/01` near the start. Without this, every result showed
# "(ללא ציטוט)" which crushed credibility.
_CITE_BRACKET_RX = _argclf_re.compile(
    r"\[CL-([א-תa-zA-Z0-9_]+)\]"
)
_CITE_HEBREW_RX = _argclf_re.compile(
    r"\b((?:ע\"?א|ע\"?פ|בג\"?ץ|רע\"?א|רע\"?פ|דנ\"?א|דנ\"?פ|בש\"?פ|"
    r"ע\"?ע|בג\"?צ|בש\"?א|תמ\"?ש|ה\"?פ|בה\"?ן)\s*\d+(?:[\/\-]\d+)+)"
)

# Quote-less court prefixes as they appear inside a `[CL-<id>]` marker,
# mapped to their display form. The table covers every court abbreviation
# that `_CITE_HEBREW_RX` recognises, so both extraction paths produce the
# same style of label. Prefixes containing `"` are deliberately absent:
# `_CITE_BRACKET_RX` cannot capture a quote character, so they could never
# match.
_CL_PREFIX_TO_HEBREW = (
    ("בגץ", 'בג"ץ'),
    ("בגצ", 'בג"ץ'),
    ("רעא", 'רע"א'),
    ("רעפ", 'רע"פ'),
    ("דנא", 'דנ"א'),
    ("דנפ", 'דנ"פ'),
    ("בשפ", 'בש"פ'),
    ("בשא", 'בש"א'),
    ("תמש", 'תמ"ש'),
    ("בהן", 'בה"ן'),
    ("עא", 'ע"א'),
    ("עפ", 'ע"פ'),
    ("עע", 'ע"ע'),
    ("הפ", 'ה"פ'),
)


def _extract_citation_from_text(text: str) -> Optional[str]:
    """Best-effort extraction of a case citation from argument text.

    Only the first 1500 characters are searched. Two forms are tried, in
    this order:

      1. A native Hebrew citation such as 'ע"א 1234/22' or 'בג"ץ 5856/03',
         returned as found (stripped).
      2. A `[CL-<id>]` marker such as `[CL-בגץ5856_03]`, normalised to the
         Hebrew display form ('בג"ץ 5856/03'). Ids with an unknown prefix
         are returned with `_` replaced by `/`.

    Args:
        text: Argument text; may be empty or None.

    Returns:
        The citation string, or None when nothing citation-like is found.
    """
    if not text:
        return None
    head = text[:1500]

    # 1. Native Hebrew citation — already in its most readable form.
    hebrew_hit = _CITE_HEBREW_RX.search(head)
    if hebrew_hit:
        return hebrew_hit.group(1).strip()

    # 2. CL-bracket id — rebuild the Hebrew display form.
    bracket_hit = _CITE_BRACKET_RX.search(head)
    if bracket_hit is None:
        return None
    raw = bracket_hit.group(1)
    for prefix, hebrew in _CL_PREFIX_TO_HEBREW:
        if raw.startswith(prefix):
            # A separator underscore right after the prefix would otherwise
            # turn into a stray leading '/'.
            rest = raw[len(prefix):].lstrip("_").replace("_", "/")
            return f"{hebrew} {rest}"
    # Unknown prefix — just return raw with _ → /
    return raw.replace("_", "/")


def _shape_argument_for_analyze(arg: dict, section_hint: str) -> dict:
    """Reshape one drafted-argument dict into the analyze response shape.

    Keeps the argument text verbatim, attaches `section_hint`, and reduces
    the source metadata to one label and one score.

    The source list is read from `source_cases`, `sources` or `citations`
    (first non-empty wins). Its first entry supplies the label and score:

      * a dict entry is read via `title` / `case_id` / `id` and `score`;
      * a plain-string entry is used as the label directly;
      * anything else is ignored instead of raising.

    If no label is found there, `arg["source_case_id"]` is used, then a
    citation parsed out of the text, then the "(ללא ציטוט)" placeholder.

    Args:
        arg: Draft dict as produced by the synthesizer.
        section_hint: Label to store under `section_hint`.

    Returns:
        Dict with keys `text`, `source_case`, `source_score` (rounded to
        3 decimals), `section_hint` and `n_sources`.
    """
    text = (arg.get("argument") or arg.get("text") or "")
    sources = (arg.get("source_cases")
               or arg.get("sources")
               or arg.get("citations")
               or [])
    if not isinstance(sources, (list, tuple)):
        # A dict or scalar here cannot be indexed positionally.
        sources = []

    first = sources[0] if sources else None
    primary_src = first if isinstance(first, dict) else {}

    case_label = (primary_src.get("title")
                  or primary_src.get("case_id")
                  or primary_src.get("id"))
    if not case_label and isinstance(first, str):
        # Flat citation list: the entry itself is the label.
        case_label = first.strip()
    if not case_label:
        case_label = arg.get("source_case_id")
    if not case_label:
        # v2.99 — last resort: parse the text
        case_label = _extract_citation_from_text(text) or "(ללא ציטוט)"

    return {
        "text": text,
        "source_case": case_label,
        "source_score": round(float(primary_src.get("score")
                                    or arg.get("score") or 0.0), 3),
        "section_hint": section_hint,
        "n_sources": int(arg.get("n_sources") or len(sources) or 0),
    }


@app.post("/v1/argument/analyze")
def argument_analyze(body: _ArgumentAnalyzeRequest, request: Request = None):  # type: ignore
    """Bilateral argument analysis.

    Runs the synthesizer once per side (plaintiff and defendant frames),
    de-duplicates the drafts by source case, classifies each kept draft by
    section, and returns the pro/con split together with a strength score.

    v2.94 (Day 5): gated to Solo+. Costs 5 credits per call.
    Identity = X-User-Email header (set by frontend from Google
    Sign-In session). Anonymous callers (no header) get Free tier,
    which doesn't unlock argument_analyze — they see a 403.

    Returns:
        * `{"ok": False, "reason": "empty_claim"}` for a blank claim.
        * A 402 (credits exhausted) or 403 (tier-locked) JSON response when
          the entitlement check refuses the call.
        * `{"ok": False, "reason": "import_failed", ...}` when the analysis
          modules cannot be imported.
        * Otherwise the analysis dict (`pro_arguments`, `con_arguments`,
          `strength_score`, `doctrine`, `doctrine_match`, `missing_facts`,
          `tier_b_results`, `entitlement`, `meta`).
    """
    claim = (body.claim or "").strip()
    if not claim:
        return {"ok": False, "reason": "empty_claim"}

    # Tier gate
    # NOTE(review): the identity comes from a client-supplied header and is
    # not verified in this function — confirm it is authenticated upstream.
    user_email = None
    if request is not None:
        user_email = (request.headers.get("X-User-Email") or "").strip() or None
    from ..middleware.entitlements import check_entitlement
    decision = check_entitlement(user_email, "argument_analyze")
    if not decision.allowed:
        # 402 Payment Required for credits, 403 Forbidden for tier-locked.
        status = 402 if decision.reason == "credits_exhausted" else 403
        body_he = {
            "ok": False,
            "reason": decision.reason,
            "entitlement": decision.to_dict(),
            "message_he": (
                "המנוי שלך הגיע למכסת הקרדיטים החודשית. "
                "שדרג ל-Pro לקבלת 1,500 קרדיטים בחודש."
                if decision.reason == "credits_exhausted"
                else "ניתוח טענה הוא פיצ׳ר חבילות בתשלום (Solo ומעלה). "
                     "שדרג ל-Solo (₪199 לחיים — Founding 50)."
            ),
            "upgrade_url": "/pricing",
        }
        return JSONResponse(status_code=status, content=body_he)

    try:
        from ..pipeline import get_pipeline
        from ..intelligence import StrategySynthesizer
        from ..hierarchical_graph import get_or_build_hgraph
    except Exception as e:
        return {"ok": False, "reason": "import_failed",
                "error": f"{type(e).__name__}: {e}"}

    pipe = get_pipeline()
    named = getattr(pipe.retrievers, "_retrievers", {}) or {}
    cbr_r = named.get("hebrew_encoder") or pipe.retrievers
    syn = StrategySynthesizer(
        retriever=pipe.retrievers, cbr_retriever=cbr_r,
        full_text_loader=lambda did: pipe.get_text(did) or "",
        pipeline=pipe, polish_with_tau_llm=False,
    )

    # Run synthesizer twice — one frame per side. The synthesizer returns
    # a nested dict: result["case_based_arguments"]["drafted_arguments_for_user"]
    # is the list of args. Each draft has `argument` (text), `source_cases`
    # (case metadata), `polish_method`, `section_origin`.
    def _frame(side: str) -> list:
        try:
            r = syn.synthesize(user_facts=claim, side=side, top_k=body.top_k)
            d = r.to_dict() if hasattr(r, "to_dict") else dict(r)
            cba = d.get("case_based_arguments") or {}
            return cba.get("drafted_arguments_for_user") or []
        except Exception as _e:
            # Best effort: a failed frame contributes no arguments.
            print(f"[argument/analyze] _frame({side}) failed: {_e}")
            return []

    pro_raw = _frame("plaintiff")
    con_raw = _frame("defendant")

    def _arg_text(arg: dict) -> str:
        # Same text lookup as the classification step below; also safe
        # when the `argument` key is present but None.
        return arg.get("argument") or arg.get("text") or ""

    def _first_source(arg: dict) -> dict:
        # First source entry, or {} when there is none or it is not a dict.
        src_list = arg.get("source_cases") or arg.get("sources") or []
        if (isinstance(src_list, (list, tuple)) and src_list
                and isinstance(src_list[0], dict)):
            return src_list[0]
        return {}

    # De-dup by source case across the two sets — a case shouldn't
    # appear on both sides simultaneously. Keep the higher-score side.
    def _arg_src_id(arg: dict) -> str:
        s0 = _first_source(arg)
        if s0:
            return (s0.get("case_id") or s0.get("id")
                    or s0.get("title") or _arg_text(arg)[:80])
        return arg.get("source_case_id") or _arg_text(arg)[:80]

    def _arg_score(arg: dict) -> float:
        s0 = _first_source(arg)
        if s0.get("score") is not None:
            return float(s0["score"])
        return float(arg.get("score") or 0.0)

    # case_id → (side, score, arg). The same "strictly higher score wins"
    # rule is applied to every draft, so a duplicate inside the plaintiff
    # frame can no longer replace a better-scoring earlier draft.
    seen: dict = {}
    for side_tag, drafts in (("pro", pro_raw), ("con", con_raw)):
        for arg in drafts:
            src = _arg_src_id(arg)
            s = _arg_score(arg)
            prev = seen.get(src)
            if prev is None or s > prev[1]:
                seen[src] = (side_tag, s, arg)

    # Build the bilateral split with section classification
    pro_args = []
    con_args = []
    for _src, (side, _score, arg) in seen.items():
        section = _classify_paragraph_section(_arg_text(arg))
        shaped = _shape_argument_for_analyze(arg, section)
        (pro_args if side == "pro" else con_args).append(shaped)

    # Strength score: pro/(pro+con) of retrieval scores, scaled to 0-100.
    if pro_args or con_args:
        # The epsilon keeps the ratio defined when one side sums to zero.
        pro_sum = sum(a["source_score"] for a in pro_args) or 0.001
        con_sum = sum(a["source_score"] for a in con_args) or 0.001
        raw_strength = pro_sum / (pro_sum + con_sum)
        strength_score = int(round(raw_strength * 100))
    else:
        # Nothing retrieved on either side: report "no relevant case law"
        # (lowest band) rather than a 50/50 "mixed" verdict.
        strength_score = 0

    # Doctrine bundle (shared between both sides)
    doctrine = None
    missing_facts: list = []
    try:
        hg = get_or_build_hgraph(pipe)
        # NOTE(review): `body.side` defaults to the Hebrew label, so the
        # "plaintiff" fallback only applies to an explicit null/empty side —
        # confirm build_argument accepts the Hebrew labels.
        bundle = hg.build_argument(claim, side=body.side or "plaintiff")
        bd = bundle.to_dict()
        if bd.get("cluster_id"):
            doctrine = {
                "anchor_label": bd.get("anchor_label"),
                "anchor_quote": bd.get("anchor_quote"),
                "cluster_score": bd.get("cluster_score"),
                "coverage": bd.get("coverage"),
                "promoted": bundle.can_promote(),
            }
        # Missing facts: fact_mapping entries flagged as not-covered
        for fm in bd.get("fact_mapping") or []:
            if isinstance(fm, dict) and fm.get("covered") is False:
                missing_facts.append(fm.get("element") or fm.get("label") or "")
        # If can_promote is False, strength is capped at 50
        if doctrine and not doctrine.get("promoted"):
            strength_score = min(strength_score, 50)
    except Exception as e:
        print(f"[argument/analyze] doctrine bundle skipped: {e}")

    # v2.96 (Day 7) — Tier B fallback. When the doctrine bundle is
    # weak (can_promote=False) OR we got <3 total args from Tier A,
    # query the per-domain shard router for additional verbatim
    # citations from the wider 525K-doc corpus. Surfaced as a
    # separate field so the UI can label these as "ציטוטים מהקורפוס
    # הרחב" (no doctrine framing) rather than mixing with the
    # bilateral Tier A results.
    tier_b_results: list = []
    needs_fallback = (
        (doctrine is None or not doctrine.get("promoted"))
        or (len(pro_args) + len(con_args)) < 3
    )
    if needs_fallback:
        try:
            from ..retrieve.shard_router import get_shard_router
            router = get_shard_router()
            if router.is_available():
                tier_b_results = router.search(claim, k=5)
        except Exception as e:
            print(f"[argument/analyze] tier_b fallback skipped: {e}")

    # v2.96.4 (Day 7) — Curated doctrine catalog match. Run the
    # `classify_doctrines` keyword classifier over the claim text. If
    # the top match is confident (score >= 1.5 = at least one keyword
    # + one statute/element OR two keywords), surface as a separate
    # `doctrine_match` field. UI renders this as a high-trust chip
    # above the bundle anchor, since it's a NAMED doctrine from a
    # curated catalog vs. an emergent cluster.
    doctrine_match = None
    try:
        from ..intelligence.doctrine_classifier import classify_doctrines
        matches = classify_doctrines(claim, k=3, min_score=1.0)
        if matches:
            top = matches[0]
            doctrine_match = top.to_dict()
            # Include up to 2 secondary matches in case the lawyer wants
            # to explore alternatives ("is this really good faith, or
            # could it be estoppel?")
            if len(matches) > 1:
                doctrine_match["alternatives"] = [
                    m.to_dict() for m in matches[1:3]
                ]
    except Exception as e:
        print(f"[argument/analyze] doctrine_classifier skipped: {e}")

    return {
        "ok": True,
        "claim": claim,
        "side": body.side or "אובייקטיבי",
        "doctrine": doctrine,
        "doctrine_match": doctrine_match,
        "pro_arguments": pro_args[:8],
        "con_arguments": con_args[:8],
        "strength_score": strength_score,
        "strength_caption": _strength_caption(strength_score),
        "missing_facts": [f for f in missing_facts if f][:5],
        "tier_b_results": tier_b_results,
        "entitlement": decision.to_dict(),
        "meta": {
            "method": "bilateral_dual_frame",
            "section_classifier": "lexical_heuristic_v1",
            # Counts are taken before the 8-item truncation above.
            "n_pro": len(pro_args),
            "n_con": len(con_args),
            "n_tier_b": len(tier_b_results),
            "tier_b_triggered": needs_fallback,
        },
    }


# ──────────────────────────────────────────────────────────────────────
# v2.94 (Day 5) — Entitlements query, admin, and billing webhook
#
# GET  /v1/entitlements/me              — current user reads own tier
# POST /v1/admin/entitlements/set       — admin grants tier to email
# GET  /v1/admin/entitlements/list      — admin lists all
# POST /v1/billing/webhook              — Lemon Squeezy stub (TODO)
# ──────────────────────────────────────────────────────────────────────

class _AdminSetTierRequest(BaseModel):  # type: ignore
    # Request body for POST /v1/admin/entitlements/set. Every field is
    # forwarded unchanged to the entitlement store's `set_tier`.

    # Account the tier is granted to.
    email: str
    # Tier name; the handler turns a ValueError raised by `set_tier` into a
    # 400 response (presumably for unknown tier names — confirm in the store).
    tier: str
    # Validity window in days, passed through as `valid_until_days`.
    valid_until_days: Optional[int] = 31
    # Passed through as `founding` (the "Founding 50" manual grants).
    founding: bool = False


def _check_admin_key(request) -> bool:
    """Return whether the request carries a valid admin key.

    The key is read from the `X-API-Key` header (canonical) or, failing
    that, `X-Admin-Key` (legacy). Validation is delegated to
    `auth.get_auth().is_admin()`, described in the original notes as the
    same check the middleware applies on /v1/admin/*, so the keystore is
    the single source of truth even when it holds several admin keys.

    Only if the auth store cannot be imported or raises does the function
    fall back to comparing against the `TAU_RAG_ADMIN_KEY` environment
    variable. That comparison is constant-time so response timing does not
    reveal how much of a guessed key was correct.

    Args:
        request: Object exposing a mapping-like `headers` attribute.

    Returns:
        True for a valid admin key; False for a missing or invalid one.
    """
    import hmac

    key = (request.headers.get("X-API-Key")
           or request.headers.get("X-Admin-Key") or "").strip()
    if not key:
        return False
    try:
        from ..middleware.auth import get_auth
        return get_auth().is_admin(key)
    except Exception as e:
        # Fallback to env-var comparison if auth store unavailable. Log the
        # failure (never the key) so a broken keystore does not go unnoticed.
        print(f"[admin/auth] keystore check failed, using env fallback: "
              f"{type(e).__name__}")
        expected = _os.environ.get("TAU_RAG_ADMIN_KEY", "").strip()
        if not expected:
            return False
        # Compare as bytes: compare_digest rejects non-ASCII str arguments.
        return hmac.compare_digest(key.encode("utf-8"),
                                   expected.encode("utf-8"))


@app.get("/v1/entitlements/me")
def entitlement_me(request: Request):  # type: ignore
    """Report the caller's entitlement record plus its tier definition.

    The caller is identified by the `X-User-Email` header; a missing or
    blank header is passed to the store as None (anonymous → free, per the
    original notes). The store's record is returned with an extra
    `tier_def` entry holding the tier's monthly credits, Hebrew price copy
    and sorted unlocked endpoints, so the UI can render it directly.
    """
    header_value = request.headers.get("X-User-Email") or ""
    caller = header_value.strip() or None

    from ..middleware.entitlements import get_entitlement_store, TIER_DEFS

    record = get_entitlement_store().get_tier(caller)
    tier_spec = TIER_DEFS[record["tier"]]
    record["tier_def"] = dict(
        monthly_credits=tier_spec["monthly_credits"],
        price_he=tier_spec["price_he"],
        unlocked_endpoints=sorted(tier_spec["unlocked_endpoints"]),
    )
    return record


@app.post("/v1/admin/entitlements/set")
def admin_set_tier(body: _AdminSetTierRequest,
                   request: Request):  # type: ignore
    """Manually grant or revoke a tier for an email address (admin only).

    Responds 403 `admin_required` without a valid admin key, and 400 with
    the error text when the store rejects the request with ValueError.
    Used pre-payment-integration and for Founding 50 manual assignments.
    """
    is_admin = _check_admin_key(request)
    if not is_admin:
        denied = {"ok": False, "reason": "admin_required"}
        return JSONResponse(status_code=403, content=denied)

    from ..middleware.entitlements import get_entitlement_store

    store = get_entitlement_store()
    grant = {
        "email": body.email,
        "tier": body.tier,
        "valid_until_days": body.valid_until_days,
        "founding": body.founding,
    }
    try:
        updated = store.set_tier(**grant)
    except ValueError as err:
        rejected = {"ok": False, "reason": str(err)}
        return JSONResponse(status_code=400, content=rejected)
    return {"ok": True, "entitlement": updated}


@app.get("/v1/admin/entitlements/list")
def admin_list_entitlements(request: Request,  # type: ignore
                            limit: int = 100):
    """Return entitlement rows from the store, capped by `limit` (admin only).

    Responds 403 `admin_required` when the request has no valid admin key.
    """
    if _check_admin_key(request):
        from ..middleware.entitlements import get_entitlement_store

        rows = get_entitlement_store().list_all(limit)
        # NOTE(review): `count` is always None — looks like a placeholder
        # for a total that is not computed here; confirm before relying on it.
        return {"ok": True, "count": None, "entitlements": rows}

    return JSONResponse(status_code=403,
                        content={"ok": False, "reason": "admin_required"})


@app.post("/v1/billing/webhook")
async def billing_webhook(request: Request):  # type: ignore
    """Lemon Squeezy webhook — STUB.

    Current behavior: reads the raw body, logs its length and the first 16
    characters of the `X-Signature` header, and acknowledges with a stub
    payload so Lemon Squeezy does not retry. Nothing is verified or
    processed yet. Any exception is logged and answered with a 500.

    TODO once Lemon Squeezy credentials are available:
      1. Verify the HMAC signature: X-Signature header vs
         hmac.new(secret, body, sha256).hexdigest()
      2. Parse event_name from the JSON ('subscription_created',
         'subscription_updated', 'subscription_cancelled',
         'order_created' for the Founding 50 one-time purchase)
      3. Map product variant_id → tier:
           legal-eye-founding-50 → solo (founding=True, valid_until=None)
           legal-eye-solo        → solo (valid_until_days=31)
           legal-eye-pro         → pro
           legal-eye-firm        → firm
      4. Extract the email from data.attributes.user_email
      5. Call store.set_tier(email, tier, valid_until_days, founding)
    """
    try:
        payload = await request.body()
        signature = request.headers.get("X-Signature", "(none)")
        sig_preview = signature[:16]
        # Log only; real processing waits for the HMAC verifier.
        print(f"[billing/webhook] received len={len(payload)} sig={sig_preview}...")
        # Ack right away: per the original notes, Lemon Squeezy retries with
        # exponential backoff unless it gets a 200 within 5s.
        ack = {"ok": True, "stub": True,
               "note": "webhook handler not yet wired — payload logged"}
        return ack
    except Exception as exc:
        print(f"[billing/webhook] error: {exc}")
        failure = {"ok": False, "error": str(exc)}
        return JSONResponse(status_code=500, content=failure)


# ──────────────────────────────────────────────────────────────────────
# v2.95 (Day 6) — Litigation Memo Generator
#
# POST /v1/memo/generate
#
# Takes a pre-computed argument analysis result + the original claim and
# produces a Hebrew RTL .docx memo file. The client is expected to have
# JUST run /v1/argument/analyze and pass that result back here — avoids
# the cost of re-running the bilateral synthesis. Memo cost = 15 credits
# (per MONETIZATION.md), so a complete workflow is:
#   1. POST /v1/argument/analyze (5 credits, returns analysis)
#   2. POST /v1/memo/generate    (15 credits, returns DOCX)
# Total: 20 credits per case = ~10 memos/month on Solo (200 credits).
#
# Returns binary DOCX with Content-Type
# application/vnd.openxmlformats-officedocument.wordprocessingml.document.
# ──────────────────────────────────────────────────────────────────────

class _MemoGenerateRequest(BaseModel):  # type: ignore
    # Request body for POST /v1/memo/generate.

    # Claim text. The handler strips it and answers 400 `empty_claim` when
    # nothing is left.
    claim: str
    # Point-of-view label printed in the memo header; the default is the
    # Hebrew word for "objective".
    side: Optional[str] = "אובייקטיבי"
    # The full result dict from /v1/argument/analyze. Client passes its
    # last-known-good analysis to avoid the re-compute cost. The handler
    # answers 400 `empty_analysis` when it has neither pro nor con arguments.
    analysis: Optional[Dict[str, Any]] = None
    # Optional free-text facts the lawyer wants to embed. Rendered as-is
    # in the "רקע עובדתי" (factual background) section, one paragraph per
    # blank-line-separated chunk.
    facts: Optional[str] = ""
    # Optional author name. When non-empty it is added to the memo header
    # line; when empty the header carries no author entry.
    author_name: Optional[str] = ""


def _build_argument_memo_docx(claim: str, side: str,
                              analysis: dict,
                              facts: str = "",
                              author_name: str = "") -> bytes:
    """Generate a Hebrew right-to-left .docx memo from an argument analysis.

    Sections, in document order:
      1. Header (title, date, point of view, optional author) + the claim
      2. רקע עובדתי — factual background (free-text facts, or a placeholder)
      3. הדוקטרינה הרלוונטית — doctrine label and quote, when present
      4. טענות תומכות — supporting arguments (`pro_arguments`)
      5. טענות נגדיות — opposing arguments (`con_arguments`)
      6. עובדות חסרות לחיזוק הטענה — missing facts, as bullets
      7. סיכום — summary with the strength score and caption
      8. Footer (disclaimer + Legal Eye attribution)

    `analysis` is supplied by the client, so it is read defensively:
    entries of the wrong type are skipped instead of raising, and control
    characters that XML cannot carry are stripped from all text.

    Args:
        claim: The claim under review.
        side: Point-of-view label for the header; empty falls back to the
            Hebrew "objective" label.
        analysis: Result dict in the shape returned by /v1/argument/analyze.
        facts: Optional free text; blank-line-separated chunks become
            paragraphs.
        author_name: Optional author for the header line.

    Returns:
        The .docx file content as bytes.

    Raises:
        RuntimeError: python-docx is not installed.
    """
    from io import BytesIO
    import datetime as _dt
    import re as _re
    try:
        from docx import Document as DocxDocument
        from docx.shared import Pt, Inches, RGBColor
        from docx.enum.text import WD_ALIGN_PARAGRAPH
        from docx.oxml.ns import qn
        from docx.oxml import OxmlElement
    except ImportError as exc:
        raise RuntimeError(
            "python-docx not installed; pip install python-docx") from exc

    if not isinstance(analysis, dict):
        analysis = {}

    gold = RGBColor(0xBF, 0x9B, 0x30)         # Old Gold — headings, score
    muted = RGBColor(0x94, 0xA3, 0xB8)        # placeholder, footer
    source_gray = RGBColor(0x64, 0x74, 0x8B)  # source lines, disclaimer
    quote_gray = RGBColor(0x47, 0x55, 0x69)   # doctrine quote
    warn = RGBColor(0xB4, 0x53, 0x09)         # low-confidence notice

    # C0 control characters other than tab / newline / carriage return are
    # rejected by the XML layer, so they are removed from all text.
    ctrl_rx = _re.compile(r"[\x00-\x08\x0b\x0c\x0e-\x1f]")

    def _clean(value) -> str:
        """Coerce a value to str and drop XML-incompatible control chars."""
        if value is None:
            return ""
        return ctrl_rx.sub("", value if isinstance(value, str) else str(value))

    def _clip(text: str, limit: int) -> str:
        """Cut text to `limit` chars, marking the cut with an ellipsis."""
        return text[:limit] + ("..." if len(text) > limit else "")

    def _dict_items(key: str) -> list:
        """Return the dict entries of analysis[key]; [] for any other shape."""
        raw = analysis.get(key)
        if not isinstance(raw, (list, tuple)):
            return []
        return [item for item in raw if isinstance(item, dict)]

    doc = DocxDocument()

    # Default font + RTL
    style = doc.styles["Normal"]
    style.font.name = "David"
    style.font.size = Pt(11)
    rPr = style.element.get_or_add_rPr()
    rFonts = rPr.find(qn("w:rFonts"))
    if rFonts is None:
        rFonts = OxmlElement("w:rFonts")
        rPr.append(rFonts)
    # Complex-script and high-ANSI font slots, set alongside the default.
    rFonts.set(qn("w:cs"), "David")
    rFonts.set(qn("w:hAnsi"), "David")

    def _rtl(p):
        """Mark a paragraph bidirectional and align it to the right."""
        pPr = p._p.get_or_add_pPr()
        bidi = OxmlElement("w:bidi")
        bidi.set(qn("w:val"), "1")
        pPr.append(bidi)
        p.alignment = WD_ALIGN_PARAGRAPH.RIGHT
        return p

    def _h(text: str, level: int = 1, color=gold):
        """Add an RTL heading with every run tinted `color`."""
        h = doc.add_heading(_clean(text), level=level)
        _rtl(h)
        for run in h.runs:
            run.font.color.rgb = color
        return h

    def _run(p, text, bold=None, size=None, color=None):
        """Append a cleaned run to `p`; only the given formatting is set."""
        r = p.add_run(_clean(text))
        if bold is not None:
            r.bold = bold
        if size is not None:
            r.font.size = Pt(size)
        if color is not None:
            r.font.color.rgb = color
        return r

    def _para(text: str = "", italic: bool = False, color=None, size: int = 11):
        """Add an RTL body paragraph, with one run when `text` is non-empty."""
        p = doc.add_paragraph()
        _rtl(p)
        p.paragraph_format.line_spacing = 1.45
        text = _clean(text)
        if text:
            r = p.add_run(text)
            r.italic = italic
            r.font.size = Pt(size)
            if color is not None:
                r.font.color.rgb = color
        return p

    def _render_arguments(heading: str, args: list) -> None:
        """Render one numbered argument list (shared by pro and con)."""
        if not args:
            return
        _h(f"{heading} ({len(args)})", level=2)
        for i, arg in enumerate(args, 1):
            p = _para()
            _run(p, f"{i}. ", bold=True)
            # Cap each arg at ~800 chars to keep the memo readable
            _run(p, _clip(_clean(arg.get("text")).strip(), 800))

            # Source line under each argument
            src = _clean(arg.get("source_case")) or "(ללא ציטוט)"
            sp = _para()
            sp.paragraph_format.left_indent = Inches(0.3)
            _run(sp, f"📖 מקור: {src}", size=9, color=source_gray)

            raw_score = arg.get("source_score")
            if not raw_score:
                continue
            try:
                # round(), not int(): int(0.29 * 100) would print 28.
                rating = round(float(raw_score) * 100)
            except (TypeError, ValueError, OverflowError):
                continue  # non-numeric score from the client: omit rating
            _run(sp, f"   ·   דירוג: {rating}", size=9, color=source_gray)

    # ── Header ─────────────────────────────────────────────────
    _h("מזכר משפטי — ניתוח טענה", level=0)

    meta = _para()
    date_str = _dt.date.today().strftime("%d/%m/%Y")
    side_label = side or "אובייקטיבי"
    _run(meta, f"תאריך: {date_str}", size=10)
    _run(meta, f"   ·   נקודת מבט: {side_label}", size=10)
    if author_name:
        _run(meta, f"   ·   הוכן ע״י: {author_name}", size=10)

    # Claim card
    _h("הטענה לבדיקה", level=2)
    p = _para(claim)
    p.paragraph_format.left_indent = Inches(0.0)

    # ── 1. רקע עובדתי ────────────────────────────────────────
    _h("רקע עובדתי", level=2)
    facts = _clean(facts)
    if facts.strip():
        for chunk in facts.split("\n\n"):
            if chunk.strip():
                _para(chunk.strip())
    else:
        _para(
            "[ למלא ע״י עוה״ד — מועדים, צדדים, השתלשלות עניינים, "
            "נקודות מחלוקת ]",
            italic=True,
            color=muted,
            size=10,
        )

    # ── 2. הדוקטרינה הרלוונטית ──────────────────────────────
    doctrine = analysis.get("doctrine")
    if not isinstance(doctrine, dict):
        doctrine = {}
    if doctrine.get("anchor_label"):
        _h("הדוקטרינה הרלוונטית", level=2)
        p = _para()
        _run(p, f"🎯 {doctrine['anchor_label']}", bold=True)
        if not doctrine.get("promoted"):
            _run(p, "   ⚠ ביטחון נמוך — איכות הקלאסטר תחת הסף",
                 size=9, color=warn)

        if doctrine.get("anchor_quote"):
            quote = _clean(doctrine["anchor_quote"]).strip()
            qp = _para(f'"{_clip(quote, 600)}"',
                       italic=True,
                       color=quote_gray,
                       size=10)
            qp.paragraph_format.left_indent = Inches(0.3)

    # ── 3. טענות תומכות ──────────────────────────────────────
    pro_args = _dict_items("pro_arguments")
    _render_arguments("טענות תומכות", pro_args)

    # ── 4. טענות נגדיות ──────────────────────────────────────
    con_args = _dict_items("con_arguments")
    _render_arguments("טענות נגדיות", con_args)

    # ── 5. עובדות חסרות לחיזוק ──────────────────────────────
    missing = analysis.get("missing_facts") or []
    if not isinstance(missing, (list, tuple)):
        missing = []
    if missing:
        _h("עובדות חסרות לחיזוק הטענה", level=2)
        for fact in missing:
            p = _para(f"• {fact}")
            p.paragraph_format.left_indent = Inches(0.3)

    # ── 6. סיכום + ציון חוזק ─────────────────────────────────
    _h("סיכום", level=2)
    score = analysis.get("strength_score") or 0
    caption = analysis.get("strength_caption") or ""
    sp = _para()
    _run(sp, f"ציון חוזק: {score}/100", bold=True, size=13, color=gold)
    if caption:
        _run(sp, f"   ·   {caption}", size=11)

    n_pro = len(pro_args)
    n_con = len(con_args)
    _para(
        f"הניתוח מבוסס על {n_pro + n_con} מקורות "
        f"({n_pro} תומכים, {n_con} נגדיים). "
        f"כל הציטוטים מילה במילה מהפסיקה — אין הזיות AI."
    )

    # ── Footer / Disclaimer ──────────────────────────────────
    doc.add_paragraph()  # blank
    _h("הסתייגות", level=3, color=muted)

    _para(
        "⚠ Legal Eye הוא כלי מחקר משפטי, לא ייעוץ משפטי. "
        "המזכר מבוסס על שליפת ציטוטים מקורפוס פסיקת בית-המשפט העליון "
        "ועל heuristics לסיווג בעד/נגד. החלטה משפטית טעונה בדיקה אישית "
        "של עורך-דין מוסמך לנסיבות הספציפיות של התיק.",
        italic=True,
        color=source_gray,
        size=9,
    )
    _para(
        f"נוצר ע״י Legal Eye בתאריך {date_str} · "
        f"https://legal-eye.1bigfam.com",
        italic=True,
        color=muted,
        size=8,
    )

    buf = BytesIO()
    doc.save(buf)
    return buf.getvalue()


@app.post("/v1/memo/generate")
def memo_generate(body: _MemoGenerateRequest,
                  request: Request):  # type: ignore
    """Build a Hebrew RTL .docx memo from a pre-computed argument analysis.

    Gated to Solo+ at 15 credits (per the original notes). The caller is
    identified by the `X-User-Email` header.

    Returns:
        * 400 `empty_claim` / `empty_analysis` for unusable input (checked
          before the entitlement gate).
        * 402 (credits exhausted) or 403 (tier-locked) when the entitlement
          check refuses the call.
        * 500 `docx_unavailable` / `build_failed` when the document cannot
          be built.
        * Otherwise the binary DOCX as an attachment, with the remaining
          credits and tier exposed in `X-Entitlement-*` headers.
    """
    claim = (body.claim or "").strip()
    if not claim:
        return JSONResponse(status_code=400,
                            content={"ok": False, "reason": "empty_claim"})
    analysis = body.analysis or {}
    if not analysis.get("pro_arguments") and not analysis.get("con_arguments"):
        return JSONResponse(
            status_code=400,
            content={"ok": False, "reason": "empty_analysis",
                     "message_he": "תוצאת ניתוח חסרה — הרץ /v1/argument/analyze קודם."})

    # Tier gate (15 credits per memo)
    user_email = (request.headers.get("X-User-Email") or "").strip() or None
    from ..middleware.entitlements import check_entitlement
    decision = check_entitlement(user_email, "memo_generate")
    if not decision.allowed:
        status = 402 if decision.reason == "credits_exhausted" else 403
        return JSONResponse(status_code=status, content={
            "ok": False,
            "reason": decision.reason,
            "entitlement": decision.to_dict(),
            "message_he": (
                "המנוי שלך הגיע למכסת הקרדיטים. שדרוג ל-Pro פותח מזכרים ללא הגבלה."
                if decision.reason == "credits_exhausted"
                else "מזכר משפטי הוא פיצ׳ר חבילות בתשלום (Solo ומעלה). "
                     "שדרג ל-Solo (₪199 לחיים — Founding 50)."
            ),
        })

    # Build the DOCX
    try:
        docx_bytes = _build_argument_memo_docx(
            claim=claim,
            side=body.side or "אובייקטיבי",
            analysis=analysis,
            facts=body.facts or "",
            author_name=body.author_name or "",
        )
    except RuntimeError as e:
        # python-docx missing — server-side install issue, not client
        return JSONResponse(status_code=500, content={
            "ok": False, "reason": "docx_unavailable", "error": str(e)})
    except Exception as e:
        print(f"[memo/generate] build error: {e}")
        return JSONResponse(status_code=500, content={
            "ok": False, "reason": "build_failed",
            "error": f"{type(e).__name__}: {e}"})

    # Filename: date + the filename-safe characters among the first 30
    # characters of the claim.
    import datetime as _dt
    from urllib.parse import quote as _urlquote
    today = _dt.date.today().strftime('%Y-%m-%d')
    safe_claim = "".join(c for c in claim[:30] if c.isalnum() or c in " _-")
    fname = f"מזכר_{today}_{safe_claim.strip()}.docx"

    # HTTP header values must be latin-1 encodable, which the Hebrew
    # filename is not. Send it percent-encoded via the RFC 5987 `filename*`
    # parameter and keep a plain-ASCII `filename` for clients that ignore it.
    content_disposition = (
        f'attachment; filename="memo_{today}.docx"; '
        f"filename*=UTF-8''{_urlquote(fname, safe='')}"
    )

    headers = {
        "Content-Disposition": content_disposition,
        # Surface remaining credits in a response header so the UI can
        # update the badge without a second request
        "X-Entitlement-Credits-Remaining": str(decision.credits_remaining),
        "X-Entitlement-Tier": decision.tier,
    }
    return Response(
        content=docx_bytes,
        media_type=("application/vnd.openxmlformats-officedocument"
                    ".wordprocessingml.document"),
        headers=headers,
    )


# ──────────────────────────────────────────────────────────────────────
# v2.96 (Day 7) — Tier B router observability
#
# GET /v1/system/shards — admin-only. Shows which Tier B shards exist,
# which are loaded in memory, LRU hit/load/eviction counts. Used to
# debug "why didn't query X return tier_b_results" tickets.
# ──────────────────────────────────────────────────────────────────────

# ──────────────────────────────────────────────────────────────────────
# v2.96.6 (Day 7) — Procedural-cluster browser
#
# GET /v1/clusters/procedure/grouped
#
# Filters the global doctrine clusters (built by clustering.py over the
# 17K curated corpus) to PROCEDURAL ones, then sub-classifies into
# civil / criminal / administrative based on:
#   1. Catalog match — if cluster anchor maps to a known doctrine in
#      doctrines.json, use the doctrine's sub_domain
#   2. Member domain — dominant `metadata.domain` of cluster members
#   3. Keyword fallback — search anchor_quote for procedural markers
#
# Returns clusters grouped by sub_domain. UI can render as a
# navigation tree: "סדרי דין → אזרחי → השתק עילה / שיהוי / סילוק על
# הסף", etc.
#
# Public read (no admin gate) since this is browsing curated public
# legal doctrine data.
# ──────────────────────────────────────────────────────────────────────

# Hebrew marker phrases, one tuple per procedural sub-domain. Presumably
# consumed by the keyword-fallback step of `_classify_cluster_subdomain`
# (its use is not visible in this part of the file — confirm there).

# Civil procedure: regulation numbers, civil-procedure rules, summary
# dismissal, res judicata, issue estoppel, delay in filing a claim.
_PROCEDURAL_KEYWORDS_CIVIL = (
    "תקנה 100", "תקנה 41", "תקנה 42", "תקנה 43",
    "סדר הדין האזרחי", "תקנות סדר הדין",
    "סילוק על הסף", "מעשה בית-דין", "מעשה בית דין",
    "השתק פלוגתא", "שיהוי בהגשת תביעה",
)
# Criminal procedure: presumption of innocence, right to silence, caution
# before interrogation, fruit of the poisonous tree, exclusion of evidence,
# reasonable doubt.
_PROCEDURAL_KEYWORDS_CRIMINAL = (
    "חזקת חפות", "חזקת החפות",
    "זכות השתיקה", "אזהרה לפני חקירה",
    "פירות העץ המורעל", "פסילת ראיות",
    "ספק סביר", "מעבר לכל ספק",
    "סדר הדין הפלילי",
)
# Administrative law: reasonableness test / range, duty to give reasons,
# delay in filing a petition, administrative courts statute.
_PROCEDURAL_KEYWORDS_ADMIN = (
    "מבחן הסבירות", "מתחם הסבירות",
    "חובת ההנמקה", "חובת הנמקה",
    "שיהוי בעתירה",
    "סדרי המינהל", "חוק בתי משפט לעניינים מנהליים",
    "Wednesbury",
)


def _classify_cluster_subdomain(cluster, members_resolved: list) -> str:
    """Pick the procedural sub-domain label for a doctrine cluster.

    Three signals are tried in order; the first one that yields a label wins:

      1. Doctrine catalog — a leading-case citation contained in the
         cluster's anchor label, using the doctrine's ``sub_domain`` or,
         failing that, a mapping from the doctrine's ``domain``.
      2. Member majority — the most frequent ``metadata.domain`` among
         ``members_resolved``.
      3. Keywords — procedural markers found in anchor label + anchor quote.

    Args:
        cluster: Object read via ``getattr`` for ``anchor_label`` and
            ``anchor_quote``; missing or falsy values count as "".
        members_resolved: Sampled members; only dict entries with a truthy
            ``metadata`` value are counted.

    Returns:
        ``"civil"``, ``"criminal"``, ``"administrative"`` or ``"general"``.
    """
    # ── Signal 1: doctrine catalog ──
    # Best-effort: any failure here (import, load, malformed entry) simply
    # falls through to the next signal.
    try:
        from ..intelligence.doctrine_classifier import load_doctrine_catalog
        catalog = load_doctrine_catalog()
        label = (getattr(cluster, "anchor_label", "") or "").strip()
        for entry in catalog.get("doctrines", []):
            for leading in entry.get("leading_cases", []) or []:
                citation = leading.get("citation", "")
                if not citation or citation not in label:
                    continue
                explicit = entry.get("sub_domain")
                if explicit in ("civil", "criminal", "administrative"):
                    return explicit
                # No usable sub_domain — fall back to the doctrine's domain.
                domain = entry.get("domain")
                if domain in ("criminal", "administrative"):
                    return domain
                if domain == "procedure":
                    return "civil"
    except Exception:
        pass

    # ── Signal 2: majority of member domains ──
    tally: dict = {}
    for member in members_resolved:
        if not isinstance(member, dict):
            continue
        meta = member.get("metadata")
        if not meta:
            continue
        member_domain = meta.get("domain")
        if member_domain:
            tally[member_domain] = tally.get(member_domain, 0) + 1
    if tally:
        # Ties resolve to the domain that was seen first.
        leader = max(tally, key=tally.get)
        if leader in ("criminal", "administrative"):
            return leader
        civil_side = (
            "contracts", "torts", "labor", "family",
            "corporate", "banking", "tax", "property",
            "evidence", "procedure",
        )
        if leader in civil_side:
            return "civil"  # all are civil-side procedural matters

    # ── Signal 3: keyword sweep over the anchor text ──
    anchor_text = " ".join((
        getattr(cluster, "anchor_label", "") or "",
        getattr(cluster, "anchor_quote", "") or "",
    ))
    ordered_tables = (
        ("criminal", _PROCEDURAL_KEYWORDS_CRIMINAL),
        ("administrative", _PROCEDURAL_KEYWORDS_ADMIN),
        ("civil", _PROCEDURAL_KEYWORDS_CIVIL),
    )
    for verdict, markers in ordered_tables:
        if any(marker in anchor_text for marker in markers):
            return verdict

    return "general"


def _cluster_is_procedural(cluster, members_resolved: list) -> bool:
    """Decide heuristically whether a cluster concerns procedural rulings.

    A cluster counts as procedural when any of the following holds, checked
    in this order:

      * its anchor label / anchor quote contains one of the procedural
        keyword markers (civil, criminal or administrative table);
      * its ``domain`` attribute equals ``"procedure"``;
      * at least ``max(2, len(members_resolved) // 3)`` of the resolved
        members carry ``metadata.domain == "procedure"``.
    """
    anchor_text = " ".join((
        getattr(cluster, "anchor_label", "") or "",
        getattr(cluster, "anchor_quote", "") or "",
    ))
    keyword_tables = (
        _PROCEDURAL_KEYWORDS_CIVIL,
        _PROCEDURAL_KEYWORDS_CRIMINAL,
        _PROCEDURAL_KEYWORDS_ADMIN,
    )
    for table in keyword_tables:
        for marker in table:
            if marker in anchor_text:
                return True

    if getattr(cluster, "domain", None) == "procedure":
        return True

    # Fall back to counting procedural members in the sample.
    procedural_members = 0
    for member in members_resolved:
        if not isinstance(member, dict):
            continue
        meta = member.get("metadata") or {}
        if meta.get("domain") == "procedure":
            procedural_members += 1
    threshold = max(2, len(members_resolved) // 3)
    return procedural_members >= threshold


@app.get("/v1/clusters/procedure/grouped")
def clusters_procedure_grouped(limit_per_sub: int = 30):
    """Procedure-only clusters, grouped by sub-domain (civil / criminal /
    administrative / general). Returns at most `limit_per_sub` clusters per
    group, largest first.

    Each cluster includes the output of ``cluster_summary`` plus a
    ``sub_domain`` key and, when the anchor label contains a catalog
    leading-case citation, a ``catalog_match`` entry (doctrine_id, name_he,
    domain, sub_domain).

    Args:
        limit_per_sub: Maximum number of clusters returned per group.
            Negative values are treated as 0.

    Returns:
        A JSON-serialisable dict on success; a ``JSONResponse`` with status
        500 and ``{"ok": False, "error": ...}`` if anything raises.
    """
    # A negative limit would turn the slice below into "drop the last N
    # items" instead of a cap, so clamp it.
    limit_per_sub = max(0, limit_per_sub)
    try:
        from ..pipeline import get_pipeline
        from ..clustering import get_or_build_clusters, cluster_summary
        from ..intelligence.doctrine_classifier import load_doctrine_catalog
        pipe = get_pipeline()
        all_clusters = get_or_build_clusters(pipe)
        catalog = load_doctrine_catalog()

        # Build a lookup: citation_string → doctrine entry
        # (so we can hydrate clusters with matching catalog entry)
        citation_to_doctrine: dict = {}
        for doc in catalog.get("doctrines", []):
            for case in doc.get("leading_cases", []) or []:
                cite = case.get("citation", "")
                if cite:
                    citation_to_doctrine[cite] = {
                        "doctrine_id": doc["id"],
                        "name_he": doc["name_he"],
                        "domain": doc.get("domain"),
                        "sub_domain": doc.get("sub_domain"),
                    }

        grouped: dict = {"civil": [], "criminal": [], "administrative": [], "general": []}
        indexed_docs = getattr(pipe, "_indexed_docs", None) or []
        doc_by_id = {d.id: d for d in indexed_docs} if indexed_docs else {}

        for c in all_clusters:
            # Resolve a small sample of members for classification
            # (up to 10 applications + up to 5 origins).
            sample_member_ids = (list(getattr(c, "applications", []) or [])[:10]
                                  + list(getattr(c, "origins", []) or [])[:5])
            members_resolved = []
            for mid in sample_member_ids:
                d = doc_by_id.get(mid)
                if d:
                    members_resolved.append({
                        "id": d.id,
                        "metadata": getattr(d, "metadata", {}) or {},
                    })

            if not _cluster_is_procedural(c, members_resolved):
                continue

            sub = _classify_cluster_subdomain(c, members_resolved)
            summary = cluster_summary(c)
            # Hydrate with catalog match if anchor cites a known doctrine
            # (first citation found in the label wins).
            anchor_label = getattr(c, "anchor_label", "") or ""
            catalog_match = None
            for cite, ent in citation_to_doctrine.items():
                if cite in anchor_label:
                    catalog_match = ent
                    break
            if catalog_match:
                summary["catalog_match"] = catalog_match
            summary["sub_domain"] = sub
            grouped[sub].append(summary)

        # Sort each group by cluster size (n_applications, falling back to
        # size when that is missing or 0) and cap.
        for k, items in grouped.items():
            items.sort(key=lambda x: -(x.get("n_applications", 0)
                                         or x.get("size", 0)))
            grouped[k] = items[:limit_per_sub]

        return {
            "ok": True,
            "groups": grouped,
            "n_civil": len(grouped["civil"]),
            "n_criminal": len(grouped["criminal"]),
            "n_administrative": len(grouped["administrative"]),
            "n_general": len(grouped["general"]),
            "meta": {
                "method": "catalog_match + member_domain_majority + keyword_fallback",
                "catalog_doctrines": len(catalog.get("doctrines", [])),
                "total_clusters_in_pipe": len(all_clusters),
            },
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}",
        })


@app.get("/v1/system/shards/available")
def system_shards_available():  # type: ignore
    """List the Tier B shards the shard router reports as available.

    Public diagnostic endpoint (no auth check), e.g. for confirming that the
    multi-repo download succeeded at boot. Never raises: on any failure the
    response is ``{"ok": False, "error": "<ExcType>: <message>"}``.
    """
    try:
        from ..retrieve.shard_router import get_shard_router
        shard_router = get_shard_router()
        payload = {"ok": True}
        payload["available_shards"] = sorted(shard_router.available_shards)
        payload["n_shards"] = len(shard_router.available_shards)
        # `_shards_dirs` is optional on the router; absent means 0.
        payload["shards_dirs_count"] = len(getattr(shard_router, "_shards_dirs", []))
        return payload
    except Exception as exc:
        return {"ok": False, "error": f"{type(exc).__name__}: {exc}"}


@app.get("/v1/system/shards")
def system_shards(request: Request):  # type: ignore
    """Admin-only ShardRouter status report.

    Returns the router's own ``status()`` dict plus, for every available
    shard, the parsed contents of its
    ``<shards_dir>/<shard>/retriever_state/manifest.json`` when that file
    exists.

    Responses:
        403 ``admin_required`` when the admin-key check fails;
        500 ``router_unavailable`` when the router or status lookup raises;
        otherwise ``{"ok": True, "router": ..., "shard_manifests": ...}``.
    """
    if not _check_admin_key(request):
        return JSONResponse(status_code=403,
                            content={"ok": False, "reason": "admin_required"})
    try:
        from ..retrieve.shard_router import get_shard_router
        router_status = get_shard_router().status()

        from pathlib import Path
        import json as _json

        # Attach per-shard manifests; shards without a manifest file are
        # simply omitted from the mapping.
        base_dir = Path(router_status["shards_dir"])
        manifests = {}
        for name in router_status["available_shards"]:
            manifest_path = base_dir / name / "retriever_state" / "manifest.json"
            if not manifest_path.exists():
                continue
            try:
                raw = manifest_path.read_text(encoding="utf-8")
                manifests[name] = _json.loads(raw)
            except Exception:
                manifests[name] = {"error": "manifest unreadable"}

        return {"ok": True, "router": router_status, "shard_manifests": manifests}
    except Exception as exc:
        failure = {
            "ok": False,
            "reason": "router_unavailable",
            "error": f"{type(exc).__name__}: {exc}",
        }
        return JSONResponse(status_code=500, content=failure)


# ──────────────────────────────────────────────────────────────────────
# v2.98 (Day 8) — Live demo endpoint for landing page
#
# POST /v1/demo/analyze
#
# Anonymous-allowed wrapper around /v1/argument/analyze for the
# landing-page demo widget. No auth, no credit deduction, but capped
# output (2 pro + 2 con) and rate-limited by IP (10/hour).
#
# The full version (5 pro + 5 con + tier_b + doctrine_match) stays
# gated to Solo+ at 5 credits — this just shows enough to whet the
# appetite for signup. The "see full analysis" button on the result
# triggers the email capture flow.
# ──────────────────────────────────────────────────────────────────────

import time as _demo_time
from collections import deque

# Simple in-memory IP → request timestamps (deque). Persistent only
# within process lifetime — fine since this is anti-spam, not a hard
# quota. Each IP allowed 10 requests/hour. The same buckets back both
# /v1/demo/analyze and /v1/demo/analyze/stream, so the limit is shared.
# NOTE(review): nothing in this section ever removes an IP key, so the dict
# grows with the number of distinct client IPs seen — confirm acceptable.
_DEMO_RATE_WINDOW_SEC = 3600  # 1 hour
_DEMO_RATE_LIMIT = 10  # max accepted requests per IP inside one window
_DEMO_RATE_BUCKETS: Dict[str, deque] = {}  # ip → timestamps of accepted requests, oldest first


def _demo_check_rate(ip: str) -> tuple:
    """Sliding-window rate check for the demo endpoints.

    Drops timestamps older than the window from the caller's bucket, then
    either records this request or refuses it.

    Returns:
        ``(True, 0)`` when the request is accepted (and recorded), or
        ``(False, retry_after_sec)`` when the bucket is full. The retry
        hint is never reported as less than 60 seconds.
    """
    current = _demo_time.time()
    cutoff = current - _DEMO_RATE_WINDOW_SEC
    stamps = _DEMO_RATE_BUCKETS.setdefault(ip, deque())

    # Oldest entries sit on the left; discard the expired ones.
    while stamps and stamps[0] < cutoff:
        stamps.popleft()

    if len(stamps) < _DEMO_RATE_LIMIT:
        stamps.append(current)
        return True, 0

    # Full bucket: a slot frees up when the oldest entry leaves the window.
    wait = int(stamps[0] + _DEMO_RATE_WINDOW_SEC - current)
    return False, (wait if wait > 60 else 60)


class _DemoAnalyzeRequest(BaseModel):  # type: ignore
    # Request body for POST /v1/demo/analyze and /v1/demo/analyze/stream.
    # `claim` is the free-text claim to analyze; both handlers strip it and
    # reject an empty value or one longer than 600 characters.
    claim: str


@app.post("/v1/demo/analyze")
def demo_analyze(body: _DemoAnalyzeRequest, request: Request):  # type: ignore
    """Public demo endpoint for landing-page widget.

    Differences from /v1/argument/analyze:
      - No auth required (anonymous OK)
      - No credit deduction
      - Rate-limited per-IP (10/hour)
      - Output capped at 2 pro + 2 con
      - No `doctrine_match`, `tier_b_results`, `missing_facts`
        (those are paid-tier features that encourage signup)

    Returns:
        A JSON dict on success, or a ``JSONResponse`` with status 400
        (empty / over-long claim), 429 (rate limit hit) or 500 (pipeline
        imports failed).
    """
    claim = (body.claim or "").strip()
    if not claim:
        return JSONResponse(status_code=400, content={
            "ok": False, "reason": "empty_claim",
            "message_he": "אנא הכנס טענה לניתוח."
        })
    if len(claim) > 600:
        return JSONResponse(status_code=400, content={
            "ok": False, "reason": "claim_too_long",
            "message_he": "טענה ארוכה מ-600 תווים. קצר ונסה שוב."
        })

    # Rate limit by IP (X-Forwarded-For first if behind proxy)
    # NOTE(review): the first X-Forwarded-For entry is client-supplied unless
    # a trusted proxy overwrites it — confirm the deployment does so.
    fwd = request.headers.get("X-Forwarded-For", "")
    ip = (fwd.split(",")[0].strip() if fwd
          else (request.client.host if request.client else "unknown"))
    allowed, retry = _demo_check_rate(ip)
    if not allowed:
        return JSONResponse(status_code=429, content={
            "ok": False,
            "reason": "rate_limit",
            "retry_after_sec": retry,
            "message_he": (
                f"רק 10 ניתוחי-דמו לשעה. נסה שוב בעוד "
                f"{retry//60} דקות, או הירשם ל-Solo (₪199 לחיים) "
                f"לקבלת ניתוח ללא הגבלה."
            ),
        })

    # Run the same synthesis pipeline as /v1/argument/analyze, but
    # capped. We inline a slim version rather than calling the full
    # endpoint (avoids the entitlement gate that would 403 anonymous).
    # Only the names actually used below are imported, so an unrelated
    # module failing to import cannot take the demo down.
    try:
        from ..pipeline import get_pipeline
        from ..intelligence import StrategySynthesizer
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "reason": "import_failed",
            "error": f"{type(e).__name__}: {e}",
        })

    pipe = get_pipeline()
    named = getattr(pipe.retrievers, "_retrievers", {}) or {}
    cbr_r = named.get("hebrew_encoder") or pipe.retrievers
    syn = StrategySynthesizer(
        retriever=pipe.retrievers, cbr_retriever=cbr_r,
        full_text_loader=lambda did: pipe.get_text(did) or "",
        pipeline=pipe, polish_with_tau_llm=False,
    )

    def _frame(side: str) -> list:
        """Drafted arguments for one side; [] if synthesis fails."""
        try:
            r = syn.synthesize(user_facts=claim, side=side, top_k=5)
            d = r.to_dict() if hasattr(r, "to_dict") else dict(r)
            cba = d.get("case_based_arguments") or {}
            return cba.get("drafted_arguments_for_user") or []
        except Exception:
            return []

    pro_raw = _frame("plaintiff")
    con_raw = _frame("defendant")

    def _shape(arg: dict) -> dict:
        """Reduce a drafted argument to the {text, source_case} demo card."""
        text = (arg.get("argument") or arg.get("text") or "").strip()
        sources = arg.get("source_cases") or []
        s0 = sources[0] if sources else {}
        case_id = (s0.get("title") or s0.get("case_id")
                   or arg.get("source_case_id"))
        if not case_id:
            # v2.99 — extract from embedded text
            case_id = _extract_citation_from_text(text) or "(ללא ציטוט)"
        return {
            "text": text[:400] + ("..." if len(text) > 400 else ""),
            "source_case": case_id,
        }

    def _dedup_key(arg: dict) -> str:
        """First source case id, else the first 60 chars of the text.

        Reads the text the same way _shape does (``argument`` or ``text``),
        so text-only drafts are not all collapsed onto the empty key and a
        null ``argument`` does not raise.
        """
        case_id = (arg.get("source_cases") or [{}])[0].get("case_id")
        return case_id or (arg.get("argument") or arg.get("text") or "")[:60]

    # Dedup + cap. `seen` is shared by both sides, so a source already used
    # for a pro argument is not repeated on the con side.
    seen: set = set()

    def _pick(raw: list, cap: int = 2) -> list:
        picked = []
        for arg in raw:
            key = _dedup_key(arg)
            if key in seen:
                continue
            seen.add(key)
            picked.append(_shape(arg))
            if len(picked) >= cap:
                break
        return picked

    pro_args = _pick(pro_raw)
    con_args = _pick(con_raw)

    # Cheap strength score derived from how many drafts came back
    n_total = len(pro_raw) + len(con_raw)
    if n_total >= 6:
        strength = 65 + min(20, n_total)  # 71-85 range
        caption = "טענה חזקה יחסית — פסיקה תומכת מספקת"
    elif n_total >= 3:
        strength = 50 + n_total * 5       # 65-75 range
        caption = "טענה בינונית — פסיקה מעורבת"
    else:
        strength = 25 + n_total * 7       # 25-39 range
        caption = "טענה חלשה — פסיקה ברובה סותרת"

    # Find doctrine match (catalog) — useful tease for signup.
    # Best-effort: a classifier failure just leaves the label empty.
    doctrine_label = None
    try:
        from ..intelligence.doctrine_classifier import classify_doctrines
        matches = classify_doctrines(claim, k=1, min_score=1.0)
        if matches:
            doctrine_label = matches[0].name_he
    except Exception:
        pass

    return {
        "ok": True,
        "claim": claim,
        "strength_score": strength,
        "strength_caption": caption,
        "doctrine_label": doctrine_label,
        "pro_arguments": pro_args,   # capped at 2
        "con_arguments": con_args,   # capped at 2
        "locked": {
            "n_more_pro": max(0, len(pro_raw) - len(pro_args)),
            "n_more_con": max(0, len(con_raw) - len(con_args)),
            "message_he": (
                "💎 הירשם כדי לראות את הניתוח המלא — כל הטענות, "
                "ציטוטים מילה במילה ניתנים לפתיחה, ועובדות חסרות."
            ),
        },
        "meta": {
            "rate_limit_remaining": _DEMO_RATE_LIMIT - len(_DEMO_RATE_BUCKETS[ip]),
        },
    }


# ──────────────────────────────────────────────────────────────────────
# Day 39 — Streaming demo analyze (SSE).
#
# Same pipeline as /v1/demo/analyze, but emits Server-Sent Events
# at each phase so the user sees progressive feedback instead of
# a 10-15s "מנתח..." opaque wait.
#
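# Event types (JSON in `data:` line), in emission order:
#   {event: "started",         claim: str}   (claim truncated to 120 chars)
#   {event: "retrieving",      side: "pro" | "con", message_he: str}
#   {event: "side_done",       side: "pro" | "con", n: int}
#   {event: "shaping",         message_he: str}
#   {event: "doctrine_match",  name_he: str | null}
#   {event: "summary",         doctrine_label, strength_score,
#                              strength_caption, n_pro, n_con, locked}
#   {event: "pro_start",       n_total: int}
#   {event: "pro_argument",    index: int, n_total: int, arg: {text, source_case}}
#   {event: "con_start",       n_total: int}
#   {event: "con_argument",    index: int, n_total: int, arg: {text, source_case}}
#   {event: "done",            meta: {rate_limit_remaining: int, claim: str}}
#   {event: "error",           reason: str, message_he: str}   (ends the stream;
#                              rate-limit errors also carry retry_after_sec)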
#
# Same rate limit (10/hr/IP) — the streaming version shares the same
# bucket so users can't bypass the limit by switching endpoints.
# ──────────────────────────────────────────────────────────────────────

@app.post("/v1/demo/analyze/stream")
def demo_analyze_stream(body: _DemoAnalyzeRequest, request: Request):  # type: ignore
    """SSE-streaming variant of /v1/demo/analyze.

    Runs the same analysis but delivers it as a sequence of Server-Sent
    Events: progress phases, a ``summary`` event, one event per argument
    card, and a final ``done`` event that carries only meta information.
    Validation, rate-limit and system failures are reported as a single
    ``error`` event (the HTTP status is still 200 because the stream has
    already started). Shares the rate-limit buckets of the non-streaming
    endpoint.
    """
    from fastapi.responses import StreamingResponse as _SR
    import json as _j

    claim = (body.claim or "").strip()

    def _sse(obj):
        """Serialize one event as an SSE `data:` frame."""
        return f"data: {_j.dumps(obj, ensure_ascii=False)}\n\n"

    def gen():
        # ── input validation ──
        if not claim:
            yield _sse({"event": "error", "reason": "empty_claim",
                        "message_he": "אנא הכנס טענה לניתוח."})
            return
        if len(claim) > 600:
            yield _sse({"event": "error", "reason": "claim_too_long",
                        "message_he": "טענה ארוכה מ-600 תווים. קצר ונסה שוב."})
            return

        # ── rate limit ──
        fwd = request.headers.get("X-Forwarded-For", "")
        ip = (fwd.split(",")[0].strip() if fwd
              else (request.client.host if request.client else "unknown"))
        allowed, retry = _demo_check_rate(ip)
        if not allowed:
            yield _sse({
                "event": "error", "reason": "rate_limit",
                "retry_after_sec": retry,
                "message_he": (
                    f"רק 10 ניתוחי-דמו לשעה. נסה שוב בעוד "
                    f"{retry//60} דקות, או הירשם ל-Solo (₪199 לחיים)."
                ),
            })
            return

        # ── started ──
        yield _sse({"event": "started", "claim": claim[:120]})

        # ── import + build synthesizer ──
        try:
            from ..pipeline import get_pipeline
            from ..intelligence import StrategySynthesizer
        except Exception as e:
            yield _sse({"event": "error", "reason": "import_failed",
                        "message_he": f"שגיאת מערכת: {type(e).__name__}"})
            return

        # An exception escaping the generator would cut the stream off with
        # no terminal event, so report construction failures explicitly.
        try:
            pipe = get_pipeline()
            named = getattr(pipe.retrievers, "_retrievers", {}) or {}
            cbr_r = named.get("hebrew_encoder") or pipe.retrievers
            syn = StrategySynthesizer(
                retriever=pipe.retrievers, cbr_retriever=cbr_r,
                full_text_loader=lambda did: pipe.get_text(did) or "",
                pipeline=pipe, polish_with_tau_llm=False,
            )
        except Exception as e:
            yield _sse({"event": "error", "reason": "pipeline_unavailable",
                        "message_he": f"שגיאת מערכת: {type(e).__name__}"})
            return

        def _drafts(side):
            """Drafted arguments for one side; [] if synthesis fails."""
            try:
                r = syn.synthesize(user_facts=claim, side=side, top_k=5)
                d = r.to_dict() if hasattr(r, "to_dict") else dict(r)
                cba = d.get("case_based_arguments") or {}
                return cba.get("drafted_arguments_for_user") or []
            except Exception:
                return []

        # ── retrieve plaintiff side ──
        yield _sse({"event": "retrieving", "side": "pro",
                    "message_he": "מחפש פסיקה לטיעוני בעד..."})
        pro_raw = _drafts("plaintiff")
        yield _sse({"event": "side_done", "side": "pro", "n": len(pro_raw)})

        # ── retrieve defendant side ──
        yield _sse({"event": "retrieving", "side": "con",
                    "message_he": "מחפש פסיקה לטיעונים נגד..."})
        con_raw = _drafts("defendant")
        yield _sse({"event": "side_done", "side": "con", "n": len(con_raw)})

        # ── shape + dedup ──
        yield _sse({"event": "shaping",
                    "message_he": "מתעצב ומסיר כפילויות..."})

        def _shape(arg):
            """Reduce a drafted argument to the {text, source_case} card."""
            text = (arg.get("argument") or arg.get("text") or "").strip()
            sources = arg.get("source_cases") or []
            s0 = sources[0] if sources else {}
            case_id = (s0.get("title") or s0.get("case_id")
                       or arg.get("source_case_id"))
            if not case_id:
                case_id = _extract_citation_from_text(text) or "(ללא ציטוט)"
            return {
                "text": text[:400] + ("..." if len(text) > 400 else ""),
                "source_case": case_id,
            }

        def _dedup_key(arg):
            """First source case id, else the first 60 chars of the text.

            Reads the text the same way _shape does (``argument`` or
            ``text``), so text-only drafts are not all collapsed onto the
            empty key and a null ``argument`` does not raise.
            """
            case_id = (arg.get("source_cases") or [{}])[0].get("case_id")
            return case_id or (arg.get("argument") or arg.get("text") or "")[:60]

        # `seen` is shared by both sides, so a source already used for a
        # pro argument is not repeated on the con side.
        seen = set()

        def _pick(raw, cap=2):
            picked = []
            for arg in raw:
                key = _dedup_key(arg)
                if key in seen:
                    continue
                seen.add(key)
                picked.append(_shape(arg))
                if len(picked) >= cap:
                    break
            return picked

        pro_args = _pick(pro_raw)
        con_args = _pick(con_raw)

        # ── strength + doctrine ──
        n_total = len(pro_raw) + len(con_raw)
        if n_total >= 6:
            strength = 65 + min(20, n_total)
            caption = "טענה חזקה יחסית — פסיקה תומכת מספקת"
        elif n_total >= 3:
            strength = 50 + n_total * 5
            caption = "טענה בינונית — פסיקה מעורבת"
        else:
            strength = 25 + n_total * 7
            caption = "טענה חלשה — פסיקה ברובה סותרת"

        # Day 42 — explicit pacing between each visible phase so the user
        # actually SEES each step appear (was: 5 events landed in <50ms
        # because doctrine_match, summary, pro_start are all fast python
        # ops — felt like a single flash, not progressive streaming).
        import time as _ti
        PHASE_PAUSE = 0.55   # gap between distinct UI phases
        CARD_PAUSE  = 0.55   # gap between successive argument cards

        # ── doctrine match (separate, visible) ──
        # Best-effort: a classifier failure just leaves the label empty.
        doctrine_label = None
        try:
            from ..intelligence.doctrine_classifier import classify_doctrines
            matches = classify_doctrines(claim, k=1, min_score=1.0)
            if matches:
                doctrine_label = matches[0].name_he
        except Exception:
            pass
        _ti.sleep(PHASE_PAUSE)
        yield _sse({"event": "doctrine_match", "name_he": doctrine_label})

        # ── summary (doctrine + strength + locked counts) ──
        _ti.sleep(PHASE_PAUSE)
        yield _sse({
            "event": "summary",
            "doctrine_label": doctrine_label,
            "strength_score":   strength,
            "strength_caption": caption,
            "n_pro": len(pro_args),
            "n_con": len(con_args),
            "locked": {
                "n_more_pro": max(0, len(pro_raw) - len(pro_args)),
                "n_more_con": max(0, len(con_raw) - len(con_args)),
                "message_he": (
                    "💎 הירשם כדי לראות את הניתוח המלא — כל הטענות, "
                    "ציטוטים מילה במילה ניתנים לפתיחה, ועובדות חסרות."
                ),
            },
        })

        # ── Pro side ──
        _ti.sleep(PHASE_PAUSE)
        yield _sse({"event": "pro_start", "n_total": len(pro_args)})
        for i, arg in enumerate(pro_args):
            _ti.sleep(CARD_PAUSE)
            yield _sse({
                "event":    "pro_argument",
                "index":    i,
                "n_total":  len(pro_args),
                "arg":      arg,
            })

        # ── Con side ──
        _ti.sleep(PHASE_PAUSE)
        yield _sse({"event": "con_start", "n_total": len(con_args)})
        for i, arg in enumerate(con_args):
            _ti.sleep(CARD_PAUSE)
            yield _sse({
                "event":    "con_argument",
                "index":    i,
                "n_total":  len(con_args),
                "arg":      arg,
            })

        # ── Done — meta only (UI uses this to enable the email-capture form) ──
        _ti.sleep(PHASE_PAUSE)
        yield _sse({
            "event": "done",
            "meta": {
                "rate_limit_remaining": _DEMO_RATE_LIMIT - len(_DEMO_RATE_BUCKETS[ip]),
                "claim": claim,
            },
        })

    return _SR(gen(), media_type="text/event-stream",
               headers={
                   "Cache-Control": "no-cache",
                   "X-Accel-Buffering": "no",   # disable proxy buffering
                   "Connection": "keep-alive",
               })


# ──────────────────────────────────────────────────────────────────────
# v2.99.2 (Day 11) — Opposing Counsel Mode
#
# POST /v1/argument/opposing-counsel
#
# Pro-tier flagship feature. Given a claim + the user's side, returns
# the strongest ADVERSARIAL analysis: what the opposing counsel would
# argue, how they'd attack, and what facts weaken the user's position.
#
# Gated to Pro+ (8 credits). Differs from /v1/argument/analyze:
#   - Returns 6-8 opposing arguments (not 2-3)
#   - Reframes "missing facts" as "weaknesses" (advantage to opponent)
#   - Lists procedural attack vectors (שיהוי, מעשה בית-דין, התיישנות)
#   - Doctrine exceptions ranked by attack utility
#
# Purpose: lawyer pressure-tests their case BEFORE the opponent does.
# ──────────────────────────────────────────────────────────────────────

class _OpposingCounselRequest(BaseModel):  # type: ignore
    # Request body for POST /v1/argument/opposing-counsel.
    # The claim to attack; the handler strips it and rejects an empty value.
    claim: str
    side: Optional[str] = "תובע"  # the user's side — we attack from the OTHER
    # Passed straight through as `top_k` to the synthesizer; the handler
    # keeps at most 8 of the drafted arguments regardless of this value.
    # NOTE(review): no bounds are enforced on this field — confirm intended.
    top_k: int = 10


# Procedural attack vectors — generic challenges any opponent can raise.
# Ranked by how often they're decisive in practice.
#
# Each entry is a dict with:
#   label       — short Hebrew name shown to the user
#   desc        — one-sentence Hebrew explanation of the attack
#   doctrine_id — presumably an id in the doctrine catalog, or None when no
#                 catalog entry is linked (TODO confirm against doctrines.json)
#
# The opposing-counsel endpoint returns only the first four entries
# (`[:4]`), so order matters and the last two are currently never sent.
_PROCEDURAL_ATTACK_VECTORS_HE = [
    {"label": "שיהוי", "desc": "אם התביעה הוגשה זמן רב לאחר התגבשות העילה — טענה שהשתהות יצרה הסתמכות / איבדה ראיות.",
     "doctrine_id": "shihui_tviya_ezrachit"},
    {"label": "התיישנות", "desc": "בדיקה האם תקופת ההתיישנות הסטטוטורית פגעה בעילה — ס' 5-7 לחוק ההתיישנות, התשי\"ח-1958.",
     "doctrine_id": None},
    {"label": "מעשה בית-דין", "desc": "האם הסוגיה כבר נדונה והוכרעה — השתק עילה או השתק פלוגתא.",
     "doctrine_id": "maase_beit_din"},
    {"label": "חוסר עילה / סילוק על הסף", "desc": "תקיפת התביעה כבר בכתב התביעה — תקנה 41-43.",
     "doctrine_id": "siluk_al_ha_saf"},
    {"label": "אי-מיצוי הליכים", "desc": "האם פוצה הפנייה הקודמת לרשות מוסמכת לפני הגשת התביעה?",
     "doctrine_id": None},
    {"label": "חוסר תום-לב מצד התובע", "desc": "טענת תום-הלב פועלת בשני הכיוונים — ניתן להפנותה נגד עמדת התובע.",
     "doctrine_id": "tom_lev_choze"},
]


def _flip_side(side: str) -> str:
    """Return the adversarial side for the user's declared side.

    Matching ignores surrounding whitespace and letter case, so
    ``"Defendant"`` or ``" PLAINTIFF "`` are recognised (a case-sensitive
    match would send them to the default and could return the user's own
    side as the adversary).

    Args:
        side: The user's side in Hebrew (``תובע`` / ``נתבע``) or English
            (``plaintiff`` / ``claimant`` / ``defendant`` / ``respondent``).
            ``None`` and empty strings are accepted.

    Returns:
        ``"defendant"`` when the user is on the plaintiff side,
        ``"plaintiff"`` when the user is on the defendant side, and
        ``"defendant"`` for anything unrecognised.
    """
    s = (side or "").strip().lower()
    if s in ("תובע", "plaintiff", "claimant"):
        return "defendant"
    if s in ("נתבע", "defendant", "respondent"):
        return "plaintiff"
    return "defendant"   # default attack from defendant's perspective


@app.post("/v1/argument/opposing-counsel")
def opposing_counsel(body: _OpposingCounselRequest,
                     request: Request):  # type: ignore
    """Adversarial analysis from the opposing counsel's perspective.
    Pro-gated at 8 credits.

    Flow: validate the claim → entitlement check keyed on the
    ``X-User-Email`` header → synthesize arguments for the side opposite
    to ``body.side`` → attach doctrine exceptions and uncovered facts.
    The three enrichment steps are best-effort: each failure is printed
    and leaves its section empty rather than failing the request.

    Returns:
        A JSON dict on success, or a ``JSONResponse`` with status 400
        (empty claim), 402 (credits exhausted), 403 (not entitled) or
        500 (pipeline imports failed).
    """
    claim = (body.claim or "").strip()
    if not claim:
        return JSONResponse(status_code=400,
                            content={"ok": False, "reason": "empty_claim"})

    # Tier gate — opposing_counsel is in pro/firm unlocked_endpoints
    user_email = (request.headers.get("X-User-Email") or "").strip() or None
    from ..middleware.entitlements import check_entitlement
    decision = check_entitlement(user_email, "opposing_counsel")
    if not decision.allowed:
        status = 402 if decision.reason == "credits_exhausted" else 403
        return JSONResponse(status_code=status, content={
            "ok": False,
            "reason": decision.reason,
            "entitlement": decision.to_dict(),
            "message_he": (
                "המנוי שלך הגיע למכסת הקרדיטים." if decision.reason == "credits_exhausted"
                else "🛡 Opposing Counsel הוא פיצ׳ר Pro Litigator בלבד "
                     "(₪899/חודש). שדרג כדי לחשוף את החולשות בטענה לפני "
                     "שהצד השני עושה את זה."
            ),
        })

    # Run synthesizer from the ADVERSARIAL side — give us their best shots
    try:
        from ..pipeline import get_pipeline
        from ..intelligence import StrategySynthesizer
        from ..hierarchical_graph import get_or_build_hgraph
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "reason": "import_failed",
            "error": f"{type(e).__name__}: {e}",
        })

    adversary_side = _flip_side(body.side)
    pipe = get_pipeline()
    named = getattr(pipe.retrievers, "_retrievers", {}) or {}
    cbr_r = named.get("hebrew_encoder") or pipe.retrievers
    syn = StrategySynthesizer(
        retriever=pipe.retrievers, cbr_retriever=cbr_r,
        full_text_loader=lambda did: pipe.get_text(did) or "",
        pipeline=pipe, polish_with_tau_llm=False,
    )

    # Synthesize the ATTACKING side's case
    attack_args: list = []
    try:
        r = syn.synthesize(user_facts=claim, side=adversary_side, top_k=body.top_k)
        d = r.to_dict() if hasattr(r, "to_dict") else dict(r)
        drafts = (d.get("case_based_arguments") or {}).get(
            "drafted_arguments_for_user") or []
        for arg in drafts[:8]:
            # Accept either key for the argument text, as the demo
            # endpoints do.
            text = (arg.get("argument") or arg.get("text") or "").strip()
            sources = arg.get("source_cases") or []
            s0 = sources[0] if sources else {}
            case_id = (s0.get("title") or s0.get("case_id")
                       or arg.get("source_case_id"))
            if not case_id:
                case_id = _extract_citation_from_text(text) or "(ללא ציטוט)"
            # A null score must not raise: the except below would swallow
            # the TypeError and silently drop every remaining draft.
            score = arg.get("score") or 0
            attack_args.append({
                "text": text,
                "source_case": case_id,
                "strength": "high" if score > 10 else "medium",
            })
    except Exception as e:
        print(f"[opposing_counsel] synth failed: {e}")

    # Doctrine match — gives us a known doctrine that the attacker can use
    counter_doctrines: list = []
    try:
        from ..intelligence.doctrine_classifier import (
            classify_doctrines,
            get_doctrine_by_id,
        )
        matches = classify_doctrines(claim, k=3, min_score=1.0)
        for m in matches:
            d_dict = m.to_dict()
            # For Opposing Counsel: surface the doctrine's EXCEPTIONS
            # (these are what the attacker uses to escape liability)
            full = get_doctrine_by_id(m.doctrine_id) or {}
            d_dict["exceptions"] = full.get("exceptions") or []
            counter_doctrines.append(d_dict)
    except Exception as e:
        print(f"[opposing_counsel] doctrine match failed: {e}")

    # "Weaknesses" — derive from Tier A bundle's missing-facts
    weaknesses: list = []
    try:
        hg = get_or_build_hgraph(pipe)
        # NOTE(review): body.side is forwarded as the client sent it (the
        # default is the Hebrew "תובע"), while the fallback here is the
        # English "plaintiff" — confirm build_argument accepts both forms.
        bundle = hg.build_argument(claim, side=body.side or "plaintiff")
        bd = bundle.to_dict()
        for fm in bd.get("fact_mapping") or []:
            # Only entries explicitly marked covered=False count.
            if isinstance(fm, dict) and fm.get("covered") is False:
                w = fm.get("element") or fm.get("label") or ""
                if w:
                    weaknesses.append({
                        "fact": w,
                        "exploitation": (
                            "טיעון אדוורסרי יציין שעובדה זו חסרה — והעדרה "
                            "מחליש את הטענה מהותית."
                        ),
                    })
    except Exception as e:
        print(f"[opposing_counsel] weaknesses extraction failed: {e}")

    return {
        "ok": True,
        "claim": claim,
        "user_side": body.side or "תובע",
        "adversary_side": adversary_side,
        "attack_arguments": attack_args[:8],
        "procedural_attacks": _PROCEDURAL_ATTACK_VECTORS_HE[:4],
        "counter_doctrines": counter_doctrines[:3],
        "weaknesses_in_your_claim": weaknesses[:5],
        "verdict_he": (
            "🛡 מה הצד השני יטען נגדך — בלי לסנן. השתמש בזה כדי "
            "להכין תשובות לפני הדיון, לא במקום עורך-דין."
        ),
        "entitlement": decision.to_dict(),
        "meta": {
            "method": "adversarial_dual_frame",
            "n_attack_args": len(attack_args),
            "n_weaknesses": len(weaknesses),
        },
    }


# ──────────────────────────────────────────────────────────────────────
# v2.99.1 (Day 10) — Demo lead capture
#
# POST /v1/demo/lead — stores email + optional claim context from
# landing-page conversion form. SQLite-backed (`runtime/leads.db`),
# rate-limited per-IP (5 leads/hour to prevent spam).
#
# Used by the landing demo widget after results display:
#   "💌 רוצה את הניתוח המלא? תן email + נשלח אותו אליך"
# ──────────────────────────────────────────────────────────────────────

# SQLite file holding captured demo leads. The path is relative, so it
# resolves against the process working directory.
_LEADS_DB = "tau_rag/runtime/leads.db"
# Per-IP timestamp buckets for lead submissions (separate from the demo
# analysis buckets), in-memory only.
_LEAD_RATE_BUCKETS: Dict[str, deque] = {}
_LEAD_RATE_LIMIT = 5            # max leads per IP inside one window
_LEAD_RATE_WINDOW_SEC = 3600    # window length in seconds (1 hour)


class _DemoLeadRequest(BaseModel):  # type: ignore
    """Request body for ``POST /v1/demo/lead``.

    Only ``email`` is required; every other field defaults to an empty
    string (or ``"landing_demo"`` for ``source``).
    """
    # Contact address; the handler trims, lower-cases and shape-checks it.
    email: str
    # Free-text claim supplied with the form (handler stores at most 600 chars).
    claim: Optional[str] = ""
    # Display name (handler stores at most 120 chars).
    name: Optional[str] = ""
    # Label for where the lead came from (handler stores at most 60 chars).
    source: Optional[str] = "landing_demo"
    # The claim the user analyzed before submitting (for context/follow-up)
    last_analyzed_claim: Optional[str] = ""


def _ensure_leads_db():
    """Create the leads SQLite schema if it does not exist yet.

    Idempotent: the table and indexes use ``IF NOT EXISTS``, and the Day 28
    columns are added with ``ALTER TABLE`` whose "duplicate column" failure
    on later calls is expected and ignored. The database lives at
    ``_LEADS_DB``; its parent directory is created on demand.
    """
    import sqlite3
    from contextlib import closing
    from pathlib import Path

    Path(_LEADS_DB).parent.mkdir(parents=True, exist_ok=True)
    # ``with conn`` only commits / rolls back the transaction; closing() is
    # what actually releases the connection when the block exits.
    with closing(sqlite3.connect(_LEADS_DB, timeout=5.0)) as conn, conn:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS leads (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                email TEXT NOT NULL,
                name TEXT,
                claim TEXT,
                last_analyzed_claim TEXT,
                source TEXT,
                ip TEXT,
                user_agent TEXT,
                created_at INTEGER NOT NULL
            )
        """)
        conn.execute("CREATE INDEX IF NOT EXISTS idx_leads_email ON leads(email)")
        conn.execute("CREATE INDEX IF NOT EXISTS idx_leads_ts ON leads(created_at)")
        # Day 28 — status + welcome tracking. Use ALTER TABLE so existing rows
        # default to 'new' and 0.
        for col, ddl in (
            ("status",         "TEXT DEFAULT 'new'"),
            ("sent_welcome",   "INTEGER DEFAULT 0"),
            ("welcome_method", "TEXT"),
            ("admin_note",     "TEXT"),
            ("updated_at",     "INTEGER"),
        ):
            try:
                conn.execute(f"ALTER TABLE leads ADD COLUMN {col} {ddl}")
            except sqlite3.Error as exc:
                # "duplicate column" is the normal outcome once the column
                # exists; anything else is worth surfacing in the logs, but
                # must not break lead capture.
                if "duplicate column" not in str(exc).lower():
                    print(f"[leads_db] could not add column {col}: {exc}")
        conn.execute("CREATE INDEX IF NOT EXISTS idx_leads_status ON leads(status)")


# Day 28 — Welcome email via Resend (if RESEND_API_KEY set), else queue file.
# The queue is a JSON-lines file: _send_welcome_email appends one JSON object
# per email it could not deliver through Resend.
_WELCOME_QUEUE_FILE = "tau_rag/runtime/welcome_queue.jsonl"


def _send_welcome_email(lead: Dict[str, Any]) -> str:
    """Send the Hebrew welcome email for a captured lead.

    Delivery is attempted through the Resend HTTP API when the
    ``RESEND_API_KEY`` environment variable is set. If the key is missing or
    the API call fails, the message is appended to ``_WELCOME_QUEUE_FILE``
    (one JSON object per line) so it can be sent manually.

    Args:
        lead: mapping with an ``email`` key and an optional ``name`` key.

    Returns:
        ``'sent'`` when Resend accepted the message, ``'queued'`` when it was
        written to the queue file, ``'failed'`` when the email is empty or
        the queue file could not be written.
    """
    import json as _j
    # Imported outside the try below so the ``except _urle.HTTPError`` clause
    # can never trip over an unbound name.
    import urllib.error as _urle
    import urllib.request as _urlr
    from pathlib import Path

    email = (lead.get("email") or "").strip().lower()
    name  = (lead.get("name")  or "").strip()
    if not email:
        return "failed"

    # Build the email body in Hebrew
    greeting = f"שלום {name}," if name else "שלום,"
    subject = "ברוך/ה הבא/ה ל-Legal Eye — ניתוח הטענה שלך"
    text_body = (
        f"{greeting}\n\n"
        "תודה שניסית את Legal Eye!\n\n"
        "אנחנו עורכי-דין שבונים מנוע מודיעין משפטי בעברית — verbatim ממש "
        "מהפסיקה, אפס LLM חיצוני, אפס הזיות AI. הציון הציבורי שלנו "
        "מתעדכן אוטומטית כל שבוע:\n"
        "  https://legal-eye.1bigfam.com/eval\n\n"
        "מה הלאה?\n\n"
        "1. כנס לאפליקציה (3 שאלות חינמיות, ללא חשבון):\n"
        "   https://legal-eye.1bigfam.com\n\n"
        "2. תרצה לראות איך זה שונה מ-ChatGPT? השוואה צד-לצד:\n"
        "   https://legal-eye.1bigfam.com/compare\n\n"
        "3. תרצה להצטרף ל-Founding 50? ₪199 לכל החיים, 50 מקומות בלבד:\n"
        "   https://legal-eye.1bigfam.com/landing#pricing\n\n"
        "ענה לאימייל הזה עם כל שאלה — אני קורא אישית.\n\n"
        "אברי ברזל, עו\"ד\n"
        "Founder, Legal Eye\n"
        "avribarzel@gmail.com\n"
    )

    api_key = _os.environ.get("RESEND_API_KEY", "").strip()
    if api_key:
        try:
            req = _urlr.Request(
                "https://api.resend.com/emails",
                data=_j.dumps({
                    "from":    "Legal Eye <welcome@legal-eye.1bigfam.com>",
                    "to":      [email],
                    "subject": subject,
                    "text":    text_body,
                    "reply_to": "avribarzel@gmail.com",
                }).encode("utf-8"),
                headers={
                    "Authorization": f"Bearer {api_key}",
                    "Content-Type": "application/json",
                },
                method="POST",
            )
            with _urlr.urlopen(req, timeout=10) as resp:
                _ = resp.read()
            return "sent"
        except _urle.HTTPError as e:
            # Reading the error body is itself network I/O; if it fails we
            # still want to reach the queue fallback below.
            try:
                detail = e.read().decode('utf-8', 'ignore')[:200]
            except Exception:
                detail = ""
            print(f"[welcome] Resend HTTP {e.code}: {detail}")
        except Exception as e:
            print(f"[welcome] Resend send failed: {e}")
        # fall through to queue

    # Queue mode — user reads this file periodically and sends manually
    try:
        Path(_WELCOME_QUEUE_FILE).parent.mkdir(parents=True, exist_ok=True)
        with open(_WELCOME_QUEUE_FILE, "a", encoding="utf-8") as f:
            f.write(_j.dumps({
                "email":   email,
                "name":    name,
                "subject": subject,
                "body":    text_body,
                "queued_at": int(_time.time()),
            }, ensure_ascii=False) + "\n")
        return "queued"
    except Exception as e:
        print(f"[welcome] queue write failed: {e}")
        return "failed"


@app.post("/v1/demo/lead")
def demo_lead(body: _DemoLeadRequest, request: Request):  # type: ignore
    """Capture a landing-page demo lead into leads.db.

    Flow: shape-check the email, apply the per-IP sliding-window rate limit,
    insert the lead row, then try to send (or queue) the welcome email and
    record the outcome on the row. A welcome failure never fails the request.

    Returns:
        ``{"ok": True, "message_he": ...}`` on success, or a JSONResponse
        with status 400 (invalid email), 429 (rate limited) or 500 (DB write
        failed).
    """
    import sqlite3
    import time as _ti
    from contextlib import closing

    email = (body.email or "").strip().lower()
    # Light email-shape check (full RFC is overkill)
    if not email or "@" not in email or "." not in email.split("@", 1)[-1]:
        return JSONResponse(status_code=400, content={
            "ok": False, "reason": "invalid_email",
            "message_he": "כתובת אימייל לא תקינה."
        })

    # Client IP: first X-Forwarded-For entry when present, else the socket peer.
    fwd = request.headers.get("X-Forwarded-For", "")
    ip = (fwd.split(",")[0].strip() if fwd
          else (request.client.host if request.client else "unknown"))
    ua = (request.headers.get("User-Agent") or "")[:200]

    # Rate limit per IP: drop timestamps older than the window, then count.
    bucket = _LEAD_RATE_BUCKETS.setdefault(ip, deque())
    now = _ti.time()
    while bucket and bucket[0] < now - _LEAD_RATE_WINDOW_SEC:
        bucket.popleft()
    if len(bucket) >= _LEAD_RATE_LIMIT:
        return JSONResponse(status_code=429, content={
            "ok": False, "reason": "rate_limit",
            "message_he": "יותר מדי הרשמות מ-IP זה. נסה שוב בעוד שעה."
        })
    bucket.append(now)

    _ensure_leads_db()
    try:
        # closing() releases the connection; the inner ``conn`` context
        # commits on success and rolls back on error.
        with closing(sqlite3.connect(_LEADS_DB, timeout=5.0)) as conn, conn:
            cur = conn.execute("""
                INSERT INTO leads
                    (email, name, claim, last_analyzed_claim, source,
                     ip, user_agent, created_at, status, updated_at)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'new', ?)
            """, (
                email,
                (body.name or "")[:120],
                (body.claim or "")[:600],
                (body.last_analyzed_claim or "")[:600],
                (body.source or "landing_demo")[:60],
                ip,
                ua,
                int(now),
                int(now),
            ))
            new_id = cur.lastrowid
    except Exception as e:
        print(f"[demo/lead] db write failed: {e}")
        return JSONResponse(status_code=500, content={
            "ok": False, "reason": "db_error",
            "message_he": "שגיאה זמנית בשמירת ההודעה. נסה שוב או שלח אימייל ידנית ל-support@legal-eye.app"
        })

    print(f"[demo/lead] captured: {email} (source={body.source} ip={ip})")

    # Day 28 — fire welcome email (via Resend if configured, else queue)
    try:
        welcome_status = _send_welcome_email({
            "email": email,
            "name":  body.name or "",
        })
        if welcome_status in ("sent", "queued"):
            with closing(sqlite3.connect(_LEADS_DB, timeout=5.0)) as conn, conn:
                conn.execute("""
                    UPDATE leads SET sent_welcome=1, welcome_method=?, updated_at=?
                    WHERE id=?
                """, (welcome_status, int(_ti.time()), new_id))
        print(f"[demo/lead] welcome {welcome_status} for {email}")
    except Exception as e:
        # Don't fail the lead capture if welcome send fails
        print(f"[demo/lead] welcome send failed (non-fatal): {e}")

    return {
        "ok": True,
        "message_he": (
            "תודה! שלחנו אימייל קצר עם קישור לאפליקציה. "
            "אם הוא לא הגיע — בדוק בספאם, או צור קשר ישיר ב-avribarzel@gmail.com."
        ),
    }


@app.get("/v1/admin/leads")
def admin_leads_list(request: Request, limit: int = 100, status: Optional[str] = None):  # type: ignore
    """Admin read of captured leads, newest first.

    Use this to follow up Founding 50 conversions: each lead is a warm
    prospect who saw real results.

    Args:
        request: incoming request; must pass ``_check_admin_key``.
        limit: maximum rows to return, clamped to the range 1..500.
        status: optional filter, one of new / contacted / converted /
            churned. Any other value is ignored and all leads are listed.

    Returns:
        ``{"ok", "n", "leads", "counts"}`` where ``counts`` maps each status
        to its total row count (unfiltered), or a JSONResponse with status
        403 / 500.
    """
    if not _check_admin_key(request):
        return JSONResponse(status_code=403,
                            content={"ok": False, "reason": "admin_required"})
    import sqlite3
    from contextlib import closing

    _ensure_leads_db()

    # The WHERE fragment is a fixed string; the status value itself is
    # always bound as a parameter.
    where_sql = ""
    params: List[Any] = []
    if status and status in ("new", "contacted", "converted", "churned"):
        where_sql = "WHERE status = ?"
        params.append(status)
    params.append(max(1, min(limit, 500)))

    try:
        # closing() releases the connection; ``with conn`` alone would not.
        with closing(sqlite3.connect(_LEADS_DB, timeout=5.0)) as conn, conn:
            conn.row_factory = sqlite3.Row
            cur = conn.execute(f"""
                SELECT id, email, name, claim, last_analyzed_claim, source,
                       created_at, status, sent_welcome, welcome_method,
                       admin_note, updated_at
                FROM leads
                {where_sql}
                ORDER BY created_at DESC
                LIMIT ?
            """, tuple(params))
            rows = [dict(r) for r in cur.fetchall()]
            # Get counts by status for summary
            counts = {r["status"]: r["n"] for r in conn.execute(
                "SELECT status, COUNT(*) AS n FROM leads GROUP BY status"
            ).fetchall()}
        return {"ok": True, "n": len(rows), "leads": rows, "counts": counts}
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })


# Day 28 — Admin can update lead status + add a note. Status transitions:
#   new → contacted → converted (or churned)
class _LeadUpdateRequest(BaseModel):  # type: ignore
    """Request body for ``POST /v1/admin/leads/update``."""
    # Primary key of the lead row to update.
    id: int
    # New status; the handler only applies new / contacted / converted / churned.
    status: Optional[str] = None
    # Free-text admin note; None means "leave unchanged" (handler keeps 500 chars).
    admin_note: Optional[str] = None


@app.post("/v1/admin/leads/update")
def admin_leads_update(body: _LeadUpdateRequest, request: Request):  # type: ignore
    """Update a lead's status and/or admin note.

    Only a status from new / contacted / converted / churned is applied; an
    unrecognised status is ignored. The note is applied whenever it is not
    None and is truncated to 500 characters. ``updated_at`` is refreshed on
    every successful update.

    Returns:
        ``{"ok": True, "updated": <rowcount>}``, or a JSONResponse with
        status 403 (not admin), 400 (nothing to update), 404 (no such lead)
        or 500 (database error).
    """
    if not _check_admin_key(request):
        return JSONResponse(status_code=403,
                            content={"ok": False, "reason": "admin_required"})
    import sqlite3
    from contextlib import closing

    _ensure_leads_db()
    # Column assignments are fixed strings; values are bound as parameters.
    fields, values = [], []
    if body.status and body.status in ("new", "contacted", "converted", "churned"):
        fields.append("status = ?")
        values.append(body.status)
    if body.admin_note is not None:
        fields.append("admin_note = ?")
        values.append((body.admin_note or "")[:500])
    if not fields:
        return JSONResponse(status_code=400, content={
            "ok": False, "reason": "no_fields_to_update"
        })
    fields.append("updated_at = ?")
    values.append(int(_time.time()))
    values.append(int(body.id))
    try:
        # closing() releases the connection; ``with conn`` only commits.
        with closing(sqlite3.connect(_LEADS_DB, timeout=5.0)) as conn, conn:
            n_updated = conn.execute(
                f"UPDATE leads SET {', '.join(fields)} WHERE id = ?",
                tuple(values),
            ).rowcount
        if n_updated == 0:
            return JSONResponse(status_code=404, content={
                "ok": False, "reason": "lead_not_found"
            })
        return {"ok": True, "updated": n_updated}
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })


@app.get("/v1/admin/welcome_queue")
def admin_welcome_queue(request: Request):  # type: ignore
    """List welcome emails waiting in the queue file for manual sending.

    The queue file holds one JSON object per line. Blank and unparseable
    lines are skipped.

    Returns:
        ``{"ok", "n_queued", "queue", "resend_configured"}`` where
        ``n_queued`` counts every parseable line and ``queue`` holds the
        last 50 of them, or a JSONResponse with status 403 / 500.
    """
    if not _check_admin_key(request):
        return JSONResponse(status_code=403,
                            content={"ok": False, "reason": "admin_required"})
    import json as _j
    from collections import deque as _deque

    n_queued = 0
    # Only the newest 50 entries are returned, so only those are retained
    # while streaming through the file.
    tail = _deque(maxlen=50)
    try:
        with open(_WELCOME_QUEUE_FILE, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    item = _j.loads(line)
                except ValueError:
                    # Malformed line (json.JSONDecodeError is a ValueError).
                    continue
                n_queued += 1
                tail.append(item)
    except FileNotFoundError:
        # Nothing has been queued yet.
        return {"ok": True, "n_queued": 0, "queue": [],
                "resend_configured": bool(_os.environ.get("RESEND_API_KEY"))}
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })
    return {
        "ok": True,
        "n_queued": n_queued,
        "queue": list(tail),   # last 50
        "resend_configured": bool(_os.environ.get("RESEND_API_KEY")),
    }


# ──────────────────────────────────────────────────────────────────────
# Day 17 — Founding 50 scarcity + lightweight analytics
#
# Two small, file-based subsystems that share a sqlite DB at
# `runtime/funnel.db`:
#   • `founding50` (single row) — tracks how many of the 50 Founding
#     slots are claimed. Updated by the admin as deals close.
#     Public read endpoint powers the urgency widget on the landing.
#   • `events` — minimal event log: page_view, cta_click, demo_started,
#     demo_completed, email_submitted. POSTed from the static pages,
#     visible to the admin via /v1/admin/funnel.
# ──────────────────────────────────────────────────────────────────────
# SQLite file shared by the founding50 counter and the events log.
_FUNNEL_DB = "tau_rag/runtime/funnel.db"
# Upper bound for the "taken" counter; also reported as "total".
_FOUNDING_TOTAL = 50
# In-memory sliding-window state for /v1/track: client IP string -> deque of
# accepted-event timestamps (epoch seconds).
_TRACK_RATE_BUCKETS: Dict[str, deque] = {}
_TRACK_RATE_LIMIT = 60         # 60 events/min/IP — generous
_TRACK_RATE_WINDOW_SEC = 60


def _ensure_funnel_db():
    """Create the funnel SQLite schema if it does not exist yet.

    Creates the single-row ``founding50`` table (seeded with ``taken = 0``;
    an existing row is left untouched) and the ``events`` table with its
    indexes. Safe to call repeatedly. The database lives at ``_FUNNEL_DB``;
    its parent directory is created on demand.
    """
    import sqlite3
    from contextlib import closing
    from pathlib import Path

    Path(_FUNNEL_DB).parent.mkdir(parents=True, exist_ok=True)
    # ``with conn`` only commits / rolls back; closing() releases the
    # connection when the block exits.
    with closing(sqlite3.connect(_FUNNEL_DB, timeout=5.0)) as conn, conn:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS founding50 (
                id INTEGER PRIMARY KEY CHECK (id = 1),
                taken INTEGER NOT NULL DEFAULT 0,
                updated_at INTEGER NOT NULL
            )
        """)
        # INSERT OR IGNORE: seeds the row once, never resets an existing count.
        conn.execute("""
            INSERT OR IGNORE INTO founding50 (id, taken, updated_at)
            VALUES (1, 0, ?)
        """, (int(_time.time()),))
        conn.execute("""
            CREATE TABLE IF NOT EXISTS events (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                event TEXT NOT NULL,
                page TEXT,
                props TEXT,
                ip_hash TEXT,
                referrer TEXT,
                ua TEXT,
                created_at INTEGER NOT NULL
            )
        """)
        conn.execute("CREATE INDEX IF NOT EXISTS idx_ev_ts ON events(created_at)")
        conn.execute("CREATE INDEX IF NOT EXISTS idx_ev_event ON events(event)")


@app.get("/v1/founding50/status")
def founding50_status():  # type: ignore
    """Public counter for the urgency widget.

    Returns:
        ``{"ok", "taken", "total", "spots_left", "updated_at"}`` with
        ``taken`` clamped to 0..``_FOUNDING_TOTAL``, or a JSONResponse with
        status 500 when the database read fails.
    """
    import sqlite3
    from contextlib import closing

    _ensure_funnel_db()
    try:
        # closing() releases the connection; ``with conn`` alone would not.
        with closing(sqlite3.connect(_FUNNEL_DB, timeout=5.0)) as conn, conn:
            row = conn.execute(
                "SELECT taken, updated_at FROM founding50 WHERE id=1"
            ).fetchone()
        taken = int(row[0]) if row else 0
        updated_at = int(row[1]) if row else int(_time.time())
        taken = max(0, min(taken, _FOUNDING_TOTAL))
        return {
            "ok": True,
            "taken": taken,
            "total": _FOUNDING_TOTAL,
            "spots_left": _FOUNDING_TOTAL - taken,
            "updated_at": updated_at,
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })


class _Founding50SetRequest(BaseModel):  # type: ignore
    """Request body for ``POST /v1/admin/founding50/set``."""
    # New number of claimed slots; the handler clamps it to 0.._FOUNDING_TOTAL.
    taken: int


@app.post("/v1/admin/founding50/set")
def admin_founding50_set(body: _Founding50SetRequest, request: Request):  # type: ignore
    """Admin updates the claimed-slot count as deals close.

    ``body.taken`` is clamped to 0..``_FOUNDING_TOTAL`` before it is stored.

    Returns:
        ``{"ok", "taken", "total", "spots_left"}``, or a JSONResponse with
        status 403 (not admin) or 500 (database error).
    """
    if not _check_admin_key(request):
        return JSONResponse(status_code=403,
                            content={"ok": False, "reason": "admin_required"})
    import sqlite3
    from contextlib import closing

    _ensure_funnel_db()
    taken = max(0, min(int(body.taken), _FOUNDING_TOTAL))
    try:
        # closing() releases the connection; ``with conn`` only commits.
        with closing(sqlite3.connect(_FUNNEL_DB, timeout=5.0)) as conn, conn:
            conn.execute(
                "UPDATE founding50 SET taken=?, updated_at=? WHERE id=1",
                (taken, int(_time.time())),
            )
        return {"ok": True, "taken": taken, "total": _FOUNDING_TOTAL,
                "spots_left": _FOUNDING_TOTAL - taken}
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })


class _TrackEventRequest(BaseModel):  # type: ignore
    """Request body for ``POST /v1/track``."""
    # Event name (handler trims it and keeps at most 60 chars; empty -> 400).
    event: str
    # Page the event came from (handler keeps at most 120 chars).
    page: Optional[str] = ""
    # Arbitrary event properties; stored as JSON text capped at 500 chars.
    props: Optional[Dict[str, Any]] = None
    # Referrer reported by the page; the handler falls back to the Referer header.
    referrer: Optional[str] = ""


@app.post("/v1/track")
def track_event(body: _TrackEventRequest, request: Request):  # type: ignore
    """Log a single funnel event. Public — rate-limited per IP.

    Only a truncated SHA-256 hash of the client IP is stored, never the raw
    address. The raw IP is used in memory as the rate-limit key.

    Returns:
        ``{"ok": True}`` on success, or a JSONResponse with status 400
        (missing event), 429 (rate limited) or 500 (database error).
    """
    import hashlib
    import json as _j
    import sqlite3
    from contextlib import closing

    ev = (body.event or "").strip()[:60]
    if not ev:
        return JSONResponse(status_code=400, content={
            "ok": False, "reason": "missing_event"
        })

    # Client IP: first X-Forwarded-For entry when present, else the socket peer.
    fwd = request.headers.get("X-Forwarded-For", "")
    ip = (fwd.split(",")[0].strip() if fwd
          else (request.client.host if request.client else "unknown"))
    ip_hash = hashlib.sha256(("le-track:" + ip).encode()).hexdigest()[:16]

    # Rate limit per IP (counts on raw IP, not hash, so we don't have to
    # reverse-hash on each request)
    bucket = _TRACK_RATE_BUCKETS.setdefault(ip, deque())
    now = _time.time()
    while bucket and bucket[0] < now - _TRACK_RATE_WINDOW_SEC:
        bucket.popleft()
    if len(bucket) >= _TRACK_RATE_LIMIT:
        return JSONResponse(status_code=429, content={
            "ok": False, "reason": "rate_limit"
        })
    bucket.append(now)

    ua = (request.headers.get("User-Agent") or "")[:200]
    referrer = (body.referrer or request.headers.get("Referer") or "")[:200]
    props_json = ""
    if body.props is not None:
        try:
            # Cap size to avoid abuse. NOTE: the cut is character-based, so
            # an oversized payload is stored as truncated (non-parseable) JSON.
            props_json = _j.dumps(body.props, ensure_ascii=False)[:500]
        except (TypeError, ValueError, RecursionError):
            # Unserialisable or pathologically nested props: store the event
            # without them rather than rejecting it.
            props_json = ""

    _ensure_funnel_db()
    try:
        # closing() releases the connection; ``with conn`` only commits.
        with closing(sqlite3.connect(_FUNNEL_DB, timeout=5.0)) as conn, conn:
            conn.execute("""
                INSERT INTO events
                    (event, page, props, ip_hash, referrer, ua, created_at)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            """, (
                ev, (body.page or "")[:120],
                props_json, ip_hash, referrer, ua, int(now),
            ))
    except Exception as e:
        print(f"[track] db write failed: {e}")
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })

    return {"ok": True}


@app.get("/v1/admin/funnel")
def admin_funnel(request: Request, hours: int = 24):  # type: ignore
    """Admin summary of funnel events inside a trailing time window.

    Args:
        request: incoming request; must pass ``_check_admin_key``.
        hours: window length, clamped to 1..720 (30 days).

    Returns:
        Event counts by name, page-view counts by page, the number of
        distinct IP hashes, the four headline funnel counters and the 50
        most recent events; or a JSONResponse with status 403 / 500.
    """
    if not _check_admin_key(request):
        return JSONResponse(status_code=403,
                            content={"ok": False, "reason": "admin_required"})
    import sqlite3
    from contextlib import closing

    _ensure_funnel_db()
    hours = max(1, min(int(hours), 24 * 30))
    cutoff = int(_time.time()) - hours * 3600
    try:
        # closing() releases the connection; ``with conn`` alone would not.
        with closing(sqlite3.connect(_FUNNEL_DB, timeout=5.0)) as conn, conn:
            conn.row_factory = sqlite3.Row
            by_event = [dict(r) for r in conn.execute("""
                SELECT event, COUNT(*) AS n
                FROM events WHERE created_at >= ?
                GROUP BY event ORDER BY n DESC
            """, (cutoff,)).fetchall()]
            by_page = [dict(r) for r in conn.execute("""
                SELECT page, COUNT(*) AS n
                FROM events
                WHERE created_at >= ? AND event = 'page_view'
                GROUP BY page ORDER BY n DESC
            """, (cutoff,)).fetchall()]
            uniques = conn.execute("""
                SELECT COUNT(DISTINCT ip_hash) FROM events
                WHERE created_at >= ?
            """, (cutoff,)).fetchone()[0]
            recent = [dict(r) for r in conn.execute("""
                SELECT event, page, props, referrer, created_at
                FROM events WHERE created_at >= ?
                ORDER BY created_at DESC LIMIT 50
            """, (cutoff,)).fetchall()]

        # Conversion ratios — same window
        ev_map = {r["event"]: r["n"] for r in by_event}
        return {
            "ok": True,
            "window_hours": hours,
            "unique_visitors_ip_hash": uniques,
            "by_event": by_event,
            "by_page": by_page,
            "funnel": {
                "page_views":      ev_map.get("page_view", 0),
                "demo_started":    ev_map.get("demo_started", 0),
                "demo_completed":  ev_map.get("demo_completed", 0),
                "email_submitted": ev_map.get("email_submitted", 0),
            },
            "recent_events": recent,
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })


# ──────────────────────────────────────────────────────────────────────
# Day 23 — Public doctrine catalog summary endpoint
#
# Exposes the curated doctrine catalog so external clients (the
# landing scorecard, the marketing materials, third-party auditors)
# can verify how many doctrines are curated and what the anchors are.
# Returns a flattened summary — `id`, `name_he`, `name_en`, `domain`,
# `anchor_case`, `n_leading_cases` — not the full schema (elements,
# exceptions, keywords stay internal).
# ──────────────────────────────────────────────────────────────────────
class _DoctrineClassifyRequest(BaseModel):  # type: ignore
    """Request body for ``POST /v1/doctrines/classify``."""
    # Free-text query to classify; blank text yields an empty match list.
    text: str
    # Forwarded to classify_doctrines as ``k``.
    k: int = 3
    # Forwarded to classify_doctrines as ``min_score``.
    min_score: float = 1.0


@app.post("/v1/doctrines/classify")
def doctrines_classify(body: _DoctrineClassifyRequest):  # type: ignore
    """Day 49 — match free text against the curated doctrine catalog.

    Produces the top-k doctrines together with display metadata so the
    frontend can show a "related doctrines" panel next to the lawyer-ask
    result (topic-dossier UX). Blank text short-circuits to an empty list;
    any failure is reported as a 500 JSONResponse.
    """
    if not (body.text or "").strip():
        return {"ok": True, "matches": [], "n_total": 0}
    try:
        from ..intelligence.doctrine_classifier import (
            classify_doctrines, load_doctrine_catalog,
        )
        catalog = load_doctrine_catalog()
        by_id = {}
        for entry in catalog.get("doctrines", []):
            by_id[entry["id"]] = entry
        hits = classify_doctrines(body.text, k=body.k,
                                  min_score=body.min_score)

        def _describe(hit):
            # Catalog metadata for this hit; empty dict when the id is unknown.
            meta = by_id.get(hit.doctrine_id, {})
            first_case = (meta.get("leading_cases") or [])[:1]
            if first_case:
                anchor_case = first_case[0].get("citation")
                anchor_year = first_case[0].get("year")
            else:
                anchor_case = None
                anchor_year = None
            statute_refs = meta.get("statute_refs")
            statute = statute_refs[0].get("law") if statute_refs else None
            return {
                "id":          hit.doctrine_id,
                "name_he":     hit.name_he,
                "domain":      hit.domain,
                "score":       round(hit.score, 2),
                "anchor_case": anchor_case,
                "anchor_year": anchor_year,
                "statute":     statute,
                "pending_review": bool(meta.get("_pending_lawyer_review")),
                # Day 49 Phase 2.5 — refinement chips: pre-curated Hebrew
                # sub-topic phrases the frontend appends to the question
                # before re-submitting, to narrow the topic.
                "refinements": meta.get("refinements") or [],
            }

        described = [_describe(hit) for hit in hits]
        return {"ok": True, "matches": described, "n_total": len(described)}
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })


@app.get("/v1/doctrines/catalog")
def doctrines_catalog_public():  # type: ignore
    """Serve a flattened, read-only summary of the doctrine catalog.

    Each doctrine is reduced to its id, names, domain, first leading case
    (citation and year) and the counts of leading cases and statute refs.
    A per-domain tally is included. Any failure yields a 500 JSONResponse.
    """
    try:
        from collections import Counter as _Counter
        from ..intelligence.doctrine_classifier import load_doctrine_catalog

        catalog = load_doctrine_catalog()
        summaries = []
        for entry in catalog.get("doctrines", []) or []:
            cases = entry.get("leading_cases") or []
            if cases:
                anchor_case = cases[0].get("citation")
                anchor_year = cases[0].get("year")
            else:
                anchor_case = None
                anchor_year = None
            summaries.append({
                "id":               entry.get("id"),
                "name_he":          entry.get("name_he"),
                "name_en":          entry.get("name_en"),
                "domain":           entry.get("domain"),
                "anchor_case":      anchor_case,
                "anchor_year":      anchor_year,
                "n_leading_cases":  len(cases),
                "n_statute_refs":   len(entry.get("statute_refs") or []),
            })

        # Domain breakdown (entries without a domain are not counted)
        tally = _Counter()
        for summary in summaries:
            if summary["domain"]:
                tally[summary["domain"]] += 1

        return {
            "ok":            True,
            "n_doctrines":   len(summaries),
            "by_domain":     dict(tally),
            "doctrines":     summaries,
            "_schema_version": catalog.get("_schema_version"),
            "_disclaimer":   catalog.get("_disclaimer"),
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })


@app.get("/v1/doctrines/{doctrine_id}")
def doctrines_get_one(doctrine_id: str):  # type: ignore
    """Day 49 — full details of one doctrine for the dossier expand-on-click.

    The payload carries the names, domain, elements, exceptions,
    statute_refs, every leading case and the pending-review flag. Keywords
    are deliberately left out: they are a tuning surface, not user-facing.
    Unknown ids give a 404 JSONResponse; any failure gives a 500.

    NOTE: must be registered AFTER /v1/doctrines/catalog so the
    static path wins over the dynamic match.
    """
    try:
        from ..intelligence.doctrine_classifier import load_doctrine_catalog

        by_id = {}
        for entry in load_doctrine_catalog().get("doctrines", []):
            by_id[entry["id"]] = entry
        found = by_id.get(doctrine_id)
        if not found:
            return JSONResponse(status_code=404, content={
                "ok": False, "reason": "doctrine_not_found",
            })

        payload = {"ok": True}
        # Scalar fields are passed through as-is (None when absent).
        for key in ("id", "name_he", "name_en", "domain"):
            payload[key] = found.get(key)
        # Collection fields fall back to an empty list.
        for key in ("elements", "exceptions", "statute_refs", "leading_cases"):
            payload[key] = found.get(key) or []
        payload["pending_review"] = bool(found.get("_pending_lawyer_review"))
        return payload
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })


# ──────────────────────────────────────────────────────────────────────
# Day 51 — Judgment Reader: structured render of a single court ruling.
#
# Takes the raw doc text (from pipeline.get_text), cleans the corpus
# prefixes, and parses out:
#   - title, court, parties, judges, decision_date
#   - body paragraphs (numbered where possible)
#   - inline citations (extracted, dedupe)
#   - source links (Nevo, courts.gov.il where the citation kind permits)
#
# Returned JSON powers the frontend reader modal (Hebrew serif
# typography, linkified citations, sticky paragraph numbers).
# ──────────────────────────────────────────────────────────────────────

# Strip the same corpus prefix the hierarchical_graph uses.
import re as _re  # module-level alias for the _JR_* regex bundle below
# One or more leading "[...]" bracket groups (with surrounding whitespace)
# at the very start of the text.
_JR_BRACKET_HEADER_RE = _re.compile(r"^(?:\s*\[[^\]]*\]\s*)+", flags=_re.UNICODE)
# Leading prose marker: the Hebrew word for "ruling", a dash or colon, then
# text up to a closing parenthesis followed by a colon.
_JR_PROSE_MARKER_RE = _re.compile(
    r"^\s*פסיק[הת]\s*[—–\-:]\s*[^:]+\)\s*:\s*", flags=_re.UNICODE)
# Citation regex — matches ע"א 1234/97, בג"ץ 1234/95, etc.
# Shape: a court-procedure abbreviation (inner quote mark optional), a case
# number, "/" or "-", then a 2-4 digit year. Group 1 is the whole citation.
_JR_CASE_RX = _re.compile(
    r'((?:בג"?ץ|ע"?א|ע"?פ|רע"?א|רע"?פ|דנ"?א|דנ"?פ|בש"?א|בש"?פ|דב"?ע)\s*\d+\s*[/\-]\s*\d{2,4})',
    flags=_re.UNICODE,
)
# Paragraph splitter. Splits on a blank line, or on a newline that is
# followed by a numbered item ("12. ") or by a Hebrew letter in the range
# alef-yod plus "." or ")" and whitespace. The lookaheads keep the number /
# letter marker at the start of the following chunk.
_JR_PARA_RX = _re.compile(r"\n\s*\n|\n(?=\d+\.\s)|\n(?=[א-י][.)]\s)")


def _fetch_from_wikisource(cite: str) -> Optional[dict]:
    """Day 52 — pull a judgment from Hebrew Wikisource.

    Wikisource is community-transcribed primary sources. ~500-1000
    famous Israeli rulings are available there in clean Hebrew text
    with structure (header / parties / judges / body). Open access,
    no anti-bot, no copyright concerns (court rulings are public
    domain; the transcription is CC-BY-SA).

    Two API calls are made: ``opensearch`` to find candidate page titles
    for the citation, then ``parse`` on the first title. The returned HTML
    is reduced to plain text, split into paragraphs and scanned for case
    citations.

    Args:
        cite: the citation string to search for.

    Returns:
        dict in the same shape as judgment_reader_render, or None when the
        search finds nothing, a request fails, or the page text is shorter
        than 500 characters.
    """
    import html as _html_lib
    import json
    import re as _r
    import urllib.parse
    import urllib.request

    headers = {"User-Agent": "Mozilla/5.0 (compatible; LegalEyeBot/1.0; +https://legal-eye.1bigfam.com)"}
    # Step 1: opensearch for the citation
    q = urllib.parse.quote(cite)
    try:
        url = (f"https://he.wikisource.org/w/api.php?action=opensearch"
               f"&search={q}&limit=3&format=json")
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=15) as r:
            data = json.loads(r.read())
        # opensearch replies with [query, [titles...], ...]
        titles = data[1] if len(data) > 1 else []
        if not titles:
            return None
    except Exception:
        return None

    # Step 2: parse the FIRST matching page
    title = titles[0]
    try:
        ptitle = urllib.parse.quote(title)
        purl = (f"https://he.wikisource.org/w/api.php?action=parse"
                f"&page={ptitle}&prop=text|wikitext&format=json")
        req = urllib.request.Request(purl, headers=headers)
        with urllib.request.urlopen(req, timeout=15) as r:
            pdata = json.loads(r.read())
    except Exception:
        return None

    markup = (pdata.get("parse", {}).get("text", {}) or {}).get("*", "")
    if not markup:
        return None

    # Strip MediaWiki UI cruft + nested HTML
    markup = _r.sub(r"<script[^>]*>.*?</script>", "", markup, flags=_r.DOTALL)
    markup = _r.sub(r"<style[^>]*>.*?</style>",   "", markup, flags=_r.DOTALL)
    markup = _r.sub(r'<span class="mw-editsection".*?</span>', "", markup, flags=_r.DOTALL)
    markup = _r.sub(r'<div class="navbox.*?</div>',            "", markup, flags=_r.DOTALL)
    # Get plain text (preserves Hebrew + line breaks at <p> / <br>)
    markup = _r.sub(r"<br\s*/?>", "\n", markup)
    markup = _r.sub(r"</p>",      "\n\n", markup)
    cleaned = _r.sub(r"<[^>]+>", "", markup)
    # Decode entities (&#160;, &quot;, &amp; ...) only AFTER the tags are
    # gone, so an escaped "&lt;" can never be mistaken for markup. Without
    # this step the entities leak into the reader text and an escaped quote
    # mark hides citations from _JR_CASE_RX.
    cleaned = _html_lib.unescape(cleaned)
    cleaned = _r.sub(r"\n{3,}", "\n\n", cleaned).strip()
    if len(cleaned) < 500:
        return None  # too short — probably not a real ruling

    # Re-use our paragraph splitter logic from the corpus render
    paragraphs = []
    for chunk in _JR_PARA_RX.split(cleaned):
        t = chunk.strip()
        if not t:
            continue
        # Leading "12. " or Hebrew-letter marker becomes the paragraph number.
        num_match = _r.match(r"^((?:\d+\.|[א-י][.)])\s)(.+)", t, _r.DOTALL)
        if num_match:
            paragraphs.append({"num": num_match.group(1).strip(),
                               "text": num_match.group(2).strip()})
        else:
            paragraphs.append({"num": None, "text": t})

    # Unique citations in order of first appearance.
    citations = []
    seen = set()
    for m in _JR_CASE_RX.finditer(cleaned):
        c = m.group(1).strip()
        if c not in seen:
            seen.add(c)
            citations.append(c)

    nevo_q = urllib.parse.quote(f'site:nevo.co.il "{cite}"')
    courts_q = urllib.parse.quote(f'"{cite}"')
    return {
        "ok":            True,
        "doc_id":        f"wikisource:{title}",
        "citation":      cite,
        "court":         None,
        "source":        "wikisource",
        "source_url":    f"https://he.wikisource.org/wiki/{urllib.parse.quote(title)}",
        "source_title":  title,
        "n_paragraphs":  len(paragraphs),
        "n_citations":   len(citations),
        "n_chars":       len(cleaned),
        "paragraphs":    paragraphs[:200],
        "citations":     citations[:60],
        "links": {
            "nevo":   f"https://www.google.com/search?q={nevo_q}",
            "courts": f"https://www.google.com/search?q={courts_q}",
        },
    }


class _FromHtmlRequest(BaseModel):  # type: ignore
    # Request body for POST /v1/judgment/from_html.
    # cite: citation string; the handler strips it and uses it for the
    #       returned doc_id / citation fields and the search links.
    cite: str
    # url: address the HTML was scraped from; its hostname picks the source label.
    url: str
    # html: raw page markup to be cleaned and parsed.
    html: str


@app.post("/v1/judgment/from_html")
def judgment_reader_from_html(body: _FromHtmlRequest):  # type: ignore
    """Day 54 — parse raw HTML scraped by the Chrome extension into the
    standard reader shape.

    The extension fetches a URL using the user's real browser session
    (bypassing anti-bot) and posts the HTML here, so cleanup and
    structure extraction live server-side instead of drifting inside
    the extension.

    Responses:
      200 — dict with ok=True, paragraphs (max 200), citations (max 60),
            counts, source labels and Nevo / Google search links.
      400 — reason "html_too_short" (< 200 chars of input) or
            "no_content_after_strip" (< 400 chars left after cleanup).
      502 — reason "upstream_anti_bot" when the first 2000 cleaned chars
            match a known block-page pattern.
      500 — any unexpected exception, reported as "<Type>: <message>".
    """
    import re as _r
    from html import unescape as _unescape
    from urllib.parse import quote as _q, urlparse
    try:
        cite = (body.cite or "").strip()
        url = (body.url or "").strip()
        markup = body.html or ""
        if len(markup) < 200:
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "html_too_short",
            })

        # Identify the source from the URL (controls labeling only, so a
        # URL that cannot be parsed just falls back to "external").
        try:
            host = urlparse(url).hostname or ""
        except Exception:
            host = ""
        SOURCE_NAMES = {
            "www.court.gov.il":   "court.gov.il",
            "supreme.court.gov.il": "supreme.court.gov.il",
            "www.pador.co.il":    "pador.co.il",
            "www.psakdin.co.il":  "psakdin.co.il",
            "www.takdin.co.il":   "takdin.co.il",
            "www.lawdata.co.il":  "lawdata.co.il",
        }
        source = SOURCE_NAMES.get(host, host or "external")

        # Strip script/style/nav cruft aggressively (element + contents).
        strip_flags = _r.DOTALL | _r.IGNORECASE
        for tag in ("script", "style", "noscript", "nav",
                    "header", "footer", "aside"):
            markup = _r.sub(rf"<{tag}[^>]*>.*?</{tag}>", "", markup,
                            flags=strip_flags)
        markup = _r.sub(r"<!--.*?-->", "", markup, flags=_r.DOTALL)

        # Preserve paragraph breaks before stripping tags
        markup = _r.sub(r"<br\s*/?>", "\n", markup, flags=_r.IGNORECASE)
        markup = _r.sub(r"</p>", "\n\n", markup, flags=_r.IGNORECASE)
        markup = _r.sub(r"</div>", "\n", markup, flags=_r.IGNORECASE)
        markup = _r.sub(r"</?h[1-6][^>]*>", "\n\n", markup,
                        flags=_r.IGNORECASE)

        # Drop every remaining tag, THEN decode entities. html.unescape
        # handles all named and numeric references in a single pass, so
        # "&#1488;" / "&ndash;" are decoded and "&amp;lt;" correctly
        # becomes "&lt;" instead of being double-decoded to "<".
        cleaned = _r.sub(r"<[^>]+>", "", markup)
        cleaned = _unescape(cleaned).replace("\xa0", " ")  # nbsp → space

        # Collapse whitespace
        cleaned = _r.sub(r"[ \t]+", " ", cleaned)
        cleaned = _r.sub(r"\n{3,}", "\n\n", cleaned).strip()

        if len(cleaned) < 400:
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "no_content_after_strip",
                "n_chars_after_strip": len(cleaned),
            })

        # v2.99.66 — detect anti-bot block pages BEFORE parsing them as
        # judgment text. Such pages must NOT be rendered as the judgment;
        # return a structured error so the frontend can tell the user
        # clearly and try a different source.
        BLOCK_PATTERNS = [
            r"פעילות\s+בלתי[\s־]+מורשת",
            r"Case\s+Number\s*:\s*\d{10,}",
            r"גישתך\s+לאתר\s+נמנעה",
            r"מערכת\s+ההגנה",
            r"Access\s+Denied",
            r"You\s+have\s+been\s+blocked",
            r"Cloudflare\s+Ray\s+ID",
            r"Just\s+a\s+moment\s*\.{3}",
            r"Please\s+complete\s+the\s+security\s+check",
        ]
        head_for_check = cleaned[:2000]
        for pat in BLOCK_PATTERNS:
            if _r.search(pat, head_for_check):
                return JSONResponse(status_code=502, content={
                    "ok": False, "reason": "upstream_anti_bot",
                    "source": source,
                    "source_url": url,
                    "block_pattern": pat,
                    "human_message":
                        f"האתר {source} חסם את הבקשה. ייתכן שיש"
                        f" יותר מדי בקשות מה-IP שלך — נסה שוב מאוחר"
                        f" יותר, או חפש את הפסק במנוע חיפוש.",
                })

        # Heuristic: trim everything before the first judgment-like
        # marker to remove nav/header noise. Only the first marker (in
        # list order) found more than 200 chars into the text is used.
        markers = [r"בית[\s־]+המשפט", r"\bבפני\b", r"\bפסק[\s־]+דין\b",
                   r"\bהחלטה\b", r"\bבעניין\b"]
        for mk in markers:
            m = _r.search(mk, cleaned)
            if m and m.start() > 200:
                cleaned = cleaned[m.start():]
                break

        # Paragraphs: split, drop fragments under 20 chars, and peel off
        # a leading "12." / Hebrew-letter enumerator into "num".
        paragraphs = []
        for chunk in _JR_PARA_RX.split(cleaned):
            t = chunk.strip()
            if len(t) < 20:
                continue
            num_match = _r.match(r"^((?:\d+\.|[א-י][.)])\s)(.+)", t, _r.DOTALL)
            if num_match:
                paragraphs.append({"num": num_match.group(1).strip(),
                                   "text": num_match.group(2).strip()})
            else:
                paragraphs.append({"num": None, "text": t})

        # Inline citations, de-duplicated in first-seen order.
        citations = list(dict.fromkeys(
            m.group(1).strip() for m in _JR_CASE_RX.finditer(cleaned)
        ))

        nevo_q = _q('site:nevo.co.il "' + cite + '"')
        courts_q = _q('"' + cite + '"')
        return {
            "ok":            True,
            "doc_id":        f"{source}:{cite}",
            "citation":      cite or None,
            "court":         None,
            "source":        source,
            "source_url":    url,
            "source_title":  None,
            "n_paragraphs":  len(paragraphs),
            "n_citations":   len(citations),
            "n_chars":       len(cleaned),
            "paragraphs":    paragraphs[:200],
            "citations":     citations[:60],
            "links": {
                "nevo":   f"https://www.google.com/search?q={nevo_q}",
                "courts": f"https://www.google.com/search?q={courts_q}",
            },
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })


def _fetch_from_wikipedia(cite: str) -> Optional[dict]:
    """Day 52b — build a reader payload from a Hebrew Wikipedia article.

    Secondary source for rulings that have an article on Wikipedia:
    the first opensearch hit for ``cite`` is fetched through the parse
    API, its HTML is stripped to plain text, and the text is split into
    paragraphs and scanned for case citations.

    Returns the reader dict (source "wikipedia"), or None when the
    search has no hits, either HTTP call fails, the page has no HTML,
    or fewer than 400 characters survive the cleanup.
    """
    import json
    import re as _r
    import urllib.parse
    import urllib.request
    from urllib.parse import quote as _q

    api_base = "https://he.wikipedia.org/w/api.php"
    headers = {"User-Agent": "Mozilla/5.0 (compatible; LegalEyeBot/1.0; +https://legal-eye.1bigfam.com)"}

    def _get_json(target):
        # One GET with the bot User-Agent and a 15 s timeout.
        request = urllib.request.Request(target, headers=headers)
        with urllib.request.urlopen(request, timeout=15) as resp:
            return json.loads(resp.read())

    # Step 1: opensearch on the bare citation; keep the first title.
    q = urllib.parse.quote(cite)
    try:
        found = _get_json(
            f"{api_base}?action=opensearch&search={q}&limit=5&format=json"
        )
        titles = found[1] if len(found) > 1 else []
        if not titles:
            return None
        title = titles[0]
    except Exception:
        return None

    # Step 2: fetch the rendered page HTML.
    try:
        ptitle = urllib.parse.quote(title)
        pdata = _get_json(
            f"{api_base}?action=parse&page={ptitle}&prop=text&format=json"
        )
    except Exception:
        return None

    html = (pdata.get("parse", {}).get("text", {}) or {}).get("*", "")
    if not html:
        return None

    # Wikipedia articles carry a lot of UI cruft — remove whole elements
    # first (patterns span newlines), then turn breaks into newlines.
    cruft_patterns = (
        r"<script[^>]*>.*?</script>",
        r"<style[^>]*>.*?</style>",
        r'<span class="mw-editsection".*?</span>',
        r'<div class="(?:navbox|metadata|infobox|hatnote|reflist|references|thumb).*?</div>',
        r'<table[^>]*class="[^"]*(?:infobox|navbox|wikitable)[^"]*".*?</table>',
        r'<sup[^>]*class="reference"[^>]*>.*?</sup>',
    )
    for pattern in cruft_patterns:
        html = _r.sub(pattern, "", html, flags=_r.DOTALL)
    for pattern, replacement in (
        (r"<br\s*/?>", "\n"),
        (r"</p>", "\n\n"),
    ):
        html = _r.sub(pattern, replacement, html)

    cleaned = _r.sub(r"<[^>]+>", "", html)
    cleaned = _r.sub(r"\[\d+\]", "", cleaned)   # footnote refs
    cleaned = _r.sub(r"\n{3,}", "\n\n", cleaned).strip()
    if len(cleaned) < 400:
        return None

    # Split into paragraphs, separating a leading enumerator if present.
    paragraphs = []
    for piece in _JR_PARA_RX.split(cleaned):
        body_text = piece.strip()
        if not body_text:
            continue
        numbered = _r.match(r"^((?:\d+\.|[א-י][.)])\s)(.+)", body_text, _r.DOTALL)
        if numbered is None:
            paragraphs.append({"num": None, "text": body_text})
        else:
            paragraphs.append({"num": numbered.group(1).strip(),
                               "text": numbered.group(2).strip()})

    # Case citations in order of first appearance, without repeats.
    citations = list(dict.fromkeys(
        hit.group(1).strip() for hit in _JR_CASE_RX.finditer(cleaned)
    ))

    search_base = "https://www.google.com/search?q="
    return {
        "ok":            True,
        "doc_id":        f"wikipedia:{title}",
        "citation":      cite,
        "court":         None,
        "source":        "wikipedia",
        "source_url":    f"https://he.wikipedia.org/wiki/{urllib.parse.quote(title)}",
        "source_title":  title,
        "n_paragraphs":  len(paragraphs),
        "n_citations":   len(citations),
        "n_chars":       len(cleaned),
        "paragraphs":    paragraphs[:200],
        "citations":     citations[:60],
        "links": {
            "nevo":   search_base + _q('site:nevo.co.il "' + cite + '"'),
            "courts": search_base + _q('"' + cite + '"'),
        },
    }


# Small bounded in-process cache for external (Wikisource / Wikipedia)
# fetches, keyed by citation prefix — avoids re-pulling the same case
# repeatedly. _EXTERNAL_FETCH_ORDER records insertion order (capped at
# 100 keys) and judgment_reader_by_cite uses it to pick the entry to drop.
# NOTE(review): cache hits do not refresh an entry's position, so
# eviction follows insertion order rather than true least-recently-used.
_EXTERNAL_FETCH_CACHE: Dict[str, dict] = {}
_EXTERNAL_FETCH_ORDER: deque = deque(maxlen=100)


@app.get("/v1/judgment/by-cite")
def judgment_reader_by_cite(cite: str):  # type: ignore
    """Day 51 — resolve a citation string to a corpus doc_id and
    return the same structured render as /v1/judgment/{doc_id}/render.

    Input may include surrounding text (case title, parties, year).
    Only the canonical citation prefix (e.g. 'ע\"א 207/79') matched by
    _JR_CASE_RX is looked up in cn.doc_for_citation; when the regex does
    not match, the whole stripped input is used.

    Resolution order:
      1. corpus lookup over several punctuation/spacing variants;
      2. in-process cache of earlier external fetches;
      3. Hebrew Wikisource, then Hebrew Wikipedia;
      4. 404 with the prefix we tried plus Nevo / Google search links so
         the frontend can show a graceful fallback.

    Returns 400 for an empty citation and 500 ("<Type>: <message>") on
    any unexpected exception.
    """
    try:
        from ..pipeline import get_pipeline
        from ..citation_network import get_or_build
        pipe = get_pipeline()
        cn = get_or_build(pipe)

        raw = (cite or "").strip()
        if not raw:
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "empty_citation",
            })

        # Extract just the canonical citation prefix (e.g. "ע\"א 207/79").
        # The input often arrives with case title attached:
        #   "ע\"א 207/79 רביב נ' בית יולס" → strip down to "ע\"א 207/79"
        m = _JR_CASE_RX.search(raw)
        prefix = m.group(1).strip() if m else raw

        # Try several normalization variants of the prefix. The first
        # three keep the original lookup order; the last two swap ASCII
        # quotes with Hebrew gershayim (U+05F4) / geresh (U+05F3) in both
        # directions, which the previous no-op replace() calls never did.
        geresh = "׳"
        gershayim = "\u05f4"
        variants = [
            prefix,
            prefix.replace("'", geresh),
            # Compact form (no spaces): "ע\"א 207/79" → "ע\"א207/79"
            _re.sub(r"\s+", "", prefix),
            prefix.replace('"', gershayim).replace("'", geresh),
            prefix.replace(gershayim, '"').replace(geresh, "'"),
        ]
        doc_id = None
        for c in dict.fromkeys(variants):  # ordered, duplicates removed
            doc_id = cn.doc_for_citation.get(c)
            if doc_id:
                break

        if not doc_id:
            # Day 52 — corpus miss → try Hebrew Wikisource / Wikipedia
            # before giving up. Cached in-process to avoid repeat fetches.
            cached = _EXTERNAL_FETCH_CACHE.get(prefix)
            if cached:
                return cached
            for fetcher in (_fetch_from_wikisource, _fetch_from_wikipedia):
                result = fetcher(prefix)
                if result:
                    # Evict BEFORE appending: a full deque(maxlen=N)
                    # silently discards its oldest key on append, which
                    # would leave that key's cache entry orphaned and
                    # make a later popleft() evict the wrong entry.
                    if len(_EXTERNAL_FETCH_ORDER) == _EXTERNAL_FETCH_ORDER.maxlen:
                        oldest = _EXTERNAL_FETCH_ORDER.popleft()
                        _EXTERNAL_FETCH_CACHE.pop(oldest, None)
                    _EXTERNAL_FETCH_ORDER.append(prefix)
                    _EXTERNAL_FETCH_CACHE[prefix] = result
                    return result

            # Last-resort: 404 with Google escape-hatches.
            from urllib.parse import quote
            nevo_q = quote(f'site:nevo.co.il "{prefix}"')
            courts_q = quote(f'"{prefix}"')
            return JSONResponse(status_code=404, content={
                "ok": False,
                "reason": "citation_not_in_corpus",
                "cite":   prefix,
                "raw":    raw,
                "links": {
                    "nevo":   f"https://www.google.com/search?q={nevo_q}",
                    "courts": f"https://www.google.com/search?q={courts_q}",
                },
            })
        return judgment_reader_render(doc_id)
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })


# ──────────────────────────────────────────────────────────────────────
# WhatsApp Phase 2 — anonymized observation aggregation.
# Storage: append-only JSONL at tau_rag/runtime/whatsapp_observations.jsonl.
# Retention: 30 days. Entries older than the TTL are skipped when the
#   file is read; nothing in this section rewrites or truncates the file.
# What we accept: {group_hash, ts, kind, payload}, where kind is
#   'cite', 'question' or 'event'.
# What we DON'T accept: raw messages, plaintext IDs. A sender name is
#   stored only when a question payload carries the opt-in `sender` field.
# ──────────────────────────────────────────────────────────────────────
import pathlib as _wa_pl
import threading as _wa_th
import time as _wa_time
import hashlib as _wa_hl

# JSONL store location (relative path, resolved against the working directory).
_WA_OBS_PATH = _wa_pl.Path("tau_rag/runtime/whatsapp_observations.jsonl")
# Held by both the append and the read helper while the file is open.
_WA_OBS_LOCK = _wa_th.Lock()
# Retention window in days; also the upper bound for the endpoints' `days` parameter.
_WA_TTL_DAYS = 30


def _wa_append_obs(observations):
    """Append every observation to the JSONL store, one JSON document
    per line (non-ASCII kept as-is).

    Creates the parent directory on demand and holds _WA_OBS_LOCK for
    the duration of the write.
    """
    _WA_OBS_PATH.parent.mkdir(parents=True, exist_ok=True)
    with _WA_OBS_LOCK, open(_WA_OBS_PATH, "a", encoding="utf-8") as sink:
        sink.writelines(
            json.dumps(record, ensure_ascii=False) + "\n"
            for record in observations
        )


def _wa_read_obs():
    """Read all observations within the TTL, ignoring malformed lines.

    Returns a list of dicts in file order. A line is skipped when it is
    blank, is not valid JSON, does not decode to a JSON object, has a
    missing / non-numeric ``ts``, or has a ``ts`` older than
    _WA_TTL_DAYS days. A missing store file yields an empty list.
    """
    cutoff_ts = _wa_time.time() - (_WA_TTL_DAYS * 86400)
    out = []
    with _WA_OBS_LOCK:
        try:
            # EAFP: open directly instead of exists()-then-open, so a
            # file removed in between cannot raise.
            with open(_WA_OBS_PATH, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        o = json.loads(line)
                    except (ValueError, RecursionError):
                        continue
                    # Valid JSON that is not an object (e.g. `[]`, `5`)
                    # has no .get() — treat it as malformed too.
                    if not isinstance(o, dict):
                        continue
                    ts = o.get("ts")
                    # bool is an int subclass; a string ts would make the
                    # comparison below raise TypeError.
                    if isinstance(ts, bool) or not isinstance(ts, (int, float)):
                        continue
                    if ts < cutoff_ts:
                        continue
                    out.append(o)
        except FileNotFoundError:
            return []
    return out


class _WaObservation(BaseModel):  # type: ignore
    # One observation as posted to /v1/whatsapp/observe.
    group_hash: str         # validated by the handler as 64 hex chars
    ts:         float       # numeric timestamp; compared against time.time() on read
    kind:       str         # 'cite', 'question' or 'event'
    payload:    Dict[str, Any]  # kind-specific fields, sanitized by the handler


class _WaObserveRequest(BaseModel):  # type: ignore
    # Batch envelope for /v1/whatsapp/observe; the handler processes at
    # most the first 200 entries.
    observations: List[_WaObservation]


@app.post("/v1/whatsapp/observe")
def whatsapp_observe(body: _WaObserveRequest):  # type: ignore
    """Day 55 — accept anonymized WhatsApp observations from the Chrome
    helper extension and append the valid ones to the JSONL store.

    At most the first 200 observations of a request are considered.
    Each one is sanitized independently; anything that fails validation
    (or raises while being sanitized) is counted as rejected.

    Validation:
    - group_hash: 64 lowercase-hex characters after strip/lower
    - kind ∈ {'cite', 'question', 'event'}
    - payload, per kind:
      cite     → {cite: str, ≤ 100 chars}
      question → {q_hash: 64 hex, q_text_clean (≤ 500), q_len: int}
                 plus optional sender (≤ 80) and msg_ts_iso
                 (YYYY-MM-DDTHH:MM[:SS])
      event    → {date_iso: YYYY-MM-DD, title: 5–300 chars,
                  ev_kind (≤ 20), location (≤ 100)}

    Returns {"ok": True, "accepted": <n>, "rejected": <n>}.
    """
    hex_alphabet = "0123456789abcdef"

    def _is_hex64(value):
        return len(value) == 64 and all(ch in hex_alphabet for ch in value)

    def _sanitize(obs):
        # Returns the record to store, or None when the observation
        # fails validation.
        group = (obs.group_hash or "").strip().lower()
        if not _is_hex64(group):
            return None
        kind = obs.kind
        if kind not in ("cite", "question", "event"):
            return None
        incoming = obs.payload or {}

        if kind == "cite":
            cite_value = incoming.get("cite")
            if not isinstance(cite_value, str) or len(cite_value) > 100:
                return None
            payload = {"cite": cite_value[:100]}
        elif kind == "question":
            q_hash = (incoming.get("q_hash") or "").strip().lower()
            if not _is_hex64(q_hash):
                return None
            q_text = (incoming.get("q_text_clean") or "").strip()[:500]
            payload = {"q_hash": q_hash, "q_text_clean": q_text,
                       "q_len": int(incoming.get("q_len") or 0)}
            # v1.0.0 — opt-in sender + message timestamp enrichment
            sender = (incoming.get("sender") or "").strip()[:80]
            if sender:
                payload["sender"] = sender
            msg_iso = (incoming.get("msg_ts_iso") or "").strip()
            if _re.match(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}(:\d{2})?$", msg_iso):
                payload["msg_ts_iso"] = msg_iso
        else:  # event
            date_iso = (incoming.get("date_iso") or "").strip()
            if not _re.match(r"^\d{4}-\d{2}-\d{2}$", date_iso):
                return None
            title = (incoming.get("title") or "").strip()[:300]
            if len(title) < 5:
                return None
            payload = {
                "date_iso": date_iso,
                "title":    title,
                "ev_kind":  (incoming.get("ev_kind") or "").strip()[:20],
                "location": (incoming.get("location") or "").strip()[:100],
            }

        return {
            "group_hash": group,
            "ts":         float(obs.ts),
            "kind":       kind,
            "payload":    payload,
        }

    accepted = []
    rejected = 0
    for obs in (body.observations or [])[:200]:  # cap batch
        try:
            record = _sanitize(obs)
        except Exception:
            record = None
        if record is None:
            rejected += 1
        else:
            accepted.append(record)

    if accepted:
        _wa_append_obs(accepted)
    return {"ok": True, "accepted": len(accepted), "rejected": rejected}


@app.get("/v1/whatsapp/group/{group_hash}")
def whatsapp_group_detail(group_hash: str, days: int = 30, limit: int = 200):  # type: ignore
    """Day 56 — list one group's observations, newest first.

    ``group_hash`` must be 64 hex characters (400 "invalid_group_hash"
    otherwise). ``days`` is clamped to 1.._WA_TTL_DAYS and ``limit`` to
    10..1000. The per-kind counts describe the rows actually returned,
    i.e. after the limit has been applied.
    """
    gh = (group_hash or "").strip().lower()
    looks_like_sha256 = len(gh) == 64 and all(
        ch in "0123456789abcdef" for ch in gh
    )
    if not looks_like_sha256:
        return JSONResponse(status_code=400, content={
            "ok": False, "reason": "invalid_group_hash",
        })

    days = max(1, min(days, _WA_TTL_DAYS))
    limit = max(10, min(limit, 1000))
    cutoff = _wa_time.time() - (days * 86400)

    matching = [
        obs for obs in _wa_read_obs()
        if obs.get("group_hash") == gh and obs.get("ts", 0) >= cutoff
    ]
    # Newest first (stable for equal timestamps), then cap.
    rows = sorted(matching, key=lambda obs: -(obs.get("ts") or 0))[:limit]

    def _count(kind):
        # Number of returned rows of the given kind.
        return sum(1 for obs in rows if obs.get("kind") == kind)

    return {
        "ok":               True,
        "group_hash":       gh,
        "days":             days,
        "n_observations":   len(rows),
        "n_cites":          _count("cite"),
        "n_questions":      _count("question"),
        "n_events":         _count("event"),
        "observations":     rows,
    }


@app.get("/v1/whatsapp/dashboard")
def whatsapp_dashboard(days: int = 7, top_k: int = 20):  # type: ignore
    """Day 55 — aggregated view of WhatsApp observations.

    ``days`` is clamped to 1.._WA_TTL_DAYS and ``top_k`` to 5..100.

    v2.99.84 — responses are memoized in _WA_DASH_CACHE per
    (days, top_k) and reused until their expiry time (now +
    _WA_DASH_TTL at write), so repeated opens do not re-read the JSONL
    or re-run the retriever.

    Returns:
      top_cites:     most-mentioned citations (ties broken by cite text)
      top_questions: most-asked questions by q_hash; each is run through
                     pipe.search_documents to report whether the top hit
                     scores at least 0.02 (confidence is -1.0 when the
                     search raised)
      top_events:    upcoming events (date today or later), de-duplicated
                     by date + 40-char title prefix, soonest first
      plus observation / group counts.
    """
    days = max(1, min(days, _WA_TTL_DAYS))
    top_k = max(5, min(top_k, 100))

    # Cache check
    cache_key = (days, top_k)
    entry = _WA_DASH_CACHE.get(cache_key)
    if entry and entry[1] > _wa_time.time():
        return entry[0]

    cutoff = _wa_time.time() - (days * 86400)
    rows = [obs for obs in _wa_read_obs() if obs.get("ts", 0) >= cutoff]

    def _of_kind(kind):
        # Yield (observation, payload) for every row of the given kind.
        for obs in rows:
            if obs.get("kind") == kind:
                yield obs, (obs.get("payload") or {})

    # ── Cites ────────────────────────────────────────────────────────
    cite_counts: Dict[str, int] = {}
    cite_groups: Dict[str, set] = {}
    for obs, payload in _of_kind("cite"):
        cite = payload.get("cite") or ""
        if not cite:
            continue
        cite_counts[cite] = cite_counts.get(cite, 0) + 1
        cite_groups.setdefault(cite, set()).add(obs.get("group_hash"))

    top_cites = [
        {"cite": cite, "n_mentions": n, "n_groups": len(cite_groups[cite])}
        for cite, n in cite_counts.items()
    ]
    top_cites.sort(key=lambda r: (-r["n_mentions"], r["cite"]))
    top_cites = top_cites[:top_k]

    # ── Questions ────────────────────────────────────────────────────
    q_counts: Dict[str, int] = {}
    q_groups: Dict[str, set] = {}
    q_repr: Dict[str, str] = {}     # q_hash → first text seen for it
    q_meta: Dict[str, list] = {}    # q_hash → [{ts, sender, msg_ts_iso}]
    for obs, payload in _of_kind("question"):
        qh = payload.get("q_hash") or ""
        if not qh:
            continue
        q_counts[qh] = q_counts.get(qh, 0) + 1
        q_groups.setdefault(qh, set()).add(obs.get("group_hash"))
        q_repr.setdefault(qh, payload.get("q_text_clean") or "")
        # v1.0.0 — keep sender / message-time metadata when present
        sender = payload.get("sender") or None
        msg_ts_iso = payload.get("msg_ts_iso") or None
        if sender or msg_ts_iso:
            q_meta.setdefault(qh, []).append({
                "ts":         obs.get("ts"),
                "sender":     sender,
                "msg_ts_iso": msg_ts_iso,
            })

    try:
        from ..pipeline import get_pipeline
        pipe = get_pipeline()
    except Exception:
        pipe = None

    def _probe(text):
        # (we_answer, confidence) for one question text. Uses
        # search_documents; BM25 raw scores run ~0.01-0.15 for good
        # matches, so 0.02 is the "found something relevant" gate.
        try:
            if not (pipe and text):
                return None, None
            hits = pipe.search_documents(text, top_k=3)
            if not hits:
                return False, None
            score = float(getattr(hits[0], "score", 0.0))
            return score >= 0.02, score
        except Exception:
            # A failed search is reported as "no answer" with the
            # sentinel confidence -1.0 rather than dropped silently.
            return False, -1.0

    top_questions = []
    ranked = sorted(q_counts.items(), key=lambda kv: -kv[1])[:top_k]
    for qh, n in ranked:
        text = q_repr.get(qh) or ""
        we_answer, confidence = _probe(text)
        # Up to 5 most recent {sender, msg_ts_iso} for this question
        recent_meta = sorted(
            q_meta.get(qh) or [],
            key=lambda r: r.get("ts") or 0,
            reverse=True,
        )[:5]
        top_questions.append({
            "q_hash":      qh,
            "q_text":      text,
            "n_mentions":  n,
            "n_groups":    len(q_groups.get(qh) or set()),
            "we_answer":   we_answer,
            "confidence":  confidence,
            "recent_meta": recent_meta,
        })

    # ── Upcoming events ──────────────────────────────────────────────
    from datetime import date as _date
    today_iso = _date.today().isoformat()
    ev_counts: Dict[str, int] = {}
    ev_groups: Dict[str, set] = {}
    ev_meta: Dict[str, dict] = {}
    for obs, payload in _of_kind("event"):
        di = payload.get("date_iso") or ""
        title = payload.get("title") or ""
        if not di or not title:
            continue
        if di < today_iso:  # past (ISO dates compare lexicographically)
            continue
        # Dedup key: date + 40-char title prefix (loose match)
        key = di + "|" + title[:40].strip().lower()
        ev_counts[key] = ev_counts.get(key, 0) + 1
        ev_groups.setdefault(key, set()).add(obs.get("group_hash"))
        if key not in ev_meta:
            ev_meta[key] = {
                "date_iso": di,
                "title":    title,
                "ev_kind":  payload.get("ev_kind") or "",
                "location": payload.get("location") or "",
            }

    top_events = [
        {
            **ev_meta[key],
            "n_mentions": n,
            "n_groups":   len(ev_groups.get(key) or set()),
        }
        for key, n in ev_counts.items()
    ]
    # Soonest first, then most-mentioned
    top_events.sort(key=lambda e: (e["date_iso"], -e["n_mentions"]))
    top_events = top_events[:top_k]

    response = {
        "ok":              True,
        "days":            days,
        "n_observations":  len(rows),
        "n_groups":        len({obs.get("group_hash") for obs in rows
                                if obs.get("group_hash")}),
        "top_cites":       top_cites,
        "top_questions":   top_questions,
        "top_events":      top_events,
    }
    # Cache write (v2.99.84)
    _WA_DASH_CACHE[cache_key] = (response, _wa_time.time() + _WA_DASH_TTL)
    return response


def _strip_remaining_templates(s: str) -> str:
    """Final pass: strip any leftover `{{...}}` wikitext templates we
    don't explicitly handle, leaving plain Hebrew text. Handles nested
    braces (one level) so `{{ח:פנימי|x|y}}` already-substituted leftovers
    don't leak through."""
    import re as _r
    # Strip simple templates (no nested braces)
    prev = None
    cur = s
    while prev != cur:
        prev = cur
        cur = _r.sub(r"\{\{[^{}]*\}\}", "", cur)
    # Drop wikitext list bullets
    cur = _r.sub(r"^[\*#:]+\s*", "", cur)
    # Collapse whitespace
    cur = _r.sub(r"\s+", " ", cur).strip()
    return cur


# ──────────────────────────────────────────────────────────────────────
# Law cache — every successful Wikisource fetch is saved to disk so the
# same law never needs to be re-fetched. Directory: tau_rag/runtime/law_cache/
# Each law: one JSON file named `<key>.json`, where <key> is the first
# 16 hex characters of sha256(stripped name) (see _law_cache_key).
# aliases.json in the same directory maps a requested name to its
# canonical title, so "חוק החוזים" and "חוק החוזים (חלק כללי)" can both
# resolve to the same cached entry.
# ──────────────────────────────────────────────────────────────────────
_LAW_CACHE_DIR = _wa_pl.Path("tau_rag/runtime/law_cache")
_LAW_ALIAS_FILE = _LAW_CACHE_DIR / "aliases.json"
# Held by _law_cache_set while it writes a law file and the alias file.
_LAW_CACHE_LOCK = _wa_th.Lock()


def _law_cache_key(name: str) -> str:
    return _wa_hl.sha256(name.strip().encode("utf-8")).hexdigest()[:16]


def _law_cache_get(name: str) -> Optional[dict]:
    """Return the cached law for ``name``, or None on a miss.

    Lookup first tries the file keyed by the name itself and otherwise
    follows aliases.json (requested name → canonical title), possibly
    across several hops. A visited set stops the walk when aliases loop
    back on themselves, which previously recursed without bound.

    None is also returned when the matching cache file or the alias
    file cannot be read or parsed.
    """
    if not name:
        return None
    aliases = None          # loaded lazily, at most once per call
    visited = set()
    current = name
    while isinstance(current, str) and current:
        stripped = current.strip()
        if stripped in visited:
            return None     # alias cycle — nothing more to try
        visited.add(stripped)

        # Direct hit on this name's cache file
        p = _LAW_CACHE_DIR / (_law_cache_key(current) + ".json")
        if p.exists():
            try:
                with open(p, "r", encoding="utf-8") as f:
                    return json.load(f)
            except Exception:
                return None

        # Otherwise hop to the canonical title, if one is recorded
        if aliases is None:
            try:
                with open(_LAW_ALIAS_FILE, "r", encoding="utf-8") as f:
                    aliases = json.load(f)
            except Exception:
                return None
            if not isinstance(aliases, dict):
                return None
        current = aliases.get(stripped)
    return None


def _law_cache_set(name: str, canonical_title: str, data: dict) -> None:
    """Persist ``data`` under the canonical title's cache key and, when
    the requested ``name`` differs from it, record the alias
    name → canonical_title in aliases.json.

    Best-effort: failures are printed with a "[law-cache]" prefix and
    never raised. If the law file cannot be written, no alias is
    recorded. All writes happen under _LAW_CACHE_LOCK.
    """
    _LAW_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    requested = name.strip()
    with _LAW_CACHE_LOCK:
        # Write data under canonical title's key
        p = _LAW_CACHE_DIR / (_law_cache_key(canonical_title) + ".json")
        try:
            with open(p, "w", encoding="utf-8") as f:
                json.dump(data, f, ensure_ascii=False)
        except Exception as e:
            print(f"[law-cache] write fail: {e}")
            return

        # Record alias from requested name → canonical title
        if requested == canonical_title:
            return
        aliases = {}
        try:
            with open(_LAW_ALIAS_FILE, "r", encoding="utf-8") as f:
                loaded = json.load(f)
            # A non-object alias file would make the assignment below
            # raise TypeError; start over with an empty mapping instead.
            if isinstance(loaded, dict):
                aliases = loaded
        except (OSError, ValueError):
            pass  # missing or unparsable alias file — start fresh
        aliases[requested] = canonical_title
        try:
            with open(_LAW_ALIAS_FILE, "w", encoding="utf-8") as f:
                json.dump(aliases, f, ensure_ascii=False)
        except Exception as e:
            # Reported like the data-write failure above instead of
            # being dropped silently.
            print(f"[law-cache] alias write fail: {e}")


def _law_cache_list() -> List[dict]:
    """Summarize every cached law (for the popular-list UI).

    Walks the cache directory's *.json files in sorted order, skipping
    aliases.json and any file that cannot be read or summarized. Each
    summary carries name ("?" when absent), n_sections, n_paragraphs,
    n_chars (0 when absent) and source_url.
    """
    if not _LAW_CACHE_DIR.exists():
        return []

    summaries = []
    for path in sorted(_LAW_CACHE_DIR.glob("*.json")):
        if path.name == "aliases.json":
            continue
        try:
            with open(path, "r", encoding="utf-8") as fh:
                law = json.load(fh)
            summary = {"name": law.get("name") or "?"}
            for field in ("n_sections", "n_paragraphs", "n_chars"):
                summary[field] = law.get(field) or 0
            summary["source_url"] = law.get("source_url")
        except Exception:
            continue
        summaries.append(summary)
    return summaries


@app.post("/v1/laws/prefetch")
def laws_prefetch(top: int = 20):  # type: ignore
    """v2.99.101 — Background-friendly bulk prefetch.

    Asks laws_popular for ``top * 2`` entries, keeps those not flagged
    as cached, and fetches up to min(top, 20) of them (at least one)
    through law_by_name, pausing 0.5 s after each call to stay polite
    to Wikisource. Run it several times for larger batches.

    Returns counts plus the fetched / failed lists, a short
    "all_already_cached" response when nothing is pending, the
    laws_popular response unchanged when that is a JSONResponse, or a
    500 ("<Type>: <message>") on an unexpected exception.
    """
    import time as _t
    try:
        # Over-fetch since many entries may already be cached.
        popular = laws_popular(top * 2)
        if isinstance(popular, JSONResponse):
            return popular

        batch_size = max(1, min(top, 20))
        pending = [
            entry for entry in popular.get("items", [])
            if not entry.get("cached")
        ][:batch_size]
        if not pending:
            return {"ok": True, "n_fetched": 0, "reason": "all_already_cached"}

        fetched, failed = [], []
        for entry in pending:
            law_name = entry["name"]
            outcome = law_by_name(law_name)
            is_mapping = isinstance(outcome, dict)
            if is_mapping and outcome.get("ok"):
                fetched.append({
                    "name":         law_name,
                    "n_sections":   outcome.get("n_sections", 0),
                    "n_paragraphs": outcome.get("n_paragraphs", 0),
                })
            else:
                reason = outcome.get("reason") if is_mapping else None
                failed.append({
                    "name":   law_name,
                    "reason": reason or "unknown",
                })
            _t.sleep(0.5)  # polite delay (~2 req/sec max)

        return {
            "ok":          True,
            "n_fetched":   len(fetched),
            "n_failed":    len(failed),
            "fetched":     fetched,
            "failed":      failed,
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}",
        })


@app.get("/v1/laws/cached")
def laws_cached():  # type: ignore
    """Return every cached law's summary, largest (by ``n_chars``) first."""
    ranked = sorted(
        _law_cache_list(),
        key=lambda row: row.get("n_chars", 0),
        reverse=True,
    )
    return {"ok": True, "n": len(ranked), "items": ranked}


# ──────────────────────────────────────────────────────────────────────
# v2.99.104 — Auto-seed law cache on cold start. HF Space's ephemeral
# storage wipes the cache on every rebuild, so we kick off a background
# prefetch of the top-20 most-cited laws right after import. Subsequent
# user requests for popular laws then hit cache, not Wikisource.
# ──────────────────────────────────────────────────────────────────────
def _autoseed_law_cache() -> None:
    """Start a daemon thread that seeds the law cache shortly after import.

    The worker waits 30 s, then skips seeding if at least 15 laws are
    already cached; otherwise it calls ``laws_prefetch(top=20)`` and logs
    the outcome. It never raises: any exception is logged and swallowed.

    ``laws_prefetch`` resolves ``laws_popular`` at call time, so it is fine
    that the latter is defined further down in this module.
    """
    import threading, time

    def _worker():
        try:
            time.sleep(30)  # let pipeline finish loading first
            # Scan the cache directory once and reuse the count for both the
            # decision and the log line.
            n_cached = len(_law_cache_list())
            if n_cached >= 15:
                print(f"[law-cache] already seeded ({n_cached} laws); skip")
                return
            print("[law-cache] cold start — seeding top-20 popular laws")
            res = laws_prefetch(top=20)
            if isinstance(res, dict) and res.get("ok"):
                # The "all_already_cached" reply carries no n_failed key;
                # default to 0 so the log does not read "None failed".
                print(f"[law-cache] seed complete: {res.get('n_fetched', 0)} fetched, "
                      f"{res.get('n_failed', 0)} failed")
            else:
                print(f"[law-cache] seed failed: {res}")
        except Exception as e:
            print(f"[law-cache] seed exception: {type(e).__name__}: {e}")

    threading.Thread(target=_worker, daemon=True).start()

_autoseed_law_cache()


@app.get("/v1/laws/popular")
def laws_popular(top: int = 40):  # type: ignore
    """Return the laws most-mentioned in the judgment corpus.

    Uses ``citation_network.popular_citations`` restricted to
    ``kind="statute"``. Each citation has its Hebrew year suffix
    (", התש...-YYYY") stripped to form a short name; entries are deduped on
    that short name and capped at ``top``. Every item is then flagged
    ``cached`` when it matches a cached law by full title, year-stripped
    title, alias key, or as a "<short>," prefix of a cached title.

    Returns ``{"ok", "n", "items"}``, or a 500 JSONResponse on any exception.
    """
    try:
        from ..pipeline import get_pipeline
        from ..citation_network import get_or_build, popular_citations
        pipe = get_pipeline()
        cn   = get_or_build(pipe)
        rows = popular_citations(cn, top_k=max(top, 80), kind="statute")

        # One year-suffix pattern for BOTH the popular names and the cached
        # titles. It accepts hyphen, maqaf, en-dash and em-dash; previously
        # the popular-name side only knew hyphen/maqaf, so en-dash titles
        # were never shortened or deduped.
        _year_re = _re.compile(r",\s+ה?תש[א-ת][\"׳״]?[א-ת]?[-־–—]\d{4}\s*$")

        # statute refs include both laws and regulations; dedupe on short name
        seen = set()
        items = []
        for r in rows:
            name = r.get("citation") or ""
            if not name:
                continue
            short = _year_re.sub("", name).strip()
            if short in seen:
                continue
            seen.add(short)
            items.append({
                "name":     short,
                "full":     name,
                "n_citers": r.get("n_citers") or 0,
            })
            if len(items) >= top:
                break

        # v2.99.103 — Mark cached. The cache stores canonical titles like
        # "חוק החוזים (חלק כללי), תשל״ג–1973" while items carry short names,
        # so match against several forms plus a prefix fallback.
        cached_long = {c["name"] for c in _law_cache_list()}
        cached_short = {_year_re.sub("", n).strip() for n in cached_long}
        alias_keys = set()
        try:
            with open(_LAW_ALIAS_FILE, "r", encoding="utf-8") as f:
                alias_keys = set(json.load(f).keys())
        except Exception:
            pass  # no alias file yet, or unreadable: aliases just don't match

        def _is_cached(short_name: str, full_name: str) -> bool:
            if not short_name:
                return False
            for pool in (cached_long, cached_short, alias_keys):
                if short_name in pool or full_name in pool:
                    return True
            # Prefix fallback — canonical "X, תש..." starts with short "X"
            prefix = short_name + ","
            return any(c.startswith(prefix) for c in cached_long)

        for it in items:
            it["cached"] = _is_cached(it["name"], it["full"])
        return {"ok": True, "n": len(items), "items": items}
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}",
        })


# ──────────────────────────────────────────────────────────────────────
# v2.99.114 — Section-level reverse links. For each law, scan all
# judgments that cite it and extract "סעיף N לחוק X" patterns to build
# a per-section citer count. Cached to disk under law_cache_meta/.
# ──────────────────────────────────────────────────────────────────────
# Directory holding the per-law section-citer index files (one
# "<_section_citer_key(canonical)>.json" per law). The path is relative.
_LAW_META_DIR = _wa_pl.Path("tau_rag/runtime/law_cache_meta")

def _section_citer_key(canonical: str) -> str:
    """Return the cache-file stem for a law title.

    The stem is the first 16 hex characters of the SHA-256 digest of the
    whitespace-stripped, UTF-8 encoded title.
    """
    normalized = canonical.strip()
    digest = _wa_hl.sha256(normalized.encode("utf-8"))
    return digest.hexdigest()[:16]

def _build_section_citer_index(pipe, cn, canonical: str, lookup_strs: set) -> dict:
    """Scan the documents citing a law and index them per cited section.

    Candidate documents come from ``cn.cited_by`` for every lookup string.
    Each candidate's text is searched for "סעיף N לX" / "ס' N לX" /
    "תקנה N לX" where X is one of the lookup strings. A document counts at
    most once per section.

    Args:
        pipe: Pipeline object; documents are read from ``_indexed_docs`` or
            ``_docs`` and, when a document has no ``text``, from
            ``pipe.get_text`` if available.
        cn: Citation network exposing a ``cited_by`` mapping.
        canonical: Canonical law title, stored in the result.
        lookup_strs: Name variants of the law to look for. Empty strings
            are ignored.

    Returns:
        A dict with ``schema``, ``canonical``, ``n_sections_cited``,
        ``n_docs``, ``sections`` (``{section: {"n", "doc_ids",
        "relations"}}``) and ``docs_meta`` (``{doc_id: {"citation",
        "court", "verdict_dt"}}``).
    """
    # Drop empty names: an empty alternative would make the pattern match
    # "סעיף N ל" followed by anything, i.e. sections of any law.
    # Longest names go first so the alternation prefers the fullest title.
    names = sorted((s for s in lookup_strs if s), key=len, reverse=True)
    law_alt = "|".join(_re.escape(s) for s in names)
    rx = _re.compile(
        r"(?:סעיף|ס\'|תקנה|תקנות)\s+([0-9]+[א-ת]?(?:\([א-ת0-9]{1,3}\))?)\s+ל(?:" + law_alt + r")"
    )

    # All documents that cite this law under any of its names.
    candidate_ids = set()
    for s in names:
        candidate_ids.update(cn.cited_by.get(s, []) or [])

    # Filter out non-judgment docs by id prefix.
    NON_JUD = ("heb_law/", "heb_statute/", "heb_wikilaw/", "heb_kolzchut/")
    candidate_ids = {d for d in candidate_ids if not d.startswith(NON_JUD)}

    by_section: Dict[str, dict] = {}
    docs_meta: Dict[str, dict] = {}
    docs = (getattr(pipe, "_indexed_docs", None) or
            getattr(pipe, "_docs", None) or [])
    doc_by_id = {}
    for d in docs:
        did = getattr(d, "doc_id", None) or getattr(d, "id", None)
        if did and did in candidate_ids:
            doc_by_id[did] = d

    for did, d in doc_by_id.items():
        md = getattr(d, "metadata", None) or {}
        # Second filter, by metadata kind, for non-judgment sources.
        if md.get("kind") in ("statute", "law", "wikipedia", "kolzchut"):
            continue
        text = getattr(d, "text", "") or ""
        if not text and hasattr(pipe, "get_text"):
            try:
                text = pipe.get_text(did) or ""
            except Exception:
                text = ""
        if not text:
            continue
        # Recorded for every scanned doc (even without a section match) so
        # that the per-section endpoint can show citation / court / date.
        docs_meta[did] = {
            "citation":   md.get("citation") or did,
            "court":      md.get("court", ""),
            "verdict_dt": (md.get("verdict_dt") or "")[:10],
        }
        seen_sections_in_doc = set()
        for m in rx.finditer(text):
            sec = m.group(1).strip().rstrip(".")
            if sec in seen_sections_in_doc:
                continue  # count each document once per section
            seen_sections_in_doc.add(sec)
            # v2.99.116 — law-specific classifier; the three lists in a
            # slot stay index-aligned (doc_ids[i] <-> relations[i]).
            rel = _classify_law_citation_context(text, m.group(0))
            slot = by_section.setdefault(sec, {"n": 0, "doc_ids": [], "relations": []})
            slot["n"] += 1
            slot["doc_ids"].append(did)
            slot["relations"].append(rel)

    return {
        "schema":     "v3-law-rel",  # v2.99.116 — law-specific relations
        "canonical":  canonical,
        "n_sections_cited": len(by_section),
        "n_docs":     len(docs_meta),
        "sections":   by_section,
        "docs_meta":  docs_meta,
    }


@app.get("/v1/law/section-summary")
def law_section_summary(name: str):  # type: ignore
    """v2.99.114 — Per-section citer counts for a law.

    Resolves ``name`` to its canonical title through the alias file, then
    serves ``sections={section_num: n_citers}`` from the on-disk index when
    one with the current schema exists. Otherwise the index is built (slow
    scan) and written to disk.

    The pipeline and citation network are loaded only on a cache miss, so
    cached answers and the empty-name 400 never depend on them.

    Returns a summary dict, a 400 JSONResponse for an empty name, or a 500
    JSONResponse on any unexpected exception.
    """
    try:
        nm = (name or "").strip()
        if not nm:
            return JSONResponse(status_code=400, content={"ok": False, "reason": "empty_name"})

        # Resolve canonical title; a missing/unreadable/non-object alias
        # file simply means "no aliases".
        alias_map = {}
        try:
            with open(_LAW_ALIAS_FILE, "r", encoding="utf-8") as f:
                loaded = json.load(f)
            if isinstance(loaded, dict):
                alias_map = loaded
        except Exception:
            pass
        canonical = alias_map.get(nm, nm)

        # Try cache (only if it matches current schema)
        _LAW_META_DIR.mkdir(parents=True, exist_ok=True)
        cache_p = _LAW_META_DIR / (_section_citer_key(canonical) + ".json")
        if cache_p.exists():
            try:
                with open(cache_p, "r", encoding="utf-8") as f:
                    cached = json.load(f)
                if cached.get("schema") == "v3-law-rel":
                    return {
                        "ok":          True,
                        "name":        nm,
                        "canonical":   canonical,
                        "from_cache":  True,
                        "n_sections":  cached.get("n_sections_cited", 0),
                        "n_docs":      cached.get("n_docs", 0),
                        "sections":    {s: v["n"] for s, v in cached.get("sections", {}).items()},
                    }
                # else: stale schema, fall through and rebuild
            except Exception:
                pass  # corrupt cache file: rebuild below

        # Cache miss — only now pay for the pipeline / citation network.
        from ..pipeline import get_pipeline
        from ..citation_network import get_or_build
        pipe = get_pipeline()
        cn = get_or_build(pipe)

        # Every name the law may be cited under: requested, canonical, all
        # aliases pointing at the canonical title, and the year-stripped form.
        lookup_strs = {nm, canonical}
        for short, canon in alias_map.items():
            if canon == canonical:
                lookup_strs.add(short)
        _year_re = _re.compile(r",\s+ה?תש[א-ת][\"׳״]?[א-ת]?[-־–—]\d{4}\s*$")
        short_canon = _year_re.sub("", canonical).strip()
        if short_canon != canonical:
            lookup_strs.add(short_canon)

        # Build the index (slow)
        index = _build_section_citer_index(pipe, cn, canonical, lookup_strs)
        # Cache write is best-effort; the response does not depend on it.
        try:
            with open(cache_p, "w", encoding="utf-8") as f:
                json.dump(index, f, ensure_ascii=False)
        except Exception as e:
            print(f"[section-citers] cache write fail: {e}")

        return {
            "ok":          True,
            "name":        nm,
            "canonical":   canonical,
            "from_cache":  False,
            "n_sections":  index["n_sections_cited"],
            "n_docs":      index["n_docs"],
            "sections":    {s: v["n"] for s, v in index["sections"].items()},
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": f"{type(e).__name__}: {e}"})


@app.get("/v1/law/section-citers")
def law_section_citers(name: str, section: str, top: int = 20):  # type: ignore
    """v2.99.114 — Judgments that cite a SPECIFIC section of a law.

    Reads the law's on-disk section index (triggering
    ``law_section_summary`` to build it when missing) and returns up to
    ``top`` citing judgments for ``section``, newest verdict year first.
    Judgments without a parseable year sort last.

    Returns a result dict (``n`` is the number of returned items,
    ``n_total`` the number of citers recorded for the section), a 400
    JSONResponse when ``name`` or ``section`` is empty, or a 500
    JSONResponse on any unexpected exception.
    """
    try:
        nm = (name or "").strip()
        sec = (section or "").strip().rstrip(".")
        if not nm or not sec:
            return JSONResponse(status_code=400, content={"ok": False, "reason": "missing_args"})

        # Resolve canonical title; unreadable or non-object alias file
        # means "no aliases".
        alias_map = {}
        try:
            with open(_LAW_ALIAS_FILE, "r", encoding="utf-8") as f:
                loaded = json.load(f)
            if isinstance(loaded, dict):
                alias_map = loaded
        except Exception:
            pass
        canonical = alias_map.get(nm, nm)

        # Load index from cache (build if missing). This function itself
        # needs neither the pipeline nor the citation network.
        _LAW_META_DIR.mkdir(parents=True, exist_ok=True)
        cache_p = _LAW_META_DIR / (_section_citer_key(canonical) + ".json")
        if not cache_p.exists():
            # Trigger summary build first (which writes the cache)
            law_section_summary(nm)
        if not cache_p.exists():
            return {"ok": True, "n": 0, "items": []}
        with open(cache_p, "r", encoding="utf-8") as f:
            index = json.load(f)

        sect_data = (index.get("sections") or {}).get(sec)
        if not sect_data:
            return {"ok": True, "n": 0, "items": [], "section": sec, "canonical": canonical}

        docs_meta = index.get("docs_meta") or {}
        doc_ids = sect_data.get("doc_ids") or []
        # Indexes written before relations existed have no such list.
        relations = sect_data.get("relations") or []
        items = []
        for i, did in enumerate(doc_ids):
            md = docs_meta.get(did, {})
            items.append({
                "doc_id":     did,
                "citation":   md.get("citation") or did,
                "court":      md.get("court", ""),
                "verdict_dt": md.get("verdict_dt", ""),
                "relation":   (relations[i] if i < len(relations) else None) or "neutral",
            })

        # Sort newest first
        def _year(r):
            s = (r.get("verdict_dt") or "")[:4]
            return -int(s) if s.isdigit() else 0

        # Sort ALL citers before truncating; cutting first would return the
        # first `top` in index order rather than the newest `top`.
        items.sort(key=_year)
        items = items[:max(0, top)]  # clamp: a negative top must not slice from the end

        return {
            "ok":         True,
            "name":       nm,
            "canonical":  canonical,
            "section":    sec,
            "n":          len(items),
            "n_total":    sect_data.get("n", len(doc_ids)),
            "items":      items,
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": f"{type(e).__name__}: {e}"})


# ──────────────────────────────────────────────────────────────────────
# v2.99.151 — Branded email helper. Wraps a plain-text body in a
# legal-eye HTML template (Navy + Gold + Frank Ruhl Libre) and sends as
# multipart/alternative (plain + HTML). Used by all SMTP-based email
# senders (magic link, lawyer assigned, lead notify, status change,
# dispute, admin message, digest, lawyer application).
# ──────────────────────────────────────────────────────────────────────
def _build_branded_email_html(plain_body: str, *, title: str = "",
                               cta_label: str = "", cta_url: str = "") -> str:
    """Wrap plain text in the branded HTML e-mail template (inline CSS only).

    ``plain_body`` is split on blank lines into paragraphs; single newlines
    become ``<br>``. All text is HTML-escaped. Bare ``http(s)://`` URLs are
    turned into links.

    URLs are detected on the RAW text, before escaping, and end at
    whitespace, ``<``, ``>`` or a quote. Trailing sentence punctuation and
    an unbalanced closing parenthesis are left outside the link. Detecting
    them after escaping pulled entities such as ``&quot;`` into the href.

    Args:
        plain_body: Plain-text message body.
        title: Optional heading rendered above the body.
        cta_label: Button text; the button is rendered only when both
            ``cta_label`` and ``cta_url`` are non-empty.
        cta_url: Button target.

    Returns:
        A complete HTML document as a string.
    """
    import html as _h
    import re as _re

    url_rx = _re.compile(r"https?://[^\s<>\"']+")

    def _render_block(raw: str) -> str:
        # Escape the text between URLs and wrap each URL in an anchor.
        pieces = []
        pos = 0
        for m in url_rx.finditer(raw):
            url = m.group(0)
            # Keep "." / "," / ")" that merely follow the URL out of the link.
            while url[-1] in ".,;:!?" or (url[-1] == ")" and url.count("(") < url.count(")")):
                url = url[:-1]
            pieces.append(_h.escape(raw[pos:m.start()]))
            safe = _h.escape(url, quote=True)
            pieces.append(
                f'<a href="{safe}" style="color:#9A7423;text-decoration:underline;">{safe}</a>'
            )
            pos = m.start() + len(url)
        pieces.append(_h.escape(raw[pos:]))
        return "".join(pieces).replace("\n", "<br>")

    paras = []
    for blk in (plain_body or "").split("\n\n"):
        blk = blk.strip()
        if not blk:
            continue
        paras.append(
            f'<p style="margin:0 0 14px 0;line-height:1.75;color:#0A0F1A;font-size:15px;">'
            f'{_render_block(blk)}</p>'
        )
    body_html = "\n".join(paras)

    title_html = ""
    if title:
        title_html = (
            f'<h2 style="font-family:\'Frank Ruhl Libre\',Georgia,serif;font-size:24px;'
            f'color:#0E2A47;margin:0 0 18px 0;font-weight:700;line-height:1.3;">'
            f'{_h.escape(title)}</h2>'
        )

    cta_html = ""
    if cta_label and cta_url:
        cta_html = (
            f'<div style="margin:28px 0 8px 0;text-align:center;">'
            f'<a href="{_h.escape(cta_url, quote=True)}" '
            f'style="display:inline-block;background:#C89B3C;color:#0E2A47;'
            f'text-decoration:none;padding:14px 36px;border-radius:4px;'
            f'font-weight:700;font-size:15px;font-family:Heebo,Arial,sans-serif;'
            f'letter-spacing:0.3px;">{_h.escape(cta_label)}</a>'
            f'</div>'
        )

    return f"""<!doctype html>
<html lang="he" dir="rtl">
<head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1"></head>
<body style="margin:0;padding:0;background:#FBF8F1;font-family:Heebo,'Helvetica Neue',Arial,sans-serif;direction:rtl;">
  <table role="presentation" width="100%" cellpadding="0" cellspacing="0" border="0" style="background:#FBF8F1;padding:32px 12px;">
    <tr><td align="center">
      <table role="presentation" cellpadding="0" cellspacing="0" border="0" style="background:#FFFFFF;max-width:560px;width:100%;border:1px solid #E8E2D2;">
        <tr><td style="padding:22px 32px;border-bottom:1px solid #E8E2D2;background:#FBF8F1;">
          <div style="font-family:'Frank Ruhl Libre',Georgia,serif;font-size:22px;font-weight:700;color:#0E2A47;letter-spacing:1.5px;">LEGAL <span style="color:#C89B3C;">EYE</span></div>
          <div style="font-family:'Frank Ruhl Libre',Georgia,serif;font-size:10px;color:#9A7423;letter-spacing:2px;text-transform:uppercase;padding-top:3px;">Legal Intelligence</div>
        </td></tr>
        <tr><td style="height:3px;background:#C89B3C;font-size:0;line-height:0;">&nbsp;</td></tr>
        <tr><td style="padding:32px 32px 24px 32px;">
          {title_html}
          {body_html}
          {cta_html}
        </td></tr>
        <tr><td style="padding:18px 32px 22px 32px;border-top:1px solid #E8E2D2;background:#FBF8F1;">
          <p style="margin:0;font-family:'Frank Ruhl Libre',Georgia,serif;font-size:13px;color:#5C677A;line-height:1.6;font-style:italic;">רואים משפט אחרת · מקור. הקשר. עורך דין כשצריך.</p>
          <p style="margin:6px 0 0 0;font-family:Heebo,Arial,sans-serif;font-size:11px;color:#8A93A4;line-height:1.5;">© legal-eye · <a href="https://legal-eye.1bigfam.com" style="color:#9A7423;text-decoration:none;">legal-eye.1bigfam.com</a></p>
        </td></tr>
      </table>
    </td></tr>
  </table>
</body>
</html>"""


def _send_email_branded(to_addr: str, subject: str, plain_body: str, *,
                         title: str = "", cta_label: str = "", cta_url: str = "",
                         timeout: int = 15) -> bool:
    """Send one multipart/alternative e-mail (plain text + branded HTML).

    SMTP settings come from the ``LE_LEAD_EMAIL_SMTP_{HOST,PORT,USER,PASS}``
    and ``LE_LEAD_EMAIL_FROM`` environment variables. Port 465 uses an
    implicit-TLS connection; any other port connects in plain and upgrades
    with STARTTLS before logging in.

    Returns True once the message has been handed to the server. Returns
    False when user, password or recipient is missing, and on any error
    (which is logged, never raised).
    """
    import os, smtplib
    from email.mime.multipart import MIMEMultipart
    from email.mime.text import MIMEText

    def _setting(key: str, fallback: str = "") -> str:
        return os.environ.get(key, fallback).strip()

    try:
        smtp_host = _setting("LE_LEAD_EMAIL_SMTP_HOST", "smtp.gmail.com")
        raw_port = _setting("LE_LEAD_EMAIL_SMTP_PORT", "465") or "465"
        login = _setting("LE_LEAD_EMAIL_SMTP_USER")
        secret = _setting("LE_LEAD_EMAIL_SMTP_PASS")
        sender = _setting("LE_LEAD_EMAIL_FROM", login) or login
        if not login or not secret or not to_addr:
            return False

        envelope = MIMEMultipart("alternative")
        for header, value in (("Subject", subject), ("From", sender), ("To", to_addr)):
            envelope[header] = value
        # Plain part first, HTML second, so HTML is the preferred rendering.
        envelope.attach(MIMEText(plain_body, "plain", "utf-8"))
        rendered = _build_branded_email_html(plain_body, title=title,
                                             cta_label=cta_label, cta_url=cta_url)
        envelope.attach(MIMEText(rendered, "html", "utf-8"))

        smtp_port = int(raw_port)
        implicit_tls = smtp_port == 465
        client_cls = smtplib.SMTP_SSL if implicit_tls else smtplib.SMTP
        with client_cls(smtp_host, smtp_port, timeout=timeout) as conn:
            if not implicit_tls:
                conn.starttls()
            conn.login(login, secret)
            conn.send_message(envelope)
        return True
    except Exception as e:
        print(f"[email] send FAIL to={to_addr}: {type(e).__name__}: {e}")
        return False


# ──────────────────────────────────────────────────────────────────────
# v2.99.120 — Lawyer waitlist. When a user clicks "פנה לעורך דין" from
# the triage result, we collect a structured request (no lawyer pool
# yet — pre-launch). Each submission saved to JSONL on disk.
# ──────────────────────────────────────────────────────────────────────
# JSONL file collecting submitted lawyer requests (relative path).
_LAWYER_REQUEST_PATH = _wa_pl.Path("tau_rag/runtime/lawyer_requests.jsonl")
# NOTE(review): presumably serialises reads/writes of the JSONL file above —
# confirm against the call sites.
_LAWYER_REQUEST_LOCK = _wa_th.Lock()


def _notify_lawyer_assigned_async(record: dict, lawyer_email: str, lawyer_name: str) -> None:
    """v2.99.136 — Notify a lawyer that a request was assigned to them.

    Creates the in-app notification synchronously (v2.99.143), then sends
    the branded e-mail from a daemon thread so the caller is not blocked.
    Does nothing when ``lawyer_email`` is empty.

    PII gating: question, domain and risk level are included; the client's
    contact details are NOT (the lawyer must accept first).

    Args:
        record: The lawyer-request record; keys read here are ``id``,
            ``domain``, ``risk_level``, ``urgent``, ``question`` and
            ``has_attachment``.
        lawyer_email: Recipient address and in-app notification target.
        lawyer_name: Name used in the e-mail greeting.
    """
    import threading
    if not lawyer_email:
        return

    # One domain map for both channels. The in-app notification used to
    # carry a shorter copy and showed raw keys for commercial / tax / ip.
    domain_labels = {
        "employment_law": "דיני עבודה", "contracts": "דיני חוזים",
        "consumer": "דיני צרכנות", "tenancy": "שכירות / מקרקעין",
        "commercial": "מסחרי", "tax": "מיסים", "ip": "קניין רוחני",
    }
    domain_he = domain_labels.get(record.get("domain", ""), record.get("domain", ""))
    urgent = bool(record.get("urgent"))
    title = "⚠ פנייה דחופה הוקצתה לך" if urgent else "פנייה חדשה הוקצתה לך"

    _create_notification(
        lawyer_email,
        kind="lawyer_assigned",
        title=title,
        body=f"תחום: {domain_he or '(לא סווג)'}",
        link="/lawyer-dashboard/",
        related_request_id=record.get("id", ""),
    )

    def _worker():
        # Normalise once so a None question / id cannot crash the thread.
        question = record.get("question") or ""
        short_id = str(record.get("id") or "?")[:8]
        q_short = question[:300]
        q_ellipsis = "..." if len(question) > 300 else ""
        sla = "4 שעות" if urgent else "24 שעות"
        risk_he = {
            "high": "גבוה", "medium_high": "בינוני-גבוה",
            "medium": "בינוני", "low": "נמוך",
        }.get(record.get("risk_level", ""), record.get("risk_level", "?"))
        urgency_line = ("⚠ דחוף · SLA " + sla) if urgent else ("רגיל · SLA " + sla)
        # With an attachment this adds one extra line; the blank line that
        # follows is emitted either way.
        attachment_line = "מצורף מסמך: כן\n" if record.get("has_attachment") else ""

        body = (
            f"שלום עו\"ד {lawyer_name},\n\n"
            f"פנייה חדשה הוקצתה אליך ב-legal-eye.\n\n"
            f"תחום: {domain_he or '(לא סווג)'}\n"
            f"רמת סיכון: {risk_he}\n"
            f"דחיפות: {urgency_line}\n"
            f"{attachment_line}\n"
            f"השאלה של הלקוח:\n"
            f"\"{q_short}{q_ellipsis}\"\n\n"
            f"השלב הבא: לחץ \"קבל\" בדשבורד כדי לראות את פרטי הקשר של הלקוח ולהתחיל בעבודה. אם הפנייה לא מתאימה לך — לחץ \"דחה\" ונשייך אותה לעו\"ד אחר.\n\n"
            f"מזהה: {short_id}"
        )
        subject = (
            f"[legal-eye] {'⚠ פנייה דחופה' if urgent else 'פנייה חדשה'} "
            f"· {domain_he or '?'} · {short_id}"
        )
        ok = _send_email_branded(
            lawyer_email, subject, body,
            title=title,
            cta_label="צפה בדשבורד ←",
            cta_url="https://legal-eye.1bigfam.com/lawyer-dashboard/",
        )
        if ok:
            print(f"[lawyer-assigned] sent → {lawyer_email}")

    threading.Thread(target=_worker, daemon=True).start()


def _notify_user_status_change_async(record: dict, old_status: str, new_status: str) -> None:
    """v2.99.133 — Tell the user that their request's status changed.

    Only transitions INTO ``contacted`` or ``done`` are announced, and only
    when the record carries ``user_email``. An in-app notification is
    created synchronously (v2.99.143); the branded e-mail is sent from a
    daemon thread.
    """
    import threading, os
    # Guard clauses: no address, an unannounced status, or a no-op change.
    if (not record.get("user_email")
            or new_status not in ("contacted", "done")
            or new_status == old_status):
        return

    # v2.99.143 — in-app notification
    contacted = new_status == "contacted"
    _create_notification(
        record["user_email"],
        kind="status_change",
        title=("הצוות יצר איתך קשר על הפנייה שלך" if contacted else "הפנייה שלך נסגרה"),
        body=("עורך דין מאומת בתהליך שיוך. נחזור אליך בקרוב." if contacted else ""),
        link="/my-matters/",
        related_request_id=record.get("id", ""),
    )

    def _worker():
        recipient = record["user_email"]
        excerpt = (record.get("question") or "")[:140]
        tail = "..." if len(record.get("question", "")) > 140 else ""
        quoted = f"הפנייה: \"{excerpt}{tail}\"\n"
        ref = f"מזהה: {record.get('id','?')[:8]}"
        if new_status == "contacted":
            subject = "legal-eye · הצוות יצר איתך קשר על הפנייה שלך"
            heading = "הפנייה שלך התקדמה"
            text = (
                "שלום,\n\n"
                "הפנייה שלך ל-legal-eye התקדמה — הצוות יצר איתך קשר ועכשיו הוא בתהליך שיוך עורך דין מתאים.\n\n"
                + quoted
                + ref + "\n\n"
                + "אפשר לעקוב אחרי הסטטוס באזור האישי."
            )
        else:  # done
            subject = "legal-eye · הפנייה שלך טופלה"
            heading = "הפנייה שלך נסגרה"
            text = (
                "שלום,\n\n"
                "הפנייה שלך ל-legal-eye נסגרה. אם יש לך שאלות המשך — אפשר לפתוח פנייה חדשה דרך הצ'אט הראשי.\n\n"
                + quoted
                + ref
            )
        delivered = _send_email_branded(
            recipient, subject, text, title=heading,
            cta_label="לאזור האישי ←",
            cta_url="https://legal-eye.1bigfam.com/my-matters/",
        )
        if delivered:
            print(f"[user-notify] sent {new_status} email → {recipient}")

    threading.Thread(target=_worker, daemon=True).start()


def _notify_lead_async(record: dict) -> None:
    """v2.99.122 — Announce a new waitlist lead to the outside world.

    Fire-and-forget: the work runs in a daemon thread, so the API response
    is never blocked and nothing is raised to the caller. Two independent,
    optional channels:
      - Webhook: env LE_LEAD_WEBHOOK_URL  (JSON POST with a readable
        summary plus the raw record)
      - Email:   env LE_LEAD_EMAIL_TO + LE_LEAD_EMAIL_SMTP_{HOST,PORT,USER,PASS}
    A failure in one channel is logged and does not stop the other.
    """
    import threading, os

    def _push_webhook():
        hook = os.environ.get("LE_LEAD_WEBHOOK_URL", "").strip()
        if not hook:
            return
        import urllib.request, urllib.error
        # Human-readable digest sent alongside the raw record.
        digest = "\n".join([
            "🆕 פנייה חדשה ל-legal-eye",
            f"מזהה: {record.get('id', '?')[:8]}",
            f"שאלה: {record.get('question', '')[:200]}",
            f"שם: {record.get('name', '(לא צוין)')}",
            f"קשר: {record.get('contact', '?')} ({record.get('contact_method', 'either')})",
            f"תחום: {record.get('domain', '?')} · סיכון: {record.get('risk_level', '?')}",
            f"דחוף: {'כן' if record.get('urgent') else 'לא'}",
            f"הערות: {record.get('notes', '(אין)')}",
            f"זמן: {record.get('iso', '?')}",
        ])
        envelope = {"text": digest, "summary": digest, "record": record}
        outbound = urllib.request.Request(
            hook, data=json.dumps(envelope).encode("utf-8"),
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        try:
            with urllib.request.urlopen(outbound, timeout=10) as resp:
                print(f"[lead-notify] webhook OK ({resp.status}) → {hook[:60]}")
        except Exception as e:
            print(f"[lead-notify] webhook FAIL: {type(e).__name__}: {e}")

    def _mail_operator():
        recipient = os.environ.get("LE_LEAD_EMAIL_TO", "").strip()
        if not recipient:
            return
        # Paragraphs are separated by a blank line, lines inside by "\n".
        text = "\n\n".join([
            "פנייה חדשה ל-legal-eye התקבלה זה עתה.",
            f"מזהה: {record.get('id', '?')}\n"
            f"זמן: {record.get('iso', '?')}",
            f"שאלה:\n{record.get('question', '')}",
            f"שם: {record.get('name', '(לא צוין)')}\n"
            f"קשר: {record.get('contact', '?')}\n"
            f"דרך מועדפת: {record.get('contact_method', 'either')}\n"
            f"תחום: {record.get('domain', '?')}\n"
            f"סיווג: {record.get('triage_category', '?')}\n"
            f"רמת סיכון: {record.get('risk_level', '?')}\n"
            f"דחוף: {'כן' if record.get('urgent') else 'לא'}",
            f"הערות:\n{record.get('notes', '(אין)')}",
        ])
        subject = f"[legal-eye] פנייה חדשה · {record.get('domain', '?')} · {record.get('id', '?')[:8]}"
        delivered = _send_email_branded(
            recipient, subject, text,
            title="🆕 פנייה חדשה ל-legal-eye",
            cta_label="לדשבורד שיוך עורכי דין ←",
            cta_url="https://legal-eye.1bigfam.com/admin-leads/lawyer-requests.html",
        )
        if delivered:
            print(f"[lead-notify] email OK → {recipient}")

    def _worker():
        # ── Webhook ──
        try:
            _push_webhook()
        except Exception as e:
            print(f"[lead-notify] webhook outer FAIL: {e}")
        # ── Email via SMTP ──
        try:
            _mail_operator()
        except Exception as e:
            print(f"[lead-notify] email outer FAIL: {e}")

    threading.Thread(target=_worker, daemon=True).start()


# Request body for POST /v1/lawyer-request/submit.
# `question` and `contact` are the only fields without a default; the
# handler additionally rejects either one when it is shorter than 4
# characters after stripping.
class _LawyerRequestBody(BaseModel):  # type: ignore
    question:         str                       # the user's free-text question
    name:             Optional[str] = ""
    contact:          str                       # phone or email
    contact_method:   Optional[str] = "either"  # whatsapp / email / phone / either
    # Triage context forwarded by the client; stored as given.
    domain:           Optional[str] = ""
    triage_category:  Optional[str] = ""
    risk_level:       Optional[str] = ""
    urgent:           Optional[bool] = False
    notes:            Optional[str] = ""
    # Honeypot fields: passed to _honeypot_caught(); a caught submission is
    # acknowledged with a fake success and dropped.
    hp:               Optional[str] = ""    # v2.99.180 — honeypot
    hp2:              Optional[str] = ""    # v2.99.180 — honeypot


@app.post("/v1/lawyer-request/submit")
def lawyer_request_submit(req: _LawyerRequestBody, request: Request):  # type: ignore
    """v2.99.120 — Save a lawyer-contact request to the waitlist.
    v2.99.130 — Attaches user_id if signed in (Authorization: Bearer ...).
    v2.99.180 — honeypot + IP rate limit (3/min, burst 5).
    No lawyer pool exists yet; this is the lead-capture before launch.

    Returns a dict ``{ok, id, expected_sla, message}`` on success, or a
    JSONResponse: 429 when rate-limited, 400 when ``question`` or ``contact``
    is shorter than 4 characters after stripping, 500 when the record could
    not be persisted.

    Notification and audit logging run *after* the record is on disk and are
    best-effort: a failure there is logged but no longer turns an already
    saved lead into a 500 (which would invite a retry and a duplicate lead).
    """
    import time, uuid
    # Honeypot — bots fill, humans don't see. Silently accept then drop.
    if _honeypot_caught(req.hp, req.hp2):
        return {"ok": True, "id": "drop", "message": "received"}
    if not _public_rate_check(request, "lawyer-request", per_min=3, burst=5):
        return JSONResponse(status_code=429, content={"ok": False, "reason": "rate_limited"})
    try:
        # v2.99.130 — Optional user attribution. Deliberately best-effort:
        # any failure here simply means the request is stored as anonymous.
        signed_in_user = None
        try:
            from ..auth import session_from_header
            sess = session_from_header(request.headers.get("authorization"))
            if sess:
                signed_in_user = {"id": sess["user_id"], "email": sess["email"]}
        except Exception:
            pass
        q = (req.question or "").strip()
        c = (req.contact or "").strip()
        if len(q) < 4 or len(c) < 4:
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "missing_required_fields",
            })
        _LAWYER_REQUEST_PATH.parent.mkdir(parents=True, exist_ok=True)
        record = {
            "id":               str(uuid.uuid4()),
            "ts":               time.time(),
            "iso":              time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()),
            "question":         q,
            "name":             (req.name or "").strip(),
            "contact":          c,
            "contact_method":   req.contact_method or "either",
            "domain":           req.domain or "",
            "triage_category":  req.triage_category or "",
            "risk_level":       req.risk_level or "",
            "urgent":           bool(req.urgent),
            "notes":            (req.notes or "").strip(),
            "status":           "new",
            "user_id":          signed_in_user["id"]    if signed_in_user else None,
            "user_email":       signed_in_user["email"] if signed_in_user else None,
        }
        # One JSON object per line; the lock serialises concurrent appends.
        with _LAWYER_REQUEST_LOCK:
            with open(_LAWYER_REQUEST_PATH, "a", encoding="utf-8") as f:
                f.write(json.dumps(record, ensure_ascii=False) + "\n")
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}",
        })

    # ── From here on the lead is saved; nothing below may fail the request.
    short_id = record["id"][:8]
    # v2.99.122 — fire async notification (webhook + optional email)
    try:
        _notify_lead_async(record)
    except Exception as e:
        print(f"[lawyer-request] notify FAIL #{short_id}: {type(e).__name__}: {e}")
    # v2.99.144 — audit
    try:
        _audit_log(
            actor=signed_in_user["email"] if signed_in_user else "anonymous",
            role="user" if signed_in_user else "anonymous",
            action="request_submitted",
            target=record["id"],
            meta={"domain": record["domain"], "urgent": record["urgent"], "has_attachment": False},
        )
    except Exception as e:
        print(f"[lawyer-request] audit FAIL #{short_id}: {type(e).__name__}: {e}")
    # Always log to stdout (HF Space logs)
    print(f"[lawyer-request] new #{short_id} domain={record['domain']} "
          f"urgent={record['urgent']} contact={record['contact_method']}")
    return {
        "ok":            True,
        "id":            record["id"],
        "expected_sla":  "24 hours" if not record["urgent"] else "4 hours",
        "message":       "בקשתך התקבלה. נחזור אליך תוך 24 שעות (4 שעות לדחוף).",
    }


@app.post("/v1/lawyer-request/test-notify")
def lawyer_request_test_notify(token: str = ""):  # type: ignore
    """Admin: fire a test notification (webhook + email) without saving
    anything to the waitlist. Useful to verify env-var config.

    ``token`` must equal the ``LE_ADMIN_TOKEN`` environment variable; when
    that variable is unset or empty every call is rejected with 403. The
    comparison is constant-time so the token cannot be recovered by timing
    repeated guesses.

    Returns a dict reporting which notification env vars are configured; the
    notification itself is dispatched via ``_notify_lead_async``.
    """
    import hmac, os, time, uuid
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Compare as bytes: compare_digest() rejects non-ASCII str arguments.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    # Synthetic record, shaped like a real submission but flagged as a test.
    record = {
        "id":               "test-" + str(uuid.uuid4())[:8],
        "ts":               time.time(),
        "iso":              time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()),
        "question":         "🧪 בדיקת notification — אין צורך לטפל",
        "name":             "Test",
        "contact":          "test@example.com",
        "contact_method":   "email",
        "domain":           "employment_law",
        "triage_category":  "personal_legal_advice",
        "risk_level":       "medium_high",
        "urgent":           False,
        "notes":            "Sent from /v1/lawyer-request/test-notify",
        "status":           "test",
    }
    _notify_lead_async(record)
    return {
        "ok":              True,
        "message":         "Notification fired in background; check webhook/email destination.",
        "webhook_url_set": bool(os.environ.get("LE_LEAD_WEBHOOK_URL")),
        "email_to_set":    bool(os.environ.get("LE_LEAD_EMAIL_TO")),
        "email_user_set":  bool(os.environ.get("LE_LEAD_EMAIL_SMTP_USER")),
    }


# ──────────────────────────────────────────────────────────────────────
# v2.99.124 — Document upload v1
# A user with a personal case can attach a document to their lawyer
# request. V1 does NOT do AI document review — the file goes to the
# admin queue alongside the lawyer-request record, and a human (the
# operator) reviews + routes it to a verified lawyer.
# ──────────────────────────────────────────────────────────────────────
# All runtime paths below are relative, so they resolve against the process's
# current working directory.
_DOC_UPLOAD_DIR = _wa_pl.Path("tau_rag/runtime/document_uploads")
# Lower-case extensions with the leading dot — presumably the allow-list the
# upload handler checks (handler not shown here; confirm before relying on it).
_DOC_ALLOWED_EXT = {".pdf", ".docx", ".doc", ".png", ".jpg", ".jpeg", ".txt", ".rtf"}
_DOC_MAX_BYTES = 10 * 1024 * 1024  # 10 MB

# v2.99.137 — lawyer deliverables (what the lawyer hands back to the client)
_DELIVERABLE_DIR = _wa_pl.Path("tau_rag/runtime/deliverables")

# v2.99.149 — Weekly admin digest state
# JSON file holding "last_sent_ts" / "last_sent_iso" of the most recent digest.
_DIGEST_STATE_PATH = _wa_pl.Path("tau_rag/runtime/admin_digest_state.json")


def _compose_admin_digest() -> str:
    """Build the weekly Hebrew digest body from current JSONL state.

    Window: last 7 days, measured back from the moment of the call.

    Reads the lawyer-request and lawyer-application JSONL files, counts
    activity inside the window, lists items that currently need attention,
    ranks verified lawyers by accepted requests, and returns the plain-text
    e-mail body.

    Reading is best-effort: missing files, unreadable files, corrupt lines
    and rows that are not JSON objects are skipped. If the lawyer statistics
    cannot be imported or computed, the "top performers" section is omitted.
    """
    import time
    from collections import Counter

    now = time.time()
    window_start = now - (7 * 24 * 60 * 60)

    def _read_jsonl(path) -> list:
        """Return every JSON-object row of ``path``; never raises."""
        rows: list = []
        if not path.exists():
            return rows
        try:
            with open(path, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        rec = json.loads(line)
                    except ValueError:
                        continue  # corrupt line
                    # Everything below calls .get(); a bare number or list
                    # would crash the whole digest.
                    if isinstance(rec, dict):
                        rows.append(rec)
        except Exception as e:
            # Keep whatever was read before the failure.
            print(f"[digest] could not fully read {path}: {type(e).__name__}: {e}")
        return rows

    def _in_window(rec: dict, key: str) -> bool:
        """True when ``rec[key]`` is a truthy timestamp inside the window."""
        ts = rec.get(key)
        return bool(ts) and ts >= window_start

    requests = _read_jsonl(_LAWYER_REQUEST_PATH)
    apps = _read_jsonl(_LAWYER_APP_PATH)

    # Activity in the window
    new_reqs    = [r for r in requests if _in_window(r, "ts")]
    new_apps    = [a for a in apps     if _in_window(a, "ts")]
    delivered   = [r for r in requests if _in_window(r, "deliverable_ts")]
    approved    = [r for r in requests if _in_window(r, "approval_ts") and not r.get("auto_approved")]
    auto_releas = [r for r in requests if _in_window(r, "approval_ts") and r.get("auto_approved")]
    disputed_w  = [r for r in requests if _in_window(r, "dispute_ts")]
    resolved_w  = [r for r in requests if _in_window(r, "dispute_resolution_ts")]

    # Attention items (current state, not windowed)
    open_disputes = [r for r in requests if r.get("status") == "disputed"]
    pending_apps  = [a for a in apps if (a.get("status") or "submitted") in ("submitted", "under_review")]
    # A missing timestamp falls back to `now`, i.e. age 0 → never "stale".
    stale_new     = [r for r in requests if r.get("status") in ("new", "offered") and (now - (r.get("ts") or now)) > 24 * 3600]
    contacted_no_deliv = [r for r in requests
                          if r.get("status") == "contacted"
                          and r.get("lawyer_response") == "accepted"
                          and (now - (r.get("assigned_ts") or now)) > 48 * 3600]

    # Top performers (verified lawyers w/ best acceptance + activity).
    # The import lives inside the try so that a missing/broken matching
    # module degrades the digest instead of aborting it.
    try:
        from ..matching import stats_for_all_lawyers
        all_stats = stats_for_all_lawyers()
    except Exception as e:
        print(f"[digest] lawyer stats unavailable: {type(e).__name__}: {e}")
        all_stats = {}
    perf = []
    for a in apps:
        if a.get("status") != "verified":
            continue
        s = all_stats.get(a.get("id", ""), {})
        if not s.get("received"):
            continue
        perf.append({
            # str(...) because the name is later rendered with a "<25s"
            # format spec, which raises on None / non-string values.
            "name":      str(a.get("full_name") or "?"),
            "received":  s["received"],
            "accepts":   s.get("direct_accepted", 0) + s.get("race_accepted", 0),
            "rate":      s.get("acceptance_rate"),
            "active":    s.get("active", 0),
        })
    perf.sort(key=lambda x: (-(x.get("accepts") or 0), -(x.get("rate") or 0)))

    # Domain distribution this week (six most frequent; ties keep first-seen order)
    domain_count = Counter(r.get("domain") or "(לא סווג)" for r in new_reqs)
    domains_str = " · ".join(f"{d}: {n}" for d, n in domain_count.most_common(6)) or "(אין)"

    iso_now   = time.strftime("%d/%m/%Y", time.localtime(now))
    iso_start = time.strftime("%d/%m/%Y", time.localtime(window_start))

    body = (
        f"שלום,\n\n"
        f"סיכום שבועי של legal-eye · {iso_start} → {iso_now}\n\n"
        f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
        f"📊 פעילות (7 ימים)\n"
        f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
        f"   {len(new_reqs):>3}  פניות חדשות\n"
        f"   {len(new_apps):>3}  בקשות הצטרפות עו\"ד\n"
        f"   {len(delivered):>3}  תוצרים הוגשו\n"
        f"   {len(approved):>3}  אושרו ידנית\n"
        f"   {len(auto_releas):>3}  שוחררו אוטומטית (72h)\n"
        f"   {len(disputed_w):>3}  מחלוקות נפתחו\n"
        f"   {len(resolved_w):>3}  מחלוקות נסגרו\n\n"
        f"תפוצת תחומים: {domains_str}\n\n"
        f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
        f"⚠ דרוש טיפול (מצב נוכחי)\n"
        f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
        f"   {len(open_disputes):>3}  מחלוקות פתוחות\n"
        f"   {len(pending_apps):>3}  בקשות עו\"ד ממתינות לאימות\n"
        f"   {len(stale_new):>3}  פניות 'new'/'offered' מעל 24 שעות\n"
        f"   {len(contacted_no_deliv):>3}  פניות שעו\"ד קיבל ולא הגיש תוצר >48 שעות\n\n"
    )
    if perf:
        body += (
            f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
            f"🏆 מובילי הפול (עו\"ד מאומתים)\n"
            f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
        )
        for p in perf[:5]:
            rate_str = f"{p['rate']}%" if p['rate'] is not None else "—"
            body += f"   עו\"ד {p['name']:<25s} | {p['accepts']}/{p['received']} פניות | קבלה: {rate_str}\n"
        body += "\n"
    body += (
        f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"
        f"מסכי ניהול:\n"
        f"  פניות:      https://legal-eye.1bigfam.com/admin-leads/lawyer-requests.html\n"
        f"  בקשות עו\"ד: https://legal-eye.1bigfam.com/admin-leads/lawyer-applications.html\n"
        f"  Triage log: https://legal-eye.1bigfam.com/admin-leads/triage-log.html\n"
        f"  Audit log:  https://legal-eye.1bigfam.com/admin-leads/audit-log.html\n\n"
        f"— legal-eye · weekly digest · {iso_now}\n"
    )
    return body


def _send_admin_digest(body: str) -> bool:
    """v2.99.151 — Deliver the digest through the branded e-mail helper.

    Synchronous. The recipient comes from the ``LE_LEAD_EMAIL_TO``
    environment variable; when it is unset or blank nothing is sent and
    ``False`` is returned. Otherwise the helper's own result is returned.
    """
    import os, time

    recipient = os.environ.get("LE_LEAD_EMAIL_TO", "").strip()
    if recipient:
        subject = f"[legal-eye] סיכום שבועי · {time.strftime('%d/%m/%Y')}"
        heading = f"סיכום שבועי · {time.strftime('%d/%m/%Y')}"
        delivered = _send_email_branded(
            recipient,
            subject,
            body,
            title=heading,
            cta_label="לדשבורד הניהול ←",
            cta_url="https://legal-eye.1bigfam.com/admin-leads/lawyer-requests.html",
            timeout=30,
        )
        if delivered:
            print(f"[digest] sent → {recipient}")
        return delivered

    print("[digest] missing LE_LEAD_EMAIL_TO")
    return False


def _load_digest_state() -> dict:
    """Load the persisted digest state, or ``{}`` when there is none.

    Returns an empty dict when the state file is missing, unreadable, not
    valid JSON, or holds something other than a JSON object — callers use
    ``.get()`` on the result, so it must always be a dict.
    """
    if not _DIGEST_STATE_PATH.exists():
        return {}
    try:
        with open(_DIGEST_STATE_PATH, "r", encoding="utf-8") as f:
            state = json.load(f)
    except (OSError, ValueError):
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        return {}
    return state if isinstance(state, dict) else {}


def _save_digest_state(s: dict) -> None:
    """Persist the digest state ``s`` as JSON; never raises.

    The payload is serialised first and written to a sibling temp file that
    is then renamed over the real file. An interrupted or failed save
    therefore leaves the previous state intact instead of a truncated file,
    which would load as ``{}`` and cause the digest to be sent again.
    Failures are reported on stdout only.
    """
    try:
        payload = json.dumps(s, ensure_ascii=False)
        _DIGEST_STATE_PATH.parent.mkdir(parents=True, exist_ok=True)
        tmp = _DIGEST_STATE_PATH.with_suffix(".json.tmp")
        with open(tmp, "w", encoding="utf-8") as f:
            f.write(payload)
        tmp.replace(_DIGEST_STATE_PATH)
    except Exception as e:
        print(f"[digest] state save FAIL: {e}")


def _start_admin_digest_thread() -> None:
    """v2.99.149 — Launch the daemon thread that sends the weekly digest.

    After a 90-second warm-up the thread wakes every 30 minutes. When the
    server-local clock (``time.localtime``) reads Sunday between 09:00 and
    10:00 and the state file does not already record a send for today, it
    composes and sends the digest, then stores the send time and writes an
    audit entry. The state file is what prevents a double send after a
    restart inside the window.

    NOTE(review): the schedule was described as Israeli time, but the code
    uses the process's local timezone — this matches only if the server TZ is
    Asia/Jerusalem.
    """
    import threading, time

    def _tick() -> None:
        """Run one scheduling check; send the digest if it is due."""
        now = time.time()
        lt = time.localtime(now)
        # tm_wday counts from Monday = 0, so Sunday is 6.
        if lt.tm_wday != 6 or not (9 <= lt.tm_hour < 10):
            return
        today_iso = time.strftime("%Y-%m-%d", lt)
        last_iso = _load_digest_state().get("last_sent_iso", "")
        if last_iso.startswith(today_iso):
            return  # already sent today
        if not _send_admin_digest(_compose_admin_digest()):
            return  # not recorded, so the next check retries
        _save_digest_state({
            "last_sent_ts":  now,
            "last_sent_iso": time.strftime("%Y-%m-%dT%H:%M:%S", lt),
        })
        _audit_log(
            actor="system", role="system", action="admin_digest_sent",
            target="", meta={"sent_at": today_iso},
        )

    def _run_forever() -> None:
        # Initial warmup delay — let app finish booting
        time.sleep(90)
        while True:
            try:
                _tick()
            except Exception as e:
                print(f"[digest-loop] FAIL: {e}")
            time.sleep(30 * 60)   # check every 30 min

    scheduler = threading.Thread(target=_run_forever, daemon=True, name="admin-digest")
    scheduler.start()
    print("[digest] background thread started (Sundays 9-10 AM)")


# Import-time side effect: the weekly-digest scheduler thread is started as
# soon as this module is imported.
_start_admin_digest_thread()


@app.post("/v1/admin/send-digest-now")
def admin_send_digest_now(token: str = ""):  # type: ignore
    """Admin: trigger the weekly digest manually (for testing or ad-hoc).

    ``token`` must equal the ``LE_ADMIN_TOKEN`` environment variable (403
    otherwise, including when the variable is unset). The comparison is
    constant-time. On a successful send the digest state is updated — which
    also suppresses the scheduled send for the same day — and an audit entry
    is written.

    Returns ``{ok, sent, body_preview}``; ``ok`` is True even when sending
    failed, ``sent`` carries the actual outcome.
    """
    import hmac, os, time
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Compare as bytes: compare_digest() rejects non-ASCII str arguments.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    body = _compose_admin_digest()
    ok = _send_admin_digest(body)
    if ok:
        _save_digest_state({
            "last_sent_ts":  time.time(),
            "last_sent_iso": time.strftime("%Y-%m-%dT%H:%M:%S", time.localtime()),
        })
        _audit_log(actor="admin", role="admin", action="admin_digest_sent_manual",
                   target="", meta={})
    return {"ok": True, "sent": ok, "body_preview": body[:1200]}


# Each store below is a JSONL file under tau_rag/runtime (relative to the
# working directory) paired with a lock object from `_wa_th` — presumably the
# threading module; confirm against the file's imports.

# v2.99.143 — In-app notifications storage
_NOTIFICATIONS_PATH = _wa_pl.Path("tau_rag/runtime/notifications.jsonl")
_NOTIFICATIONS_LOCK = _wa_th.Lock()

# v2.99.144 — Audit log storage
_AUDIT_LOG_PATH = _wa_pl.Path("tau_rag/runtime/audit_log.jsonl")
_AUDIT_LOG_LOCK = _wa_th.Lock()

# v2.99.150 — Beta invite codes + feedback
# The invites lock is held while a code is consumed or a new one is appended.
_BETA_INVITES_PATH = _wa_pl.Path("tau_rag/runtime/beta_invites.jsonl")
_BETA_INVITES_LOCK = _wa_th.Lock()
_FEEDBACK_PATH     = _wa_pl.Path("tau_rag/runtime/feedback.jsonl")
_FEEDBACK_LOCK     = _wa_th.Lock()


def _validate_invite_code(code: str) -> dict:
    """Check a beta invite code without consuming it.

    The code is compared case-insensitively after stripping. The first
    record in the invites file with a matching code decides the result.

    Returns ``{"ok": True, "code", "label", "uses_left", "max_uses"}`` for a
    usable code, otherwise ``{"ok": False, "reason": ...}`` where reason is
    one of ``empty_code``, ``no_invites`` (file missing), ``expired``,
    ``exhausted``, ``not_found`` or ``error:<ExceptionName>``.

    Corrupt lines, non-object rows and records whose ``code`` is missing or
    not a string are skipped, so one bad record cannot hide the codes that
    follow it.
    """
    import time
    code = (code or "").strip().upper()
    if not code:
        return {"ok": False, "reason": "empty_code"}
    if not _BETA_INVITES_PATH.exists():
        return {"ok": False, "reason": "no_invites"}
    try:
        with open(_BETA_INVITES_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                except ValueError:
                    continue  # corrupt line
                if not isinstance(rec, dict):
                    continue
                if str(rec.get("code") or "").upper() != code:
                    continue
                expires_ts = rec.get("expires_ts")
                if expires_ts and expires_ts < time.time():
                    return {"ok": False, "reason": "expired"}
                # A missing or zero max_uses means a single-use code.
                max_uses = rec.get("max_uses") or 1
                uses_left = max_uses - (rec.get("used_count") or 0)
                if uses_left <= 0:
                    return {"ok": False, "reason": "exhausted"}
                return {
                    "ok":         True,
                    "code":       code,
                    "label":      rec.get("label", ""),
                    "uses_left":  uses_left,
                    "max_uses":   max_uses,
                }
        return {"ok": False, "reason": "not_found"}
    except Exception as e:
        return {"ok": False, "reason": f"error:{type(e).__name__}"}


def _consume_invite_code(code: str, who_email: str = "") -> bool:
    """Record one use of an invite code. Returns True if consumed.

    The whole read-check-rewrite cycle runs under ``_BETA_INVITES_LOCK``.
    The first record whose code matches (case-insensitive) is used. Expiry
    and the remaining-use count are re-checked here, inside the lock, so two
    requests that both validated the last remaining use cannot both consume
    it.

    Returns False when the code is empty, the file cannot be read, no record
    matches, or the matching record is expired or exhausted. On success the
    record's ``used_count`` is incremented, an entry is appended to
    ``used_by``, and the file is rewritten via a temp file and atomic rename.
    Lines that are not valid JSON are not carried over by the rewrite.
    """
    import time
    code = (code or "").strip().upper()
    if not code:
        return False
    with _BETA_INVITES_LOCK:
        items = []
        if _BETA_INVITES_PATH.exists():
            try:
                with open(_BETA_INVITES_PATH, "r", encoding="utf-8") as f:
                    for line in f:
                        line = line.strip()
                        if not line:
                            continue
                        try:
                            items.append(json.loads(line))
                        except ValueError:
                            continue  # corrupt line
            except Exception:
                return False
        target = next(
            (rec for rec in items
             if isinstance(rec, dict) and str(rec.get("code") or "").upper() == code),
            None,
        )
        if target is None:
            return False
        now = time.time()
        expires_ts = target.get("expires_ts")
        if expires_ts and expires_ts < now:
            return False
        used_count = target.get("used_count") or 0
        if used_count >= (target.get("max_uses") or 1):
            return False
        target["used_count"] = used_count + 1
        used_by = target.get("used_by")
        if not isinstance(used_by, list):
            used_by = target["used_by"] = []
        used_by.append({
            "email": who_email,
            "ts":    now,
            "iso":   time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime(now)),
        })
        # Write everything to a temp file, then swap it in.
        tmp = _BETA_INVITES_PATH.with_suffix(".jsonl.tmp")
        with open(tmp, "w", encoding="utf-8") as f:
            f.writelines(json.dumps(rec, ensure_ascii=False) + "\n" for rec in items)
        tmp.replace(_BETA_INVITES_PATH)
    return True


class _CreateInviteBody(BaseModel):  # type: ignore
    # Request payload for POST /v1/admin/invite/create.
    # The handler upper-cases and strips `code`, clamps `max_uses` to at
    # least 1, and treats a falsy `expires_in_days` (None or 0) as "never
    # expires".
    code:             Optional[str] = ""           # auto-generate if empty
    max_uses:         int            = 1
    expires_in_days:  Optional[int]  = None        # None = never
    label:            Optional[str]  = ""          # internal note


@app.post("/v1/admin/invite/create")
def admin_invite_create(body: _CreateInviteBody, token: str = ""):  # type: ignore
    """Admin: create a beta invite code.

    ``token`` must equal the ``LE_ADMIN_TOKEN`` environment variable (403
    otherwise); the comparison is constant-time.

    When ``body.code`` is empty an 8-character code is generated from
    upper-case letters and digits, excluding the look-alikes 0/O/1/I/L.
    A code that already exists — usable, expired or exhausted — is rejected
    with 409. Validation always resolves to the *first* record for a code,
    so a second record for an expired or exhausted code could never be
    reached.

    Returns ``{ok, code, record}`` on success.
    """
    import hmac, os, time, secrets, string
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Compare as bytes: compare_digest() rejects non-ASCII str arguments.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    code = (body.code or "").strip().upper()
    if not code:
        # Avoid ambiguous chars
        alphabet = "".join(
            ch for ch in string.ascii_uppercase + string.digits if ch not in "0O1IL"
        )
        code = "".join(secrets.choice(alphabet) for _ in range(8))
    # Check duplicate — any existing record counts, not only usable ones.
    existing = _validate_invite_code(code)
    if existing.get("ok") or existing.get("reason") in ("expired", "exhausted"):
        return JSONResponse(status_code=409, content={"ok": False, "reason": "duplicate"})
    # One clock read, so the *_ts and *_iso fields describe the same instant.
    now = time.time()
    iso_fmt = "%Y-%m-%dT%H:%M:%S%z"
    expires_ts = (now + body.expires_in_days * 86400) if body.expires_in_days else None
    record = {
        "code":         code,
        "label":        (body.label or "").strip(),
        "max_uses":     max(1, int(body.max_uses)),
        "used_count":   0,
        "used_by":      [],
        "created_ts":   now,
        "created_iso":  time.strftime(iso_fmt, time.localtime(now)),
        "expires_ts":   expires_ts,
        "expires_iso":  (time.strftime(iso_fmt, time.localtime(expires_ts))
                         if expires_ts is not None else None),
    }
    with _BETA_INVITES_LOCK:
        _BETA_INVITES_PATH.parent.mkdir(parents=True, exist_ok=True)
        with open(_BETA_INVITES_PATH, "a", encoding="utf-8") as f:
            f.write(json.dumps(record, ensure_ascii=False) + "\n")
    _audit_log(actor="admin", role="admin", action="invite_created",
               target=code, meta={"label": record["label"], "max_uses": record["max_uses"]})
    return {"ok": True, "code": code, "record": record}


@app.get("/v1/admin/invite/list")
def admin_invite_list(token: str = ""):  # type: ignore
    """Admin: list all invite codes with usage.

    ``token`` must equal the ``LE_ADMIN_TOKEN`` environment variable (403
    otherwise); the comparison is constant-time.

    Returns ``{ok, n, items}`` with the records sorted newest first by
    ``created_ts``. Corrupt lines and rows that are not JSON objects are
    skipped.
    """
    import hmac, os
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Compare as bytes: compare_digest() rejects non-ASCII str arguments.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    if not _BETA_INVITES_PATH.exists():
        return {"ok": True, "n": 0, "items": []}
    items = []
    with open(_BETA_INVITES_PATH, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                rec = json.loads(line)
            except ValueError:
                continue  # corrupt line
            # The sort key below calls .get(), so keep objects only.
            if isinstance(rec, dict):
                items.append(rec)
    items.sort(key=lambda r: -(r.get("created_ts") or 0))
    return {"ok": True, "n": len(items), "items": items}


@app.post("/v1/beta/invite/validate")
def beta_invite_validate(req: dict):  # type: ignore
    """Public: validate a code without consuming. Used by onboarding form
    to give live feedback as user types.

    Expects a JSON object with a ``code`` key and returns the result of
    ``_validate_invite_code``. The body is untrusted: an integer code is
    converted to its string form, and any other non-string value (null,
    list, object, bool) is treated as an empty code rather than causing a
    server error.
    """
    raw = req.get("code", "") if isinstance(req, dict) else ""
    # bool is a subclass of int, so exclude it explicitly.
    if isinstance(raw, int) and not isinstance(raw, bool):
        raw = str(raw)
    if not isinstance(raw, str):
        raw = ""
    return _validate_invite_code(raw)


class _FeedbackBody(BaseModel):  # type: ignore
    # Payload for user feedback; only `message` is required.
    # NOTE(review): the handler that consumes this model is not in this part
    # of the file — the allowed `kind` values and the 1-5 `rating` range are
    # documented by the field comments only, not enforced by the model.
    message: str
    kind:    Optional[str] = "general"   # bug | suggestion | general | praise
    page:    Optional[str] = ""           # URL where they were
    rating:  Optional[int] = None         # 1-5
    hp:      Optional[str] = ""           # v2.99.180 — honeypot
    hp2:     Optional[str] = ""           # v2.99.180 — honeypot


# v2.99.180 — Spam protection for public POST endpoints.
# IP-based token bucket: per-IP, per-endpoint-key, refilling at N/min.
# Lighter than the webhook rate limiter (_wa_check_rate_limit) which is
# auth-aware. This one just protects against burst spam.
# Keys are "<endpoint_key>:<ip>"; each value is {"tokens": float, "ts": float},
# where "ts" is the time of the last check. State lives in process memory
# only, and every access goes through _PUBLIC_RATE_LOCK.
_PUBLIC_RATE_BUCKETS: Dict[str, dict] = {}
_PUBLIC_RATE_LOCK = _wa_th.Lock()


def _client_ip(request) -> str:
    """Best-effort client IP (works behind Vercel/HF proxies).

    Prefers the first entry of the ``X-Forwarded-For`` header; otherwise
    falls back to the socket peer address. Returns ``"unknown"`` when neither
    is available or anything goes wrong while reading the request.

    NOTE(review): the header is client-supplied, so the value is only
    trustworthy when a proxy in front of the app overwrites it.
    """
    try:
        forwarded = request.headers.get("x-forwarded-for", "")
        if not forwarded:
            peer = request.client
            host = peer.host if peer else "unknown"
            return host or "unknown"
        # Left-most entry of the comma-separated chain.
        first_hop, _, _ = forwarded.partition(",")
        return first_hop.strip()
    except Exception:
        return "unknown"


def _public_rate_check(request, endpoint_key: str,
                       per_min: int = 5, burst: int = 8) -> bool:
    """Token-bucket rate limiter keyed by (endpoint_key, client IP).

    Each bucket starts full at ``burst`` tokens and refills continuously at
    ``per_min`` tokens per minute, capped at ``burst``. An allowed request
    costs one token.

    Returns True when the request may proceed, False when the bucket holds
    less than one token. Defaults: 5/min, burst 8.
    """
    import time
    ip = _client_ip(request)
    bucket_key = f"{endpoint_key}:{ip}"
    now = time.time()
    capacity = float(burst)
    refill_per_sec = per_min / 60.0
    with _PUBLIC_RATE_LOCK:
        state = _PUBLIC_RATE_BUCKETS.get(bucket_key)
        if state is None:
            state = _PUBLIC_RATE_BUCKETS[bucket_key] = {"tokens": capacity, "ts": now}
        # Credit the tokens earned since the last check, then stamp the check.
        earned = (now - state["ts"]) * refill_per_sec
        state["tokens"] = min(capacity, state["tokens"] + earned)
        state["ts"] = now
        if state["tokens"] < 1.0:
            return False
        state["tokens"] -= 1.0
        # Housekeeping (allowed requests only): once the table exceeds 500
        # entries, evict buckets not touched in the last 15 minutes.
        if len(_PUBLIC_RATE_BUCKETS) > 500:
            cutoff = now - 900
            stale = [k for k, b in _PUBLIC_RATE_BUCKETS.items() if b["ts"] < cutoff]
            for k in stale:
                del _PUBLIC_RATE_BUCKETS[k]
        return True


def _honeypot_caught(*field_values) -> bool:
    """Tell whether any honeypot field was filled in (bot signature).

    Accepts any number of field values. A real user leaves all of them
    empty, so the result is True as soon as one value is neither falsy nor
    whitespace-only.
    """
    return any(value and value.strip() for value in field_values)


# v2.99.177 — Newsletter subscribers
# JSONL file, one subscriber record per line; appends are made while holding
# _NEWSLETTER_LOCK.
_NEWSLETTER_PATH = _wa_pl.Path("tau_rag/runtime/newsletter_subscribers.jsonl")
_NEWSLETTER_LOCK = _wa_th.Lock()


class _NewsletterBody(BaseModel):  # type: ignore
    # Request payload for POST /v1/newsletter/subscribe.
    # The handler lower-cases and strips `email`, rejects it when it lacks an
    # "@" or a dot in the domain part or exceeds 200 characters, and stores
    # at most the first 120 characters of `source`.
    email:  str
    source: Optional[str] = ""    # which page/CTA
    hp:     Optional[str] = ""    # v2.99.180 — honeypot (bots fill; humans never see)
    hp2:    Optional[str] = ""    # v2.99.180 — secondary honeypot


@app.post("/v1/newsletter/subscribe")
def newsletter_subscribe(req: _NewsletterBody, request: Request):  # type: ignore
    """v2.99.177 — Append email to newsletter subscribers. Idempotent
    (silently dedupes if already subscribed).
    v2.99.180 — honeypot + IP rate limit (5/min, burst 8).
    v2.99.192 — double opt-in: subscribers start as `pending`,
    confirmation email sent with HMAC link. Sender skips pending users.

    Responses: ``{ok, status}`` with status ``pending_verification`` for a
    new subscriber or ``already_subscribed`` for a known address; 429 when
    rate-limited; 400 for an invalid or over-long email; 500 when the record
    cannot be written.

    The duplicate check and the append happen under the same lock, so two
    concurrent requests for one address cannot both be appended.
    """
    import time, uuid
    # Honeypot — bots fill these fields; humans never see them.
    if _honeypot_caught(req.hp, req.hp2):
        return {"ok": True, "status": "subscribed"}
    if not _public_rate_check(request, "newsletter", per_min=5, burst=8):
        return JSONResponse(status_code=429, content={"ok": False, "reason": "rate_limited"})
    em = (req.email or "").strip().lower()
    if not em or "@" not in em or "." not in em.split("@")[-1]:
        return JSONResponse(status_code=400, content={"ok": False, "reason": "invalid_email"})
    # Length guard
    if len(em) > 200:
        return JSONResponse(status_code=400, content={"ok": False, "reason": "email_too_long"})
    rec = {
        "id":      str(uuid.uuid4())[:12],
        "ts":      time.time(),
        "iso":     time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()),
        "email":   em,
        "source":  (req.source or "")[:120],
        "ip":      (request.client.host if request.client else "")[:45],
        "ua":      (request.headers.get("user-agent", "") or "")[:200],
        # v2.99.192 — double opt-in: starts as pending until user clicks link
        "pending": True,
    }
    try:
        _NEWSLETTER_PATH.parent.mkdir(parents=True, exist_ok=True)
        with _NEWSLETTER_LOCK:
            # Dedupe — scan existing subscribers while holding the lock.
            already = False
            if _NEWSLETTER_PATH.exists():
                try:
                    with open(_NEWSLETTER_PATH, "r", encoding="utf-8") as f:
                        for line in f:
                            line = line.strip()
                            if not line:
                                continue
                            try:
                                row = json.loads(line)
                            except ValueError:
                                continue  # corrupt line
                            if isinstance(row, dict) and str(row.get("email") or "").lower() == em:
                                already = True
                                break
                except Exception:
                    # Best-effort: an unreadable file must not block sign-ups.
                    pass
            if already:
                return {"ok": True, "status": "already_subscribed"}
            with open(_NEWSLETTER_PATH, "a", encoding="utf-8") as f:
                f.write(json.dumps(rec, ensure_ascii=False) + "\n")
        _audit_log(actor=em, role="user", action="newsletter_subscribe",
                   target=rec["id"], meta={"source": rec["source"]})
        # v2.99.192 — fire verification email (best-effort, async)
        try:
            _send_newsletter_verification_async(em, rec.get("source", ""))
        except Exception:
            pass
        return {"ok": True, "status": "pending_verification"}
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}",
        })


def _newsletter_verify_token(email: str) -> str:
    """v2.99.192 — Derive the double opt-in verification token for ``email``.

    The token is the first 24 hex characters of HMAC-SHA256 over
    ``"verify:" + email.lower()``. The key is the ``LE_ADMIN_TOKEN``
    environment variable, or a built-in fallback string when that variable
    is not set. The ``verify:`` prefix keeps these tokens distinct from the
    unsubscribe tokens.
    """
    import hmac, hashlib, os

    key = os.environ.get("LE_ADMIN_TOKEN", "le-fallback-secret").encode("utf-8")
    payload = f"verify:{email.lower()}".encode("utf-8")
    digest = hmac.new(key, payload, hashlib.sha256).hexdigest()
    return digest[:24]


def _send_newsletter_verification_async(email: str, source: str = "") -> None:
    """Send the branded double opt-in e-mail from a background daemon thread.

    Returns immediately. The verification link (URL-quoted address plus its
    HMAC token) is built inside the thread and passed to the branded e-mail
    helper. ``source`` is accepted but not used.
    """
    import threading
    from urllib.parse import quote

    def _deliver():
        link = (
            "https://legal-eye.1bigfam.com/v1/newsletter/verify"
            + f"?email={quote(email)}&t={_newsletter_verify_token(email)}"
        )
        paragraphs = [
            "שלום,",
            "ביקשת לקבל newsletter מ-legal-eye. כדי לאשר את המנוי, לחץ על הקישור:",
            link,
            "אם לא ביקשת — התעלם מהאימייל. לא נשמור את האימייל שלך אם לא תאשר.",
            "הקישור תקף ל-7 ימים.",
        ]
        _send_email_branded(
            email,
            "legal-eye · אישור מנוי newsletter",
            "\n\n".join(paragraphs),
            title="אשר את המנוי",
            cta_label="אשר מנוי ←",
            cta_url=link,
        )

    sender = threading.Thread(target=_deliver, daemon=True)
    sender.start()


@app.get("/v1/newsletter/verify")
def newsletter_verify(email: str = "", t: str = ""):  # type: ignore
    """v2.99.192 — Double opt-in: verify email via HMAC token.
    Marks subscriber as no-longer-pending and adds verified_ts.

    ``email`` and ``t`` come from the link in the verification e-mail. The
    supplied token is compared to the expected one in constant time.

    Returns an HTML page: 400 when the address or token is missing or the
    token does not match, 404 when the address is not in the subscriber
    list, otherwise the success page. Every stored record with that address
    is updated.
    """
    from fastapi.responses import HTMLResponse
    import hmac, time
    em = (email or "").strip().lower()
    supplied = t or ""
    expected = _newsletter_verify_token(em) if em else ""
    # Compare as bytes: compare_digest() rejects non-ASCII str arguments.
    token_ok = bool(em and supplied) and hmac.compare_digest(
        supplied.encode("utf-8"), expected.encode("utf-8")
    )
    if not token_ok:
        return HTMLResponse(content=_unsub_page(False, "קישור לא תקף או פג תוקף."), status_code=400)
    items = _load_newsletter_subscribers()
    found = False
    for it in items:
        if (it.get("email", "") or "").lower() == em:
            it.pop("pending", None)
            it["verified_ts"]  = time.time()
            it["verified_iso"] = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime())
            found = True
    if not found:
        return HTMLResponse(content=_unsub_page(False, "האימייל לא נמצא ברשימה. אולי כבר אישרת או הוסרת."), status_code=404)
    _save_newsletter_subscribers(items)
    _audit_log(actor=em, role="user", action="newsletter_verified", target="(self)", meta={})
    return HTMLResponse(content=_verify_success_page(em))


def _verify_success_page(email: str) -> str:
    """Return the branded HTML confirmation page shown after verification.

    Args:
      email: the verified address; it is HTML-escaped before being embedded
             so markup characters in it cannot inject tags into the page.

    Returns:
      A complete HTML document as a string.
    """
    import html
    safe_email = html.escape(email or "")
    return f"""<!doctype html>
<html lang="he" dir="rtl"><head>
<meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1">
<title>✓ אושר · legal-eye</title>
<meta name="robots" content="noindex,nofollow">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link href="https://fonts.googleapis.com/css2?family=Frank+Ruhl+Libre:wght@400;700&family=Heebo:wght@400;500&display=swap" rel="stylesheet">
<style>
body{{margin:0;background:#FBF8F1;font-family:Heebo,sans-serif;color:#0A0F1A;direction:rtl;min-height:100vh;display:flex;align-items:center;justify-content:center;padding:20px}}
.card{{background:white;border:1px solid #E8E2D2;max-width:480px;width:100%;padding:40px 36px;position:relative}}
.card::before{{content:'';position:absolute;top:0;right:0;width:4px;height:60px;background:linear-gradient(180deg,#C89B3C,transparent)}}
.logo{{font-family:'Frank Ruhl Libre',serif;font-size:18px;font-weight:700;color:#0E2A47;letter-spacing:1.5px;margin-bottom:24px}}
.logo .accent{{color:#C89B3C}}
.success{{width:64px;height:64px;background:#4A6B3E;color:white;border-radius:50%;display:flex;align-items:center;justify-content:center;font-size:30px;font-weight:700;margin-bottom:20px}}
h1{{font-family:'Frank Ruhl Libre',serif;font-size:28px;font-weight:700;color:#0E2A47;margin-bottom:12px}}
p{{font-size:15px;color:#2C3344;line-height:1.7;margin-bottom:18px}}
a{{color:#8C6A20;font-weight:600;text-decoration:none}}
code{{font-family:'JetBrains Mono',monospace;background:#F4EFE2;padding:2px 6px;border-radius:2px;font-size:13px}}
.footer{{font-family:'Frank Ruhl Libre',serif;font-style:italic;color:#5C677A;font-size:13px;margin-top:24px;padding-top:18px;border-top:1px solid #F0EBDC}}
</style></head>
<body><div class="card">
<div class="logo">LEGAL<span class="accent"> EYE</span></div>
<div class="success">✓</div>
<h1>המנוי אושר</h1>
<p>תקבל מאיתנו עדכון אחד ~פעם בחודש: השקות, eval שבועי, החלטות גדולות. ביטול בכל עת.</p>
<p><small style="color:#5C677A">אימייל: <code>{safe_email}</code></small></p>
<p><a href="/">לחזרה לאתר ←</a></p>
<div class="footer">רואים משפט אחרת</div>
</div></body></html>"""


@app.get("/v1/admin/newsletter/list")
def admin_newsletter_list(token: str = "", limit: int = 500):  # type: ignore
    """v2.99.177 — Admin: list newsletter subscribers, newest first.

    Args:
      token: must equal the LE_ADMIN_TOKEN environment variable; if that
             variable is unset or empty every request is refused.
      limit: maximum number of records returned, clamped to 1..2000.

    Returns:
      ``{"ok": True, "n_total": <all records>, "items": [...]}`` or a 403
      JSONResponse when the token check fails.
    """
    import hmac
    import os
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison; bytes because compare_digest() rejects
    # non-ASCII str and `token` comes straight from the query string.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    # Reuse the shared loader instead of a private copy of the JSONL parser.
    # Non-dict rows are dropped so the sort key below cannot fail on them.
    items = [r for r in _load_newsletter_subscribers() if isinstance(r, dict)]
    items.sort(key=lambda r: -(r.get("ts") or 0))
    return {"ok": True, "n_total": len(items), "items": items[:max(1, min(limit, 2000))]}


# Register the newsletter subscriber store with the backup file list
# (idempotent). Best-effort: any exception raised here is swallowed so that
# importing this module never fails because of backup bookkeeping.
try:
    if "tau_rag/runtime/newsletter_subscribers.jsonl" not in _BACKUP_FILE_PATHS:
        _BACKUP_FILE_PATHS.append(
            "tau_rag/runtime/newsletter_subscribers.jsonl"
        )
except Exception:
    pass


# v2.99.184 — Newsletter sender + unsubscribe (HMAC-token based)
def _newsletter_token(email: str) -> str:
    """Derive the stable unsubscribe token for *email*.

    The token is the first 24 hex characters (96 bits) of
    HMAC-SHA256(secret, lowercased email), where the secret is the
    LE_ADMIN_TOKEN environment variable.

    NOTE(review): when LE_ADMIN_TOKEN is unset the key falls back to a
    constant embedded in this source, so tokens are then predictable by
    anyone who can read the code.
    """
    import hashlib
    import hmac
    import os
    key = os.environ.get("LE_ADMIN_TOKEN", "le-fallback-secret").encode("utf-8")
    message = email.lower().encode("utf-8")
    mac = hmac.new(key, message, hashlib.sha256)
    return mac.hexdigest()[:24]


def _newsletter_unsubscribe_url(email: str) -> str:
    """Return the public unsubscribe link for *email*.

    The address is percent-encoded for the ``email`` query parameter and the
    ``t`` parameter carries the token from ``_newsletter_token(email)``.
    """
    from urllib.parse import quote
    encoded_email = quote(email)
    token = _newsletter_token(email)
    return (
        "https://legal-eye.1bigfam.com/v1/newsletter/unsubscribe"
        f"?email={encoded_email}&t={token}"
    )


def _load_newsletter_subscribers() -> List[dict]:
    """Read every subscriber record from the newsletter JSONL file.

    Blank lines, lines that are not valid JSON, and JSON values that are not
    objects are skipped, so callers can rely on every item being a dict.
    Best-effort: a missing or unreadable file yields whatever was parsed
    before the failure (possibly an empty list) instead of raising.
    """
    items: List[dict] = []
    if not _NEWSLETTER_PATH.exists():
        return items
    try:
        with open(_NEWSLETTER_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                except Exception:
                    # Was a bare `except:`, which also trapped
                    # KeyboardInterrupt / SystemExit.
                    continue
                if isinstance(rec, dict):
                    items.append(rec)
    except Exception:
        # Deliberate best-effort read: return what we have.
        pass
    return items


def _save_newsletter_subscribers(items: List[dict]) -> None:
    """Rewrite the subscriber JSONL file with *items*, one JSON object per line.

    The records are written to a sibling ``.jsonl.tmp`` file which is then
    renamed over the real file, all while holding ``_NEWSLETTER_LOCK``.
    """
    with _NEWSLETTER_LOCK:
        scratch = _NEWSLETTER_PATH.with_suffix(".jsonl.tmp")
        with open(scratch, "w", encoding="utf-8") as out:
            out.writelines(
                json.dumps(rec, ensure_ascii=False) + "\n" for rec in items
            )
        scratch.replace(_NEWSLETTER_PATH)


@app.get("/v1/newsletter/unsubscribe")
def newsletter_unsubscribe(email: str = "", t: str = ""):  # type: ignore
    """v2.99.184 — Public unsubscribe with HMAC token verification.

    Matching subscriber records are stamped with ``unsubscribed_ts`` /
    ``unsubscribed_iso`` rather than deleted, so the admin keeps the history.

    Args:
      email: address from the unsubscribe link (trimmed and lowercased here).
      t:     token from the link; must equal ``_newsletter_token(email)``.

    Returns:
      HTMLResponse — 400 when the email/token is missing or wrong, 404 when
      no subscriber record matches, otherwise the branded confirmation page.
    """
    from fastapi.responses import HTMLResponse
    import hmac
    import time
    em = (email or "").strip().lower()
    expected = _newsletter_token(em) if em else ""
    # Constant-time comparison so response timing does not reveal how much of
    # a guessed token is correct. Compare bytes: compare_digest() raises
    # TypeError for non-ASCII str, and `t` is attacker-controlled.
    token_ok = bool(em and t) and hmac.compare_digest(
        t.encode("utf-8"), str(expected).encode("utf-8")
    )
    if not token_ok:
        return HTMLResponse(content=_unsub_page(False, "קישור לא תקף או פג תוקף."), status_code=400)
    items = _load_newsletter_subscribers()
    found = False
    for it in items:
        if (it.get("email", "") or "").lower() == em:
            it["unsubscribed_ts"] = time.time()
            it["unsubscribed_iso"] = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime())
            found = True
    if not found:
        return HTMLResponse(content=_unsub_page(False, "האימייל לא נמצא ברשימת המנויים."), status_code=404)
    _save_newsletter_subscribers(items)
    _audit_log(actor=em, role="user", action="newsletter_unsubscribe",
               target="(self)", meta={})
    return HTMLResponse(content=_unsub_page(True, em))


def _unsub_page(success: bool, detail: str) -> str:
    """Return the branded HTML page for an unsubscribe (or failed link) result.

    Args:
      success: True renders the "subscription removed" page; False renders
               the failure page.
      detail:  on success, the subscriber's email address — it is
               HTML-escaped before being embedded. On failure, the message to
               display; it is inserted as-is, so callers must pass trusted
               text only.

    Returns:
      A complete HTML document as a string.
    """
    import html
    if success:
        title = "המנוי הוסר ✓"
        # The address originates from the request URL, so escape it.
        safe_email = html.escape(detail or "")
        body = f"לא תקבל יותר אימיילים מ-legal-eye.<br><br><small style='color:#5C677A'>אימייל: <code>{safe_email}</code></small>"
    else:
        title = "לא ניתן להסיר"
        body = detail
    return f"""<!doctype html>
<html lang="he" dir="rtl"><head>
<meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1">
<title>{title} · legal-eye</title>
<meta name="robots" content="noindex,nofollow">
<link rel="preconnect" href="https://fonts.googleapis.com">
<link href="https://fonts.googleapis.com/css2?family=Frank+Ruhl+Libre:wght@400;700&family=Heebo:wght@400;500&display=swap" rel="stylesheet">
<style>
body{{margin:0;background:#FBF8F1;font-family:Heebo,sans-serif;color:#0A0F1A;direction:rtl;min-height:100vh;display:flex;align-items:center;justify-content:center;padding:20px}}
.card{{background:white;border:1px solid #E8E2D2;max-width:480px;width:100%;padding:40px 36px;position:relative}}
.card::before{{content:'';position:absolute;top:0;right:0;width:4px;height:60px;background:linear-gradient(180deg,#C89B3C,transparent)}}
.logo{{font-family:'Frank Ruhl Libre',serif;font-size:18px;font-weight:700;color:#0E2A47;letter-spacing:1.5px;margin-bottom:24px}}
.logo .accent{{color:#C89B3C}}
h1{{font-family:'Frank Ruhl Libre',serif;font-size:28px;font-weight:700;color:#0E2A47;margin-bottom:12px}}
p{{font-size:15px;color:#2C3344;line-height:1.7;margin-bottom:18px}}
a{{color:#8C6A20;font-weight:600;text-decoration:none}}
code{{font-family:'JetBrains Mono',monospace;background:#F4EFE2;padding:2px 6px;border-radius:2px;font-size:13px}}
.footer{{font-family:'Frank Ruhl Libre',serif;font-style:italic;color:#5C677A;font-size:13px;margin-top:24px;padding-top:18px;border-top:1px solid #F0EBDC}}
</style></head>
<body><div class="card">
<div class="logo">LEGAL<span class="accent"> EYE</span></div>
<h1>{title}</h1>
<p>{body}</p>
<p><a href="/">לחזרה לאתר ←</a></p>
<div class="footer">רואים משפט אחרת</div>
</div></body></html>"""


class _NewsletterSendBody(BaseModel):  # type: ignore
    """Request body for POST /v1/admin/newsletter/send."""
    # Email subject line; the send handler rejects it if it is shorter than
    # 3 characters after stripping whitespace.
    subject:    str
    # Plain-text body; the send handler rejects it if it is shorter than
    # 10 characters after stripping whitespace.
    body_text:  str
    # Intended as an optional HTML body (if empty, body_text is wrapped in the
    # branded template). NOTE(review): admin_newsletter_send does not appear
    # to read this field — confirm whether it is consumed elsewhere.
    body_html:  Optional[str] = ""    # if empty, body_text is wrapped in branded template
    # When true the handler only reports the recipient count and a preview.
    dry_run:    Optional[bool] = False  # if True, count but don't send


# Append-only JSONL log: admin_newsletter_send writes one summary record here
# per (non-dry-run) send; admin_newsletter_sends_list reads it back.
_NEWSLETTER_SENDS_PATH = _wa_pl.Path("tau_rag/runtime/newsletter_sends.jsonl")


@app.post("/v1/admin/newsletter/send")
def admin_newsletter_send(req: _NewsletterSendBody, token: str = ""):  # type: ignore
    """v2.99.184 — Send a newsletter to every active subscriber.

    "Active" means the record has neither ``unsubscribed_ts`` nor ``pending``
    set. Addresses are lowercased and de-duplicated (first occurrence wins).
    Each message gets the recipient's own unsubscribe link appended to the
    text body. A summary record is appended to newsletter_sends.jsonl and an
    audit event is written.

    Args:
      req:   subject / body_text / dry_run (``body_html`` is not read here).
      token: must equal the LE_ADMIN_TOKEN environment variable.

    Returns:
      * 403 JSONResponse on a bad token; 400 when the subject (< 3 chars) or
        body (< 10 chars) is too short.
      * dry run: ``{"ok", "dry_run", "n_recipients", "preview_emails"}``.
      * otherwise send stats, including up to 20 failed addresses.

    Sending is synchronous: the request returns only after every recipient
    has been attempted.
    """
    import hmac
    import os
    import time
    import uuid
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison; bytes because compare_digest() rejects
    # non-ASCII str and `token` comes straight from the query string.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})

    subject = (req.subject or "").strip()
    body_text = (req.body_text or "").strip()
    if len(subject) < 3:
        return JSONResponse(status_code=400, content={"ok": False, "reason": "subject_required"})
    if len(body_text) < 10:
        return JSONResponse(status_code=400, content={"ok": False, "reason": "body_required"})

    subscribers = _load_newsletter_subscribers()
    # v2.99.192 — skip pending (unverified) subscribers, not just unsubscribed.
    # Records that are not dicts or whose email is not a string are ignored
    # instead of crashing the whole send.
    active = [s for s in subscribers
              if isinstance(s, dict)
              and not s.get("unsubscribed_ts") and not s.get("pending")]
    emails = [s["email"].lower() for s in active
              if s.get("email") and isinstance(s.get("email"), str)]
    emails = list(dict.fromkeys(emails))  # dedupe preserving order

    if req.dry_run:
        return {"ok": True, "dry_run": True, "n_recipients": len(emails), "preview_emails": emails[:5]}

    # Iterate + send. Each email gets its own unsubscribe link.
    send_id = str(uuid.uuid4())[:12]
    sent = []
    failed = []
    started_ts = time.time()
    for em in emails:
        unsub_url = _newsletter_unsubscribe_url(em)
        # Append unsubscribe block to body
        body_with_unsub = (
            body_text +
            "\n\n────────────────────────────────────────\n"
            "להסרת המנוי באופן מיידי, לחץ כאן:\n"
            f"{unsub_url}\n"
        )
        try:
            ok = _send_email_branded(
                em, subject, body_with_unsub,
                title=subject,
                cta_label="לאתר →",
                cta_url="https://legal-eye.1bigfam.com/",
                timeout=30,
            )
        except Exception as e:
            # One bad recipient must not abort the batch, otherwise the
            # remaining subscribers are skipped and no send record is written.
            print(f"[newsletter] send FAIL: {type(e).__name__}: {str(e)[:120]}")
            ok = False
        if ok:
            sent.append(em)
        else:
            failed.append(em)

    elapsed = time.time() - started_ts
    # Audit + persist send log
    record = {
        "send_id":      send_id,
        "ts":           started_ts,
        "iso":          time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime(started_ts)),
        "subject":      subject,
        "n_attempted":  len(emails),
        "n_sent":       len(sent),
        "n_failed":     len(failed),
        "elapsed_sec":  round(elapsed, 1),
    }
    try:
        _NEWSLETTER_SENDS_PATH.parent.mkdir(parents=True, exist_ok=True)
        with open(_NEWSLETTER_SENDS_PATH, "a", encoding="utf-8") as f:
            f.write(json.dumps(record, ensure_ascii=False) + "\n")
    except Exception as e:
        # Still best-effort, but leave a trace instead of failing silently.
        print(f"[newsletter] send-log FAIL: {type(e).__name__}: {e}")
    _audit_log(actor="admin", role="admin", action="newsletter_sent",
               target=send_id, meta={"n_sent": len(sent), "n_failed": len(failed), "subject": subject})

    return {
        "ok":           True,
        "send_id":      send_id,
        "n_attempted":  len(emails),
        "n_sent":       len(sent),
        "n_failed":     len(failed),
        "elapsed_sec":  round(elapsed, 1),
        "failed_emails": failed[:20],   # first 20 for inspection
    }


@app.get("/v1/admin/newsletter/sends")
def admin_newsletter_sends_list(token: str = "", limit: int = 50):  # type: ignore
    """v2.99.184 — Admin: history of newsletter sends, newest first.

    Args:
      token: must equal the LE_ADMIN_TOKEN environment variable; if that
             variable is unset or empty every request is refused.
      limit: maximum number of records returned, clamped to 1..500.

    Returns:
      ``{"ok": True, "n_total": <all records>, "items": [...]}`` or a 403
      JSONResponse when the token check fails. Reading is best-effort: an
      unreadable log yields whatever was parsed before the failure.
    """
    import hmac
    import os
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison; bytes because compare_digest() rejects
    # non-ASCII str and `token` comes straight from the query string.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    items = []
    if _NEWSLETTER_SENDS_PATH.exists():
        try:
            with open(_NEWSLETTER_SENDS_PATH, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        rec = json.loads(line)
                    except Exception:
                        # Was a bare `except:`; skip malformed lines only.
                        continue
                    # Non-dict rows would break the sort key below.
                    if isinstance(rec, dict):
                        items.append(rec)
        except Exception:
            pass  # deliberate best-effort read
    items.sort(key=lambda r: -(r.get("ts") or 0))
    return {"ok": True, "n_total": len(items), "items": items[:max(1, min(limit, 500))]}


# Register the newsletter send log with the backup file list (idempotent).
# Best-effort: any exception raised here is swallowed so that importing this
# module never fails because of backup bookkeeping.
try:
    if "tau_rag/runtime/newsletter_sends.jsonl" not in _BACKUP_FILE_PATHS:
        _BACKUP_FILE_PATHS.append(
            "tau_rag/runtime/newsletter_sends.jsonl"
        )
except Exception:
    pass


# v2.99.187 — Test data cleanup
# Default patterns that suggest test data (used by C-phase smoke tests).
# Each entry is matched as a case-insensitive SUBSTRING of the checked field
# (see _record_matches_patterns), not as a whole address or domain, so short
# entries such as "test+" also match inside longer values.
_DEFAULT_TEST_PATTERNS = [
    "@test.com", "@example.com", "@example.net", "@example.org",
    "@invalid.com", "@nowhere.com", "+test@", "+smoke@", "burst+", "test+",
    "@spam.com", "@mailinator.com", "@guerrillamail.",
]


def _record_matches_patterns(rec: dict, patterns: List[str], email_fields: List[str]) -> bool:
    """Return True if any listed field of *rec* contains any pattern.

    Matching is a case-insensitive substring test. Fields that are missing,
    empty, or not strings never match, and a *rec* that is not a dict never
    matches — this function feeds a destructive cleanup, so anything it
    cannot interpret is treated as "not test data" rather than raising.

    Args:
      rec:          one decoded JSONL record.
      patterns:     substrings to look for.
      email_fields: names of the record fields to inspect.
    """
    if not isinstance(rec, dict):
        return False
    # Lowercase the patterns once instead of once per field.
    needles = [p.lower() for p in patterns]
    for fld in email_fields:
        raw = rec.get(fld)
        if not raw or not isinstance(raw, str):
            continue
        haystack = raw.lower()
        if any(needle in haystack for needle in needles):
            return True
    return False


# Files covered by the admin cleanup scan/delete endpoints. Each entry is
#   (jsonl_path, [record fields checked against the patterns], lock_name)
# where lock_name is the NAME (a string) of a module-level lock, or None.
# Note that some of the checked fields ("name", "text") are free text rather
# than email addresses.
_CLEANUP_TARGETS = [
    # (jsonl_path, [email_fields_to_check], lock)
    ("tau_rag/runtime/lawyer_requests.jsonl",      ["contact", "user_email", "name"],  "_LAWYER_REQUEST_LOCK"),
    ("tau_rag/runtime/lawyer_applications.jsonl",  ["email"],                          "_LAWYER_APP_LOCK"),
    ("tau_rag/runtime/newsletter_subscribers.jsonl", ["email"],                        "_NEWSLETTER_LOCK"),
    ("tau_rag/runtime/feedback.jsonl",             ["email", "user_email"],            None),
    ("tau_rag/runtime/triage_log.jsonl",           ["text"],                           "_TRIAGE_LOG_LOCK"),
    # users.jsonl handled separately via auth module
]


@app.get("/v1/admin/search")
def admin_search(q: str = "", token: str = "", limit: int = 20):  # type: ignore
    """v2.99.191 — Cross-record admin search.

    Searches lawyer_applications, lawyer_requests, newsletter_subscribers,
    users and feedback with a case-insensitive substring match on a fixed
    set of fields per source.

    Args:
      q:     search text; trimmed and lowercased, must be at least 2 chars.
      token: must equal the LE_ADMIN_TOKEN environment variable.
      limit: maximum number of items returned PER SOURCE; negative values
             are treated as 0 (a negative slice bound would otherwise drop
             items from the end instead of limiting the result).

    Returns:
      ``{"ok", "q", "total_matches", "by_source"}`` where each source holds
      ``n`` (all matches) and ``items`` (at most ``limit`` summaries); a 403
      JSONResponse on a bad token; a 400 when the query is too short.
    """
    import hmac
    import os
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison; bytes because compare_digest() rejects
    # non-ASCII str and `token` comes straight from the query string.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    q = (q or "").strip().lower()
    if len(q) < 2:
        return JSONResponse(status_code=400, content={"ok": False, "reason": "query_too_short", "hint": "Min 2 chars"})
    limit = max(0, limit)

    def load_jsonl(p):
        """Best-effort JSONL reader; skips blank, malformed and non-dict lines."""
        items = []
        if not p.exists():
            return items
        try:
            with open(p, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        rec = json.loads(line)
                    except Exception:
                        continue
                    if isinstance(rec, dict):
                        items.append(rec)
        except Exception:
            pass
        return items

    def match(rec, fields):
        """True if any of the listed fields contains q."""
        for f in fields:
            v = (rec.get(f, "") or "")
            if isinstance(v, list):
                v = " ".join(str(x) for x in v)
            v = str(v).lower()
            if q in v:
                return True
        return False

    def short_id(rec, n):
        """First n characters of the record id; tolerates null / non-string ids."""
        return str(rec.get("id") or "")[:n]

    results = {}

    # Lawyer applications
    apps = load_jsonl(_LAWYER_APP_PATH)
    app_matches = [a for a in apps if match(a, ["full_name", "email", "license_number", "phone", "city", "bio"])]
    results["lawyer_applications"] = {
        "n": len(app_matches),
        "items": [
            {
                "id":             short_id(a, 8),
                "full_name":      a.get("full_name", ""),
                "email":          a.get("email", ""),
                "license_number": a.get("license_number", ""),
                "status":         a.get("status", "submitted"),
                "city":           a.get("city", ""),
                "iso":            a.get("iso", ""),
                "link":           "/admin-leads/lawyer-applications.html",
            }
            for a in app_matches[:limit]
        ],
    }

    # Lawyer requests
    reqs = load_jsonl(_LAWYER_REQUEST_PATH)
    req_matches = [r for r in reqs if match(r, ["name", "contact", "question", "user_email", "assigned_lawyer_name"])]
    results["lawyer_requests"] = {
        "n": len(req_matches),
        "items": [
            {
                "id":       short_id(r, 8),
                "name":     r.get("name", ""),
                "contact":  r.get("contact", ""),
                "question": (r.get("question", "") or "")[:80],
                "status":   r.get("status", "new"),
                "domain":   r.get("domain", ""),
                "iso":      r.get("iso", ""),
                "link":     "/admin-leads/lawyer-requests.html",
            }
            for r in req_matches[:limit]
        ],
    }

    # Newsletter subscribers
    subs = load_jsonl(_NEWSLETTER_PATH)
    sub_matches = [s for s in subs if match(s, ["email", "source"])]
    results["newsletter_subscribers"] = {
        "n": len(sub_matches),
        "items": [
            {
                "email":           s.get("email", ""),
                "source":          s.get("source", ""),
                "iso":             s.get("iso", ""),
                "unsubscribed":    bool(s.get("unsubscribed_ts")),
                "link":            "/admin-leads/newsletter.html",
            }
            for s in sub_matches[:limit]
        ],
    }

    # Users (via auth module). Best-effort: if the auth module is missing or
    # fails, the users source is simply reported as empty.
    user_matches = []
    try:
        from ..auth import list_users
        users = list_users()
        user_matches = [u for u in users if match(u, ["email", "id"])]
    except Exception:
        pass
    results["users"] = {
        "n": len(user_matches),
        "items": [
            {
                "id":      short_id(u, 12),
                "email":   u.get("email", ""),
                "created_iso": u.get("created_iso", ""),
                "link":    "",
            }
            for u in user_matches[:limit]
        ],
    }

    # Feedback
    fbs = load_jsonl(_FEEDBACK_PATH)
    fb_matches = [f for f in fbs if match(f, ["message", "email", "user_email", "page", "kind"])]
    results["feedback"] = {
        "n": len(fb_matches),
        "items": [
            {
                "id":      short_id(f, 8),
                "kind":    f.get("kind", "general"),
                "message": (f.get("message", "") or "")[:100],
                "page":    f.get("page", ""),
                "iso":     f.get("iso", ""),
                "link":    "/admin-leads/audit-log.html",
            }
            for f in fb_matches[:limit]
        ],
    }

    total = sum(v["n"] for v in results.values())
    return {"ok": True, "q": q, "total_matches": total, "by_source": results}


@app.post("/v1/admin/cleanup/scan")
def admin_cleanup_scan(request: Request, token: str = ""):  # type: ignore
    """v2.99.187 — Dry-run: scan the cleanup-target JSONL files and report matches.

    Patterns default to ``_DEFAULT_TEST_PATTERNS``. To override them pass a
    comma-separated ``?patterns=a,b,c`` query parameter; the request body is
    not read. Nothing is modified.

    Args:
      request: used only for its query parameters.
      token:   must equal the LE_ADMIN_TOKEN environment variable.

    Returns:
      ``{"ok", "patterns", "total_matches", "by_file"}`` where ``by_file``
      maps each target path to ``total`` / ``matches`` / ``sample`` (up to 5
      matching records). A file that cannot be read is left out of
      ``by_file``. Returns a 403 JSONResponse on a bad token.
    """
    import hmac
    import os
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison; bytes because compare_digest() rejects
    # non-ASCII str and `token` comes straight from the query string.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    patterns = _DEFAULT_TEST_PATTERNS[:]
    custom = request.query_params.get("patterns", "")
    if custom:
        patterns = [p.strip() for p in custom.split(",") if p.strip()]

    summary = {}
    total = 0
    for path_str, fields, _ in _CLEANUP_TARGETS:
        p = _wa_pl.Path(path_str)
        if not p.exists():
            summary[path_str] = {"total": 0, "matches": 0, "sample": []}
            continue
        items = []
        try:
            with open(p, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        items.append(json.loads(line))
                    except Exception:
                        continue
        except Exception:
            continue
        # Only dict records can be inspected; anything else cannot match.
        matches = [it for it in items
                   if isinstance(it, dict) and _record_matches_patterns(it, patterns, fields)]
        summary[path_str] = {
            "total": len(items),
            "matches": len(matches),
            "sample": [
                {
                    # First non-empty checked field; for some targets this is
                    # a name or free text rather than an email address.
                    "email": next((it.get(f, "") for f in fields if it.get(f)), ""),
                    "iso":   it.get("iso", ""),
                    # str() so a null / numeric id cannot break the slice.
                    "id":    str(it.get("id") or "")[:8],
                }
                for it in matches[:5]
            ],
        }
        total += len(matches)
    return {
        "ok":             True,
        "patterns":       patterns,
        "total_matches":  total,
        "by_file":        summary,
    }


@app.post("/v1/admin/cleanup/delete")
def admin_cleanup_delete(request: Request, token: str = "", confirm: str = ""):  # type: ignore
    """v2.99.187 — DESTRUCTIVE: remove matching records from the cleanup targets.

    Requires ``confirm=YES_DELETE_TEST_DATA``. Patterns default to
    ``_DEFAULT_TEST_PATTERNS`` and can be overridden with a comma-separated
    ``?patterns=`` query parameter. Each file that has matches is rewritten
    via a temp file + rename while holding the lock named for it in
    ``_CLEANUP_TARGETS`` (when such a lock exists), so records appended by
    other handlers during the rewrite are not lost. The run is audit-logged.

    Note: lines that are not valid JSON are not carried over when a file is
    rewritten.

    Returns:
      ``{"ok", "patterns", "total_deleted", "by_file"}`` with per-file
      ``deleted`` / ``kept`` counts; 403 on a bad token; 400 without the
      confirmation value. A file that cannot be read is left out of
      ``by_file`` and is not modified.
    """
    import contextlib
    import hmac
    import os
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison; bytes because compare_digest() rejects
    # non-ASCII str and `token` comes straight from the query string.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    if confirm != "YES_DELETE_TEST_DATA":
        return JSONResponse(status_code=400, content={
            "ok": False, "reason": "confirm_required",
            "hint": "Pass ?confirm=YES_DELETE_TEST_DATA to acknowledge.",
        })
    patterns = _DEFAULT_TEST_PATTERNS[:]
    custom = request.query_params.get("patterns", "")
    if custom:
        patterns = [p.strip() for p in custom.split(",") if p.strip()]

    result = {}
    grand_deleted = 0
    for path_str, fields, lock_name in _CLEANUP_TARGETS:
        p = _wa_pl.Path(path_str)
        if not p.exists():
            result[path_str] = {"deleted": 0, "kept": 0}
            continue
        # Resolve the lock by name; fall back to no locking when the target
        # has none or the name does not resolve to a usable context manager.
        lock = globals().get(lock_name) if lock_name else None
        if lock is None or not (hasattr(lock, "__enter__") and hasattr(lock, "__exit__")):
            lock = contextlib.nullcontext()
        with lock:
            items = []
            try:
                with open(p, "r", encoding="utf-8") as f:
                    for line in f:
                        line = line.strip()
                        if not line:
                            continue
                        try:
                            items.append(json.loads(line))
                        except Exception:
                            continue
            except Exception:
                continue
            # Records that are not dicts cannot be inspected, so keep them.
            kept = [it for it in items
                    if not (isinstance(it, dict)
                            and _record_matches_patterns(it, patterns, fields))]
            deleted_n = len(items) - len(kept)
            if deleted_n > 0:
                # Atomic rewrite
                tmp = p.with_suffix(p.suffix + ".cleanup.tmp")
                with open(tmp, "w", encoding="utf-8") as f:
                    for it in kept:
                        f.write(json.dumps(it, ensure_ascii=False) + "\n")
                tmp.replace(p)
        result[path_str] = {"deleted": deleted_n, "kept": len(kept)}
        grand_deleted += deleted_n

    _audit_log(actor="admin", role="admin", action="test_data_cleanup",
               target="(bulk)", meta={"patterns": patterns, "n_deleted": grand_deleted})

    return {"ok": True, "patterns": patterns, "total_deleted": grand_deleted, "by_file": result}


@app.post("/v1/feedback/submit")
def feedback_submit(body: _FeedbackBody, request: Request):  # type: ignore
    """Accept feedback from anyone (signed-in or anonymous).

    v2.99.180 — honeypot + IP rate limit (5/min, burst 8).

    Flow:
      * honeypot hit -> a success-shaped response with id "drop"; nothing is stored.
      * rate limit exceeded -> 429.
      * message shorter than 4 chars after stripping -> 400.
      * otherwise the record is appended to the feedback JSONL file under
        ``_FEEDBACK_LOCK`` and an audit event is written.

    The submitter's identity is taken from the Authorization header when a
    session can be resolved; any failure there just means "anonymous".

    Returns:
      ``{"ok": True, "id", "message"}`` on success, or a JSONResponse with
      ``ok: False``. Unexpected errors produce a 500 with a generic
      ``error`` value; the details are printed server-side only.
    """
    import time, uuid
    if _honeypot_caught(body.hp, body.hp2):
        return {"ok": True, "id": "drop"}
    if not _public_rate_check(request, "feedback", per_min=5, burst=8):
        return JSONResponse(status_code=429, content={"ok": False, "reason": "rate_limited"})
    try:
        msg = (body.message or "").strip()
        if len(msg) < 4:
            return JSONResponse(status_code=400, content={"ok": False, "reason": "message_too_short"})
        # Best-effort identity from auth header
        signed_in = None
        try:
            from ..auth import session_from_header
            sess = session_from_header(request.headers.get("authorization"))
            if sess:
                signed_in = {"id": sess["user_id"], "email": sess["email"]}
        except Exception:
            # Was a bare `except:`, which also trapped KeyboardInterrupt /
            # SystemExit. Identity stays anonymous on any ordinary failure.
            pass
        record = {
            "id":      str(uuid.uuid4())[:12],
            "ts":      time.time(),
            "iso":     time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()),
            "message": msg[:4000],
            # Unknown or missing kinds collapse to "general".
            "kind":    body.kind if body.kind in ("bug", "suggestion", "general", "praise") else "general",
            "page":    (body.page or "")[:200],
            "rating":  body.rating if (body.rating and 1 <= body.rating <= 5) else None,
            "user_id": signed_in["id"] if signed_in else None,
            "user_email": signed_in["email"] if signed_in else None,
            "user_agent": (request.headers.get("user-agent") or "")[:200],
            "status":  "new",
        }
        _FEEDBACK_PATH.parent.mkdir(parents=True, exist_ok=True)
        with _FEEDBACK_LOCK:
            with open(_FEEDBACK_PATH, "a", encoding="utf-8") as f:
                f.write(json.dumps(record, ensure_ascii=False) + "\n")
        _audit_log(
            actor=signed_in["email"] if signed_in else "anonymous",
            role="user" if signed_in else "anonymous",
            action="feedback_submitted",
            target=record["id"],
            meta={"kind": record["kind"], "rating": record["rating"]},
        )
        return {"ok": True, "id": record["id"], "message": "תודה על המשוב!"}
    except Exception as e:
        # This endpoint is public: do not echo exception text (it can contain
        # file paths or other internals) back to the caller.
        print(f"[feedback] FAIL: {type(e).__name__}: {e}")
        return JSONResponse(status_code=500, content={"ok": False, "error": "internal_error"})


@app.get("/v1/admin/feedback")
def admin_feedback_list(token: str = "", limit: int = 200, kind: str = ""):  # type: ignore
    """Admin: list submitted feedback, newest first.

    Args:
      token: must equal the LE_ADMIN_TOKEN environment variable; if that
             variable is unset or empty every request is refused.
      limit: maximum number of records returned, clamped to 1..2000.
      kind:  when non-empty, only records whose ``kind`` equals it are kept.

    Returns:
      ``{"ok": True, "n": <records after filtering>, "items": [...]}`` or a
      403 JSONResponse when the token check fails.
    """
    import hmac
    import os
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison; bytes because compare_digest() rejects
    # non-ASCII str and `token` comes straight from the query string.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    if not _FEEDBACK_PATH.exists():
        return {"ok": True, "n": 0, "items": []}
    items = []
    with open(_FEEDBACK_PATH, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                rec = json.loads(line)
            except Exception:
                # Was a bare `except:`; skip malformed lines only.
                continue
            # Non-dict rows have no `kind` / `ts` and would raise below.
            if not isinstance(rec, dict):
                continue
            if kind and rec.get("kind") != kind:
                continue
            items.append(rec)
    items.sort(key=lambda r: -(r.get("ts") or 0))
    return {"ok": True, "n": len(items), "items": items[:max(1, min(limit, 2000))]}


def _audit_log(actor: str, role: str, action: str,
               target: str = "", meta: Optional[dict] = None) -> None:
    """Append one event to the audit log. Best-effort: never raises.

    Args:
      actor:  email or 'system'; stored trimmed and lowercased
              (empty -> 'system').
      role:   user | lawyer | admin | system | anonymous (empty -> 'system').
      action: short snake_case event id (e.g. 'lawyer_assigned').
      target: request_id / lawyer_id / user_id / etc.
      meta:   free-form dict (keep it compact — avoid PII bombs).

    v2.99.195 — after the line is written the event is also handed to
    ``_fire_audit_webhook_async``; a failure there is ignored. Any other
    failure is printed with an ``[audit] FAIL`` prefix and swallowed.
    """
    import time
    import uuid
    try:
        _AUDIT_LOG_PATH.parent.mkdir(parents=True, exist_ok=True)
        event = dict(
            id=str(uuid.uuid4())[:12],
            ts=time.time(),
            iso=time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()),
            actor=(actor or "system").strip().lower(),
            role=role or "system",
            action=action,
            target=target or "",
            meta=meta or {},
        )
        with _AUDIT_LOG_LOCK, open(_AUDIT_LOG_PATH, "a", encoding="utf-8") as sink:
            sink.write(json.dumps(event, ensure_ascii=False) + "\n")
        # v2.99.195 — webhook fan-out must not affect the main path.
        try:
            _fire_audit_webhook_async(event)
        except Exception:
            pass
    except Exception as exc:
        print(f"[audit] FAIL: {type(exc).__name__}: {exc}")


def _fire_audit_webhook_async(audit_rec: dict) -> None:
    """v2.99.195 — POST an audit event to LE_AUDIT_WEBHOOK_URL, if configured.

    Does nothing when the environment variable is unset or blank. Otherwise
    a daemon thread builds the payload and POSTs it as JSON with a 10s
    timeout. URLs that look like Slack or Discord webhooks get a one-line
    text summary; any other URL gets the summary plus the full record.
    Failures and non-2xx responses are only printed.
    """
    import os, threading
    url = os.environ.get("LE_AUDIT_WEBHOOK_URL", "").strip()
    if not url:
        return

    def _post_event():
        import urllib.request, urllib.error
        # Chat webhooks want a text field; generic receivers take the record.
        chat_style = (
            "hooks.slack.com" in url
            or "discord.com/api/webhooks" in url
            or "discordapp.com" in url
        )

        # Human-readable one-liner: "<emoji> <action> · <actor>[ · <target>]"
        who = audit_rec.get("actor", "?")[:40]
        what = audit_rec.get("action", "?")
        where = audit_rec.get("target", "")[:20]
        icons = {
            "newsletter_subscribe":      "📬",
            "newsletter_verified":       "✓",
            "newsletter_unsubscribe":    "🚫",
            "newsletter_sent":           "📤",
            "lawyer_application_submitted": "⚖",
            "lawyer_assigned":           "🎯",
            "lawyer_offered":            "⚡",
            "deliverable_submitted":     "📄",
            "deliverable_approved":      "✓",
            "deliverable_disputed":      "⚠",
            "dispute_resolved":          "⚖",
            "auto_release":              "⏱",
            "invite_consumed":           "🎟",
            "test_data_cleanup":         "🧹",
            "lawyer_note_added":         "📝",
            "backup_download":           "💾",
            "backup_restore":            "🔄",
        }
        icon = icons.get(what, "·")
        summary = f"{icon} {what} · {who}"
        if where:
            summary += f" · {where}"

        if chat_style:
            # "text" is for Slack, "content" for Discord; send both.
            payload = {"text": summary, "content": summary}
        else:
            payload = {
                "kind":   "legal-eye-audit",
                "summary": summary,
                "event":   audit_rec,
            }

        try:
            http_req = urllib.request.Request(
                url,
                data=json.dumps(payload).encode("utf-8"),
                headers={"Content-Type": "application/json", "User-Agent": "legal-eye/audit-webhook"},
                method="POST",
            )
            with urllib.request.urlopen(http_req, timeout=10) as resp:
                if not (200 <= resp.status < 300):
                    print(f"[audit-webhook] non-2xx: {resp.status}")
        except Exception as exc:
            print(f"[audit-webhook] FAIL: {type(exc).__name__}: {str(exc)[:120]}")

    threading.Thread(target=_post_event, daemon=True).start()


@app.get("/v1/admin/dashboard/stream")
async def admin_dashboard_stream(token: str = ""):  # type: ignore
    """v2.99.193 — Server-Sent Events feed of the admin dashboard.

    Emits one dashboard snapshot, then waits 5 seconds, 60 times over
    (about 5 minutes), and finishes with a ``close`` event. The token is
    taken from the query string because EventSource cannot send custom
    headers. Responds 403 unless ``token`` equals a non-empty
    ``LE_ADMIN_TOKEN``.
    """
    import os, asyncio, json as _j
    from fastapi.responses import StreamingResponse

    expected = os.environ.get("LE_ADMIN_TOKEN", "")
    authorized = bool(expected) and token == expected
    if not authorized:
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})

    max_ticks = 60     # 60 snapshots ...
    interval_s = 5     # ... 5 seconds apart = 5 minutes per connection

    async def event_gen():
        # Reconnect hint for the client: retry 5s after the stream drops.
        yield "retry: 5000\n\n"
        ticks_left = max_ticks
        while ticks_left > 0:
            ticks_left -= 1
            try:
                # admin_dashboard is synchronous; run it in a worker thread
                # so the event loop stays free while it reads state files.
                snapshot = await asyncio.to_thread(admin_dashboard, token)
                if isinstance(snapshot, dict):
                    yield "data: " + _j.dumps(snapshot, ensure_ascii=False) + "\n\n"
                else:
                    # Anything but a dict is the 403 response object, i.e.
                    # the token stopped being accepted mid-stream.
                    yield "event: auth-expired\ndata: {}\n\n"
                    return
            except Exception as exc:
                failure = _j.dumps({"ok": False, "error": str(exc)})
                yield "event: error\ndata: " + failure + "\n\n"
            await asyncio.sleep(interval_s)
        # Orderly end of stream — the client is expected to reconnect.
        yield "event: close\ndata: {}\n\n"

    sse_headers = {
        "Cache-Control": "no-cache, no-transform",
        "Connection":    "keep-alive",
        "X-Accel-Buffering": "no",   # keep nginx from buffering the stream
    }
    return StreamingResponse(
        event_gen(),
        media_type="text/event-stream",
        headers=sse_headers,
    )


@app.get("/v1/admin/dashboard")
def admin_dashboard(token: str = ""):  # type: ignore
    """v2.99.154 — Single-call aggregator for the founder overview dashboard.

    Returns a dict with:
      kpis     — last-7d vs prior-7d counts with percentage delta
      funnel   — all-time cumulative counts
      activity — events of the last 24h, newest first, at most 40
      alerts   — current-state warnings (not windowed)
      health   — configuration flags plus last backup / digest times
      totals   — record counts per state file

    Admin-token-gated: responds 403 unless ``token`` equals a non-empty
    ``LE_ADMIN_TOKEN``.

    State files are read best-effort: unreadable files, unparseable lines
    and JSON values that are not objects are skipped instead of failing the
    whole call.
    """
    import os, time
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    if not admin_token or token != admin_token:
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})

    def _read(p):
        """Return the JSON-object records of JSONL file `p` (best-effort)."""
        out = []
        try:
            if not p.exists():
                return out
            with open(p, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        rec = json.loads(line)
                    except Exception:
                        continue
                    # Every consumer below calls .get() on the record, so a
                    # stray list/number/string line must not get through.
                    if isinstance(rec, dict):
                        out.append(rec)
        except Exception:
            # Deliberately best-effort: keep whatever was read so far.
            pass
        return out

    def _read_json_state(p):
        """Return the JSON object stored in state file `p`, or {}."""
        try:
            if p.exists():
                data = json.loads(p.read_text(encoding="utf-8"))
                if isinstance(data, dict):
                    return data
        except Exception:
            pass
        return {}

    def _clip(value, n, default=""):
        """Stringify `value` (or `default` when it is None) and cut to n chars."""
        return str(default if value is None else value)[:n]

    now = time.time()
    w1_start = now - 7 * 24 * 3600     # last 7 days
    w2_start = now - 14 * 24 * 3600    # prior 7 days

    requests   = _read(_LAWYER_REQUEST_PATH)
    apps       = _read(_LAWYER_APP_PATH)
    audit      = _read(_AUDIT_LOG_PATH)
    feedback   = _read(_FEEDBACK_PATH)
    triage     = _read(_TRIAGE_LOG_PATH)
    try:
        from ..auth import USERS_PATH
        users  = _read(USERS_PATH)
    except Exception:
        users = []

    def _count_in(items, ts_key, lo, hi):
        return sum(1 for it in items if lo <= (it.get(ts_key) or 0) < hi)

    def _delta(curr, prev):
        # No baseline: report None when both are zero, else a flat +100%.
        if prev == 0:
            return None if curr == 0 else 100.0
        return round(((curr - prev) / prev) * 100.0, 1)

    def _kpi(items, ts_key, hi):
        curr = _count_in(items, ts_key, w1_start, hi)
        prev = _count_in(items, ts_key, w2_start, w1_start)
        return {"curr": curr, "prev": prev, "delta_pct": _delta(curr, prev)}

    # ── KPIs: last 7d vs prior 7d
    # The request-lifecycle timestamps use an open upper bound, so an event
    # stamped after `now` was captured still counts in the current window.
    no_upper = float("inf")
    kpis = {
        label: _kpi(items, ts_key, hi)
        for label, items, ts_key, hi in [
            ("new_users",      users,    "created_ts",     now),
            ("new_requests",   requests, "ts",             now),
            ("new_lawyers",    apps,     "ts",             now),
            ("triage_runs",    triage,   "ts",             now),
            ("feedback_items", feedback, "ts",             now),
            ("deliverables",   requests, "deliverable_ts", no_upper),
            ("approvals",      requests, "approval_ts",    no_upper),
            ("disputes",       requests, "dispute_ts",     no_upper),
        ]
    }

    # ── Funnel (cumulative, all-time)
    funnel = {
        "triage_runs":     len(triage),
        "requests":        len(requests),
        "matched":         sum(1 for r in requests if r.get("assigned_lawyer_id")),
        "accepted":        sum(1 for r in requests if r.get("lawyer_response") == "accepted"),
        "delivered":       sum(1 for r in requests if r.get("deliverable_ts")),
        "approved":        sum(1 for r in requests if r.get("approval_ts") and not r.get("auto_approved")),
        "auto_released":   sum(1 for r in requests if r.get("approval_ts") and r.get("auto_approved")),
        "disputed":        sum(1 for r in requests if r.get("status") == "disputed"),
        "dispute_closed":  sum(1 for r in requests if r.get("dispute_resolution_ts")),
    }

    # ── Activity feed (last 24h, mixed sources, sorted desc)
    activity_cutoff = now - 24 * 3600
    activity = []
    for r in requests:
        if (r.get("ts") or 0) >= activity_cutoff:
            activity.append({
                "ts":    r["ts"], "iso": r.get("iso", ""),
                "kind":  "request", "icon": "💬",
                "title": f"פנייה חדשה · {r.get('domain', '?')}",
                "summary": _clip(r.get("question") or "", 80),
                "link":  f"/admin-leads/lawyer-requests.html?id={_clip(r.get('id'), 8)}",
            })
    for a in apps:
        if (a.get("ts") or 0) >= activity_cutoff:
            activity.append({
                "ts": a["ts"], "iso": a.get("iso", ""),
                "kind": "lawyer_app", "icon": "⚖",
                "title": f"עו\"ד חדש · {a.get('full_name', '?')}",
                "summary": f"{', '.join(a.get('domains', []) or [])[:60]}",
                "link":  "/admin-leads/lawyer-applications.html",
            })
    for u in users:
        if (u.get("created_ts") or 0) >= activity_cutoff:
            activity.append({
                "ts": u["created_ts"],
                "iso": time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime(u.get("created_ts") or 0)),
                "kind": "user", "icon": "👤",
                "title": "משתמש חדש נרשם",
                "summary": _clip(u.get("email") or "", 50),
                "link":  "",
            })
    feed_actions = (
        "deliverable_submitted", "deliverable_approved", "deliverable_disputed",
        "dispute_resolved", "auto_release", "invite_consumed",
    )
    for ev in audit:
        if (ev.get("ts") or 0) >= activity_cutoff and ev.get("action") in feed_actions:
            actor_label = ev.get("actor", "?")
            target_label = _clip(ev.get("target", "?"), 30, "?")
            activity.append({
                "ts": ev["ts"], "iso": ev.get("iso", ""),
                "kind": "audit", "icon": "✓",
                "title": ev.get("action", "?"),
                "summary": f"{actor_label} · {target_label}",
                "link":  "/admin-leads/audit-log.html",
            })
    activity.sort(key=lambda x: -(x.get("ts") or 0))
    activity = activity[:40]

    # ── Alerts (current state, not windowed)
    alerts = []

    def _alert(kind, level, title, link, matches):
        """Append one alert entry, but only when something matched."""
        if matches:
            alerts.append({"kind": kind, "level": level, "title": title,
                           "link": link, "count": len(matches)})

    open_disp     = [r for r in requests if r.get("status") == "disputed"]
    pending_apps  = [a for a in apps if (a.get("status") or "submitted") in ("submitted", "under_review")]
    # A missing timestamp falls back to `now`, i.e. an age of zero: never stale.
    stale_new     = [r for r in requests if r.get("status") in ("new", "offered") and (now - (r.get("ts") or now)) > 24 * 3600]
    stuck_deliv   = [r for r in requests
                     if r.get("status") == "contacted"
                     and r.get("lawyer_response") == "accepted"
                     and (now - (r.get("assigned_ts") or now)) > 48 * 3600]
    auto_release_imminent = [r for r in requests
                             if r.get("status") == "submitted_for_approval"
                             and r.get("deliverable_ts")
                             and (now - r["deliverable_ts"]) > 60 * 3600]
    _alert("dispute", "danger",
           f"{len(open_disp)} מחלוקות פתוחות",
           "/admin-leads/lawyer-requests.html?status=disputed", open_disp)
    _alert("verify", "warn",
           f"{len(pending_apps)} בקשות עו\"ד ממתינות לאימות",
           "/admin-leads/lawyer-applications.html", pending_apps)
    _alert("stale", "warn",
           f"{len(stale_new)} פניות 'new/offered' >24 שעות",
           "/admin-leads/lawyer-requests.html", stale_new)
    _alert("stuck", "warn",
           f"{len(stuck_deliv)} פניות שעו\"ד קיבל ולא הגיש >48 שעות",
           "/admin-leads/lawyer-requests.html", stuck_deliv)
    _alert("auto_release", "info",
           f"{len(auto_release_imminent)} פניות לקראת auto-release (>60h)",
           "/admin-leads/lawyer-requests.html", auto_release_imminent)

    # ── System health
    backup_state = _read_json_state(_wa_pl.Path("tau_rag/runtime/admin_backup_state.json"))
    digest_state = _read_json_state(_DIGEST_STATE_PATH)

    health = {
        "smtp_configured":        bool(os.environ.get("LE_LEAD_EMAIL_SMTP_USER", "").strip()
                                       and os.environ.get("LE_LEAD_EMAIL_SMTP_PASS", "").strip()),
        "admin_token_configured": bool(os.environ.get("LE_ADMIN_TOKEN", "").strip()),
        "lead_email_configured":  bool(os.environ.get("LE_LEAD_EMAIL_TO", "").strip()),
        "webhook_configured":     bool(os.environ.get("LE_LEAD_WEBHOOK_URL", "").strip()),
        "backup_webhook_configured": bool(os.environ.get("LE_BACKUP_WEBHOOK_URL", "").strip()),
        "audit_webhook_configured":  bool(os.environ.get("LE_AUDIT_WEBHOOK_URL", "").strip()),  # v2.99.195
        "beta_gated":             bool(os.environ.get("LE_BETA_GATED", "").strip() == "1"),
        "backup_last_iso":        backup_state.get("last_backup_iso", ""),
        "backup_n_files":         backup_state.get("n_files", 0),
        "digest_last_iso":        digest_state.get("last_sent_iso", ""),
    }

    return {
        "ok":         True,
        "snapshot_ts": now,
        "kpis":       kpis,
        "funnel":     funnel,
        "activity":   activity,
        "alerts":     alerts,
        "health":     health,
        "totals": {
            "all_users":     len(users),
            "all_requests":  len(requests),
            "all_lawyers":   len(apps),
            "all_audit":     len(audit),
            "all_feedback":  len(feedback),
        },
    }


@app.get("/v1/admin/audit-log")
def admin_audit_log(token: str = "", limit: int = 200, role: str = "", action: str = "", actor: str = ""):  # type: ignore
    """Admin: query the audit log, newest first.

    Filters (all optional): ``role`` is an exact match, ``action`` is a
    prefix match, ``actor`` is compared after trimming and lower-casing
    (a blank/whitespace-only value means "no actor filter").

    ``limit`` is clamped to 1..2000. In the response ``n`` is the number of
    items returned and ``n_total`` the number that matched the filters.
    Responds 403 unless ``token`` equals a non-empty ``LE_ADMIN_TOKEN``.
    Unparseable lines and JSON lines that are not objects are skipped.
    """
    import os
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    if not admin_token or token != admin_token:
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    if not _AUDIT_LOG_PATH.exists():
        return {"ok": True, "n": 0, "n_total": 0, "items": []}

    # Loop invariants, computed once instead of per line.
    actor_norm = actor.strip().lower()
    cap = max(1, min(limit, 2000))

    items = []
    try:
        with open(_AUDIT_LOG_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                except Exception:
                    continue
                if not isinstance(rec, dict):
                    continue
                if role and rec.get("role") != role:
                    continue
                # `or ""` covers an explicit null action in the record.
                if action and not str(rec.get("action") or "").startswith(action):
                    continue
                if actor_norm and rec.get("actor") != actor_norm:
                    continue
                items.append(rec)
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
    items.sort(key=lambda r: -(r.get("ts") or 0))
    page = items[:cap]
    return {
        "ok":       True,
        "n":        len(page),
        "n_total":  len(items),
        "items":    page,
    }


# ──────────────────────────────────────────────────────────────────────
# v2.99.153 — Backup + restore for HF Spaces ephemeral storage.
#
# HF Spaces free tier wipes /code on restarts (rebuild or factory). All
# JSONL state files (users, lawyer_applications, requests, audit_log,
# notifications, invites, feedback, triage_log) get lost. This module:
#   - GET  /v1/admin/backup/download    → ZIP of all state files
#   - POST /v1/admin/backup/restore     → restore from uploaded ZIP
#   - Daemon thread: POST snapshot to LE_BACKUP_WEBHOOK_URL every 30 min
# All admin-token gated. Restore additionally requires the ?confirm=YES_REPLACE query parameter.
# ──────────────────────────────────────────────────────────────────────

# Files included in backup (small JSONL + JSON state — NOT large uploads/deliverables).
# Paths are repo-relative. The restore endpoint also uses this list as its
# allow-list: ZIP entries whose name is not listed here are skipped.
# NOTE(review): auth_sessions.jsonl is part of every snapshot, including the
# ones POSTed to LE_BACKUP_WEBHOOK_URL — presumably it holds live session
# data; confirm the webhook receiver is trusted accordingly.
_BACKUP_FILE_PATHS: List[str] = [
    "tau_rag/runtime/lawyer_requests.jsonl",
    "tau_rag/runtime/lawyer_applications.jsonl",
    "tau_rag/runtime/notifications.jsonl",
    "tau_rag/runtime/audit_log.jsonl",
    "tau_rag/runtime/beta_invites.jsonl",
    "tau_rag/runtime/feedback.jsonl",
    "tau_rag/runtime/triage_log.jsonl",
    "tau_rag/runtime/users.jsonl",
    "tau_rag/runtime/auth_sessions.jsonl",
    "tau_rag/runtime/admin_digest_state.json",
]


def _collect_backup_snapshot() -> Dict[str, Any]:
    """Load every existing file from _BACKUP_FILE_PATHS into one snapshot dict.

    Each file becomes ``{path, content, size, mtime}`` (text read as UTF-8,
    size/mtime taken from ``stat``). Missing files are skipped; a file that
    fails to read is reported on stdout and skipped as well. The result
    carries the entries under ``files`` plus version, timestamps, file count
    and byte total.
    """
    import time

    def _load(path_str):
        """Return the snapshot entry for one path, or None when unavailable."""
        target = _wa_pl.Path(path_str)
        if not target.exists():
            return None
        try:
            text = target.read_text(encoding="utf-8")
            info = target.stat()
        except Exception as e:
            print(f"[backup] read FAIL {path_str}: {e}")
            return None
        return {
            "path":    path_str,
            "content": text,
            "size":    info.st_size,
            "mtime":   info.st_mtime,
        }

    entries = [entry for entry in map(_load, _BACKUP_FILE_PATHS) if entry is not None]
    return {
        "version":      "1",
        "snapshot_ts":  time.time(),
        "snapshot_iso": time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()),
        "n_files":      len(entries),
        "total_bytes":  sum(entry["size"] for entry in entries),
        "files":        entries,
    }


def _build_backup_zip(snapshot: Dict[str, Any]) -> bytes:
    """Serialize a snapshot into an in-memory, deflate-compressed ZIP.

    The archive starts with ``manifest.json`` (the snapshot metadata plus a
    path/size/mtime listing, without file contents) followed by one entry
    per snapshot file, stored under its repo-relative path.
    """
    import io, zipfile, json as _j

    manifest = {
        key: snapshot[key]
        for key in ("version", "snapshot_ts", "snapshot_iso", "n_files", "total_bytes")
    }
    entries = snapshot["files"]
    manifest["files"] = [
        {"path": entry["path"], "size": entry["size"], "mtime": entry["mtime"]}
        for entry in entries
    ]

    out = io.BytesIO()
    with zipfile.ZipFile(out, "w", zipfile.ZIP_DEFLATED) as archive:
        archive.writestr("manifest.json", _j.dumps(manifest, ensure_ascii=False, indent=2))
        for entry in entries:
            archive.writestr(entry["path"], entry["content"])
    return out.getvalue()


@app.get("/v1/admin/backup/download")
def admin_backup_download(token: str = ""):  # type: ignore
    """v2.99.153 — Stream back a ZIP containing every backed-up state file.

    Responds 403 unless ``token`` equals a non-empty ``LE_ADMIN_TOKEN``.
    A successful download is recorded in the audit log; any failure while
    building the archive is returned as a 500 JSON body.
    """
    import os
    from fastapi.responses import Response

    expected = os.environ.get("LE_ADMIN_TOKEN", "")
    if not (expected and token == expected):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})

    try:
        import time
        snapshot = _collect_backup_snapshot()
        archive = _build_backup_zip(snapshot)
        stamp = time.strftime('%Y%m%d-%H%M%S')
        download_name = f"legal-eye-backup-{stamp}.zip"
        _audit_log(
            actor="admin",
            role="admin",
            action="backup_download",
            target="(snapshot)",
            meta={"n_files": snapshot["n_files"], "size": len(archive)},
        )
        disposition = f'attachment; filename="{download_name}"'
        return Response(
            content=archive,
            media_type="application/zip",
            headers={"Content-Disposition": disposition},
        )
    except Exception as exc:
        detail = f"{type(exc).__name__}: {exc}"
        return JSONResponse(status_code=500, content={"ok": False, "error": detail})


@app.get("/v1/admin/backup/status")
def admin_backup_status(token: str = ""):  # type: ignore
    """v2.99.153 — Preview of the next backup: file list and sizes, no ZIP built.

    Also reports whether ``LE_BACKUP_WEBHOOK_URL`` is set. Responds 403
    unless ``token`` equals a non-empty ``LE_ADMIN_TOKEN``.
    """
    import os

    env = os.environ
    expected = env.get("LE_ADMIN_TOKEN", "")
    if not (expected and token == expected):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})

    snapshot = _collect_backup_snapshot()
    listing = []
    for entry in snapshot["files"]:
        # Contents are intentionally left out of the status listing.
        listing.append({"path": entry["path"], "size": entry["size"]})

    report = {"ok": True}
    for key in ("n_files", "total_bytes", "snapshot_iso"):
        report[key] = snapshot[key]
    report["webhook_set"] = bool(env.get("LE_BACKUP_WEBHOOK_URL", "").strip())
    report["files"] = listing
    return report


@app.post("/v1/admin/backup/restore")
async def admin_backup_restore(request: Request, token: str = "", confirm: str = ""):  # type: ignore
    """v2.99.153 — DANGEROUS: restore state from an uploaded backup ZIP.

    The request body is the raw ZIP. Requires the admin token and
    ``confirm=YES_REPLACE``. Only entries named in ``_BACKUP_FILE_PATHS``
    are restored; everything else in the archive is reported as skipped.

    The restore runs in two phases so a bad upload cannot leave state half
    replaced: first the whole archive is validated and decoded in memory,
    and only then are files overwritten (each via temp file + rename).

    Responses: 403 bad token · 400 missing confirm / empty body / no
    manifest / invalid archive or manifest · 500 unexpected failure ·
    200 with the restored and skipped entry lists.
    """
    import os
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    if not admin_token or token != admin_token:
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    if confirm != "YES_REPLACE":
        return JSONResponse(status_code=400, content={
            "ok": False, "reason": "confirm_required",
            "hint": "Pass ?confirm=YES_REPLACE to acknowledge file overwrite.",
        })
    try:
        body = await request.body()
        if not body:
            return JSONResponse(status_code=400, content={"ok": False, "reason": "empty_body"})
        import io, zipfile, json as _j

        # ── Phase 1: validate + decode everything before touching disk.
        allowed = set(_BACKUP_FILE_PATHS)
        staged = []     # (entry name, decoded text) pairs to write in phase 2
        skipped = []
        try:
            with zipfile.ZipFile(io.BytesIO(body)) as zf:
                names = zf.namelist()
                if "manifest.json" not in names:
                    return JSONResponse(status_code=400, content={"ok": False, "reason": "no_manifest"})
                manifest = _j.loads(zf.read("manifest.json").decode("utf-8"))
                if not isinstance(manifest, dict):
                    # The manifest is read with .get() below; reject other
                    # JSON shapes up front rather than after overwriting files.
                    return JSONResponse(status_code=400, content={"ok": False, "reason": "bad_manifest"})
                for name in names:
                    if name == "manifest.json":
                        continue
                    if name not in allowed:
                        skipped.append(name)
                        continue
                    staged.append((name, zf.read(name).decode("utf-8")))
        except (zipfile.BadZipFile, ValueError) as e:
            # ValueError covers both invalid manifest JSON and non-UTF-8
            # entries. These are problems with the upload, not the server.
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "invalid_backup",
                "error": f"{type(e).__name__}: {e}",
            })

        # ── Phase 2: overwrite state files.
        restored = []
        for name, content in staged:
            p = _wa_pl.Path(name)
            p.parent.mkdir(parents=True, exist_ok=True)
            # Atomic write: fill a sibling temp file, then rename over the target.
            tmp = p.with_suffix(p.suffix + ".restore.tmp")
            tmp.write_text(content, encoding="utf-8")
            tmp.replace(p)
            restored.append({"path": name, "size": len(content)})

        _audit_log(actor="admin", role="admin", action="backup_restore",
                   target="(state)", meta={"n_restored": len(restored),
                                            "manifest_iso": manifest.get("snapshot_iso", "?")})
        return {
            "ok":           True,
            "n_restored":   len(restored),
            "n_skipped":    len(skipped),
            "manifest_iso": manifest.get("snapshot_iso", "?"),
            "restored":     restored,
            "skipped":      skipped,
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": f"{type(e).__name__}: {e}"})


def _push_backup_to_webhook(snapshot: Dict[str, Any], zip_bytes: bytes) -> bool:
    """Send the backup as a JSON POST to LE_BACKUP_WEBHOOK_URL.

    The ZIP travels base64-encoded under ``zip_base64`` next to the snapshot
    metadata. Returns True only for a 2xx response; returns False when the
    variable is unset/blank or when anything goes wrong (the error is
    printed, never raised).
    """
    import os, urllib.request, base64

    endpoint = os.environ.get("LE_BACKUP_WEBHOOK_URL", "").strip()
    if endpoint == "":
        return False

    try:
        envelope = {"kind": "legal-eye-backup"}
        for key in ("version", "snapshot_iso", "n_files", "total_bytes"):
            envelope[key] = snapshot[key]
        envelope["zip_base64"] = base64.b64encode(zip_bytes).decode("ascii")

        post = urllib.request.Request(
            endpoint,
            data=json.dumps(envelope).encode("utf-8"),
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with urllib.request.urlopen(post, timeout=30) as resp:
            status = resp.status
        return 200 <= status < 300
    except Exception as e:
        print(f"[backup] webhook FAIL: {type(e).__name__}: {e}")
        return False


# State file the auto-backup loop rewrites after each successful webhook push
# (last backup timestamp + ISO time, file count, byte total, truncated sha256).
_BACKUP_STATE_PATH = _wa_pl.Path("tau_rag/runtime/admin_backup_state.json")


def _start_auto_backup_thread() -> None:
    """v2.99.153 — Start the auto-backup daemon thread.

    After a 120s warm-up the thread wakes every 30 minutes. When
    LE_BACKUP_WEBHOOK_URL is set and the state files differ from the last
    snapshot that was pushed successfully, it POSTs a fresh snapshot and
    records the result in _BACKUP_STATE_PATH. Failures are printed and the
    loop carries on; a failed push is retried on the next wake-up.
    """
    import threading, time, hashlib

    interval_s = 30 * 60

    def _snapshot_digest(snap):
        """SHA-256 over every file's path and content (cheap change detection)."""
        h = hashlib.sha256()
        for f in snap["files"]:
            h.update(f["path"].encode())
            h.update(f["content"].encode())
        return h.hexdigest()

    def _record_state(snap, digest):
        """Persist what was just backed up; a write failure is logged, not raised."""
        try:
            _BACKUP_STATE_PATH.parent.mkdir(parents=True, exist_ok=True)
            _BACKUP_STATE_PATH.write_text(json.dumps({
                "last_backup_ts":  snap["snapshot_ts"],
                "last_backup_iso": snap["snapshot_iso"],
                "n_files":         snap["n_files"],
                "total_bytes":     snap["total_bytes"],
                "sha256":          digest[:16],
            }, ensure_ascii=False, indent=2), encoding="utf-8")
        except Exception as e:
            # The push itself succeeded, so keep going, but say why the
            # recorded "last backup" state is now stale.
            print(f"[backup] state write FAIL: {type(e).__name__}: {e}")

    def _run_once(last_hash):
        """One backup attempt. Returns the digest of the last pushed snapshot."""
        import os
        if not os.environ.get("LE_BACKUP_WEBHOOK_URL", "").strip():
            return last_hash
        snap = _collect_backup_snapshot()
        cur_hash = _snapshot_digest(snap)
        if cur_hash == last_hash:
            return last_hash            # nothing changed since the last push
        blob = _build_backup_zip(snap)
        if not _push_backup_to_webhook(snap, blob):
            return last_hash            # keep the old digest so we retry
        _record_state(snap, cur_hash)
        print(f"[backup] auto-snapshot OK · {snap['n_files']} files · {snap['total_bytes']}B")
        return cur_hash

    def _loop():
        time.sleep(120)  # warmup — let everything boot
        last_hash = ""
        while True:
            try:
                last_hash = _run_once(last_hash)
            except Exception as e:
                print(f"[backup] loop FAIL: {type(e).__name__}: {e}")
            time.sleep(interval_s)

    threading.Thread(target=_loop, daemon=True).start()


# Start the daemon on import (alongside other startup threads).
# Wrapped in try/except so that a failure to start the backup thread is only
# printed and does not abort the import of this module.
try:
    _start_auto_backup_thread()
except Exception as _e:
    print(f"[backup] startup FAIL: {_e}")


def _create_notification(email: str, kind: str, title: str, body: str = "",
                         link: str = "", related_request_id: str = "") -> None:
    """Queue one unread notification for `email` in the notifications JSONL file.

    Does nothing when `email` is empty. The address is stored trimmed and
    lower-cased, the body is capped at 600 characters, and the record gets a
    12-character id taken from a fresh UUID. Best-effort: any failure is
    printed and swallowed, so callers never see an exception.
    """
    import time, uuid

    if not email:
        return

    try:
        _NOTIFICATIONS_PATH.parent.mkdir(parents=True, exist_ok=True)
        notification = {
            "id":                 str(uuid.uuid4())[:12],
            "ts":                 time.time(),
            "iso":                time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()),
            "email":              email.strip().lower(),
            "kind":               kind,        # e.g. "status_change", "lawyer_offered", "deliverable_submitted"
            "title":              title,
            "body":               (body or "")[:600],
            "link":               link,
            "related_request_id": related_request_id,
            "read":               False,
        }
        # Appends are serialized through the shared notifications lock.
        with _NOTIFICATIONS_LOCK, open(_NOTIFICATIONS_PATH, "a", encoding="utf-8") as sink:
            serialized = json.dumps(notification, ensure_ascii=False)
            sink.write(serialized + "\n")
    except Exception as e:
        print(f"[notif] FAIL: {type(e).__name__}: {e}")


@app.get("/v1/me/notifications")
def me_notifications(request: Request, limit: int = 30, unread_only: bool = False):  # type: ignore
    """List notifications for the signed-in user, newest first.

    Works for both users and lawyers — records are matched only by the
    session's email (trimmed, lower-cased). ``unread_only`` drops records
    already marked read. ``limit`` is clamped to 1..200 and applies to
    ``items`` only: ``n`` and ``n_unread`` count every matching record.

    Responds 401 without a valid session and 500 on unexpected errors.
    Unparseable lines and JSON lines that are not objects are skipped.
    """
    try:
        from ..auth import session_from_header
        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})
        if not _NOTIFICATIONS_PATH.exists():
            return {"ok": True, "n": 0, "n_unread": 0, "items": []}
        items = []
        email = sess["email"].strip().lower()
        with open(_NOTIFICATIONS_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                except Exception:
                    continue
                # A stray non-object line must not fail the whole listing.
                if not isinstance(rec, dict) or rec.get("email") != email:
                    continue
                if unread_only and rec.get("read"):
                    continue
                items.append(rec)
        # newest first
        items.sort(key=lambda r: -(r.get("ts") or 0))
        n_unread = sum(1 for r in items if not r.get("read"))
        return {
            "ok":        True,
            "n":         len(items),
            "n_unread":  n_unread,
            "items":     items[:max(1, min(limit, 200))],
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})


class _NotifMarkBody(BaseModel):  # type: ignore
    # Request body of POST /v1/me/notifications/mark.
    # ids:  notification ids to update; None or an empty list → every
    #       notification that belongs to the caller.
    # read: the state to set — True marks as read, False marks as unread.
    ids:       Optional[List[str]] = None    # if None → mark all as read
    read:      bool = True


@app.post("/v1/me/notifications/mark")
def me_notifications_mark(body: _NotifMarkBody, request: Request):  # type: ignore
    """Mark the caller's notifications read or unread.

    ``body.ids`` selects the notifications; None or empty selects ALL of the
    caller's notifications. ``body.read`` is the state to set. Only records
    whose email matches the session are ever modified.

    The notifications file is rewritten (temp file + rename, under the
    notifications lock) only when at least one record actually changed.
    Returns ``n_changed``; 401 without a valid session, 500 on unexpected
    errors.
    """
    try:
        from ..auth import session_from_header
        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})
        if not _NOTIFICATIONS_PATH.exists():
            return {"ok": True, "n_changed": 0}
        email = sess["email"].strip().lower()
        target_ids = set(body.ids or [])
        n_changed = 0
        with _NOTIFICATIONS_LOCK:
            items = []
            with open(_NOTIFICATIONS_PATH, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        items.append(json.loads(line))
                    except Exception:
                        continue
            for rec in items:
                # Non-object lines are carried through untouched.
                if not isinstance(rec, dict):
                    continue
                if rec.get("email") != email:
                    continue
                if target_ids and rec.get("id") not in target_ids:
                    continue
                if rec.get("read") != body.read:
                    rec["read"] = body.read
                    n_changed += 1
            if n_changed:
                # Rewrite via a temp file so readers never see a partial file.
                tmp = _NOTIFICATIONS_PATH.with_suffix(".jsonl.tmp")
                with open(tmp, "w", encoding="utf-8") as f:
                    for rec in items:
                        f.write(json.dumps(rec, ensure_ascii=False) + "\n")
                tmp.replace(_NOTIFICATIONS_PATH)
        return {"ok": True, "n_changed": n_changed}
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})


@app.post("/v1/lawyer-request/submit-with-file")
async def lawyer_request_submit_with_file(  # type: ignore
    request:         Request,
    file:            UploadFile = File(...),
    question:        str        = Form(...),
    contact:         str        = Form(...),
    name:            str        = Form(""),
    contact_method:  str        = Form("either"),
    domain:          str        = Form(""),
    triage_category: str        = Form(""),
    risk_level:      str        = Form(""),
    urgent:          str        = Form("false"),
    notes:           str        = Form(""),
):
    """v2.99.124 — Like /submit but with a single file attachment.

    Accepts PDF / DOCX / DOC / PNG / JPG / JPEG / TXT / RTF up to 10 MB.
    Saves under runtime/document_uploads/{request_id}/{safe_filename}.
    Creates the same lawyer-request record with extra fields:
      has_attachment: true
      attachment_filename, attachment_size, attachment_mime
    Then fires the same notification flow.

    Validation failures: 400 when `question` or `contact` is shorter than
    4 characters after trimming, when the extension is not allowed, or when
    the file is empty; 413 when the file exceeds the size limit. Any
    unexpected error is returned as a 500 JSON body.

    When a valid session header is present the record is attributed to that
    user; otherwise it is stored as anonymous.
    """
    import time, uuid
    try:
        q = (question or "").strip()
        c = (contact or "").strip()
        if len(q) < 4 or len(c) < 4:
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "missing_required_fields",
            })
        # Validate file
        orig_name = (file.filename or "upload").strip()
        # Get extension (lowercased)
        ext = ""
        if "." in orig_name:
            ext = "." + orig_name.rsplit(".", 1)[-1].lower()
        if ext not in _DOC_ALLOWED_EXT:
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "unsupported_filetype",
                "allowed": sorted(_DOC_ALLOWED_EXT),
            })
        # Read body & size-check. Read at most one byte past the limit: that
        # is enough to detect an oversized upload without pulling an
        # arbitrarily large file into memory first.
        content = await file.read(_DOC_MAX_BYTES + 1)
        if len(content) > _DOC_MAX_BYTES:
            return JSONResponse(status_code=413, content={
                "ok": False, "reason": "file_too_large",
                "max_bytes": _DOC_MAX_BYTES,
            })
        if len(content) == 0:
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "empty_file",
            })
        # Build record + save file
        req_id = str(uuid.uuid4())
        safe_name = _re.sub(r"[^\w\-.()]+", "_", orig_name)
        if len(safe_name) > 120:
            # Shorten the stem, not the tail: a plain [:120] cut would drop
            # the extension that was validated above.
            safe_name = safe_name[:max(1, 120 - len(ext))] + ext
        safe_name = safe_name or ("upload" + ext)
        upload_dir = _DOC_UPLOAD_DIR / req_id
        upload_dir.mkdir(parents=True, exist_ok=True)
        file_path = upload_dir / safe_name
        with open(file_path, "wb") as f:
            f.write(content)

        # v2.99.130 — Optional user attribution
        signed_in_user = None
        try:
            from ..auth import session_from_header
            sess = session_from_header(request.headers.get("authorization"))
            if sess:
                signed_in_user = {"id": sess["user_id"], "email": sess["email"]}
        except Exception:
            # Attribution is optional; fall back to an anonymous record.
            pass

        record = {
            "id":                  req_id,
            "ts":                  time.time(),
            "iso":                 time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()),
            "question":            q,
            "name":                (name or "").strip(),
            "contact":             c,
            "contact_method":      contact_method or "either",
            "domain":              domain or "",
            "triage_category":     triage_category or "",
            "risk_level":          risk_level or "",
            "urgent":              str(urgent).lower() in ("true", "1", "yes"),
            "notes":               (notes or "").strip(),
            "has_attachment":      True,
            "attachment_filename": safe_name,
            "attachment_size":     len(content),
            "attachment_mime":     file.content_type or "application/octet-stream",
            "status":              "new",
            "user_id":             signed_in_user["id"]    if signed_in_user else None,
            "user_email":          signed_in_user["email"] if signed_in_user else None,
        }
        _LAWYER_REQUEST_PATH.parent.mkdir(parents=True, exist_ok=True)
        with _LAWYER_REQUEST_LOCK:
            with open(_LAWYER_REQUEST_PATH, "a", encoding="utf-8") as f:
                f.write(json.dumps(record, ensure_ascii=False) + "\n")
        _notify_lead_async(record)
        # v2.99.144 — audit
        _audit_log(
            actor=signed_in_user["email"] if signed_in_user else "anonymous",
            role="user" if signed_in_user else "anonymous",
            action="request_submitted",
            target=record["id"],
            meta={"domain": record["domain"], "urgent": record["urgent"],
                  "has_attachment": True, "filename": safe_name, "size": len(content)},
        )
        print(f"[lawyer-request] new #{req_id[:8]} (📎 {safe_name}, {len(content)}B) "
              f"domain={record['domain']} urgent={record['urgent']}")
        return {
            "ok":            True,
            "id":            req_id,
            "filename":      safe_name,
            "size":          len(content),
            "expected_sla":  "24 hours" if not record["urgent"] else "4 hours",
            "message":       "המסמך התקבל. עורך דין מאומת יבדוק ויחזור אליך תוך 24 שעות (4 שעות לדחוף).",
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}",
        })


@app.get("/v1/lawyer-request/{request_id}/attachment")
def lawyer_request_get_attachment(request_id: str, token: str = ""):  # type: ignore
    """Admin: download a request's attached file.

    Args:
        request_id: ID of the lawyer-request record whose attachment is wanted.
        token: Admin token; must equal the ``LE_ADMIN_TOKEN`` environment
            variable. An unset/empty ``LE_ADMIN_TOKEN`` rejects every call.

    Returns:
        A ``FileResponse`` for the stored file on success; otherwise a
        ``JSONResponse`` with ``ok: False`` and status 403 (bad token),
        404 (no records file / no matching record with an attachment /
        file missing on disk) or 500 (records file could not be read).
    """
    import hmac
    import os
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing does not reveal how much
    # of the token matched. Compared as bytes because compare_digest()
    # rejects non-ASCII str arguments.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    # Find record
    if not _LAWYER_REQUEST_PATH.exists():
        return JSONResponse(status_code=404, content={"ok": False, "reason": "no_records"})
    record = None
    try:
        with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    candidate = json.loads(line)
                except Exception:
                    # Malformed line: skip it, keep scanning.
                    continue
                # Only JSON objects can be request records.
                if isinstance(candidate, dict) and candidate.get("id") == request_id:
                    record = candidate
                    break
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
    if not record or not record.get("has_attachment"):
        return JSONResponse(status_code=404, content={"ok": False, "reason": "no_attachment"})
    # `or` (not a .get default) so an explicit null filename also falls back.
    fname = record.get("attachment_filename") or "upload"
    path = _DOC_UPLOAD_DIR / request_id / fname
    if not path.exists():
        return JSONResponse(status_code=404, content={"ok": False, "reason": "file_missing_on_disk"})
    from fastapi.responses import FileResponse as _FileResponse
    return _FileResponse(
        path,
        media_type=record.get("attachment_mime") or "application/octet-stream",
        filename=fname,
    )


# v2.99.148 — Dispute resolution
# Maps each dispute-resolution code to the Hebrew label shown in notification
# titles and email subjects. The keys are the same four codes that
# lawyer_request_resolve_dispute() accepts; lookups fall back to the raw code
# when a key is missing.
RESOLUTION_LABEL_HE = {
    "user":     "לטובת המשתמש",
    "lawyer":   "לטובת עורך הדין",
    "partial":  "פתרון משולב",
    "re_route": "ניתוב לעורך דין אחר",
}


def _notify_dispute_resolution_async(record: dict) -> None:
    """v2.99.148 — Email + bell both parties when admin resolves a dispute.

    Reads ``dispute_resolution``, ``dispute_resolution_note``, ``id``,
    ``user_email``, ``assigned_lawyer_email`` and ``assigned_lawyer_name``
    from ``record``. In-app notifications are created synchronously in the
    calling thread; the emails are sent from a daemon thread so the caller
    does not wait on them. A party without an email on the record is skipped.
    """
    import threading, os
    resolution = record.get("dispute_resolution", "")
    # Unknown resolution codes fall back to the raw code as their own label.
    label_he   = RESOLUTION_LABEL_HE.get(resolution, resolution)
    admin_note = record.get("dispute_resolution_note", "")
    # Short (8-char) request id quoted in the email bodies.
    rid_short  = (record.get("id") or "?")[:8]

    # In-app bell for both
    if record.get("user_email"):
        _create_notification(
            record["user_email"], kind="dispute_resolved",
            title=f"⚖ המחלוקת נפתרה: {label_he}",
            body=(admin_note or "")[:200],
            link="/my-matters/",
            related_request_id=record.get("id", ""),
        )
    if record.get("assigned_lawyer_email"):
        _create_notification(
            record["assigned_lawyer_email"], kind="dispute_resolved",
            title=f"⚖ המחלוקת נפתרה: {label_he}",
            body=(admin_note or "")[:200],
            link="/lawyer-dashboard/",
            related_request_id=record.get("id", ""),
        )

    def _worker() -> None:
        # Runs in the background thread: builds one email per party, with the
        # wording chosen by the resolution code. In both chains the final
        # `else` covers "re_route" as well as any unrecognised code.
        subject = f"legal-eye · המחלוקת נפתרה ({label_he})"
        title   = f"⚖ המחלוקת נפתרה: {label_he}"
        # User email
        if record.get("user_email"):
            if resolution == "user":
                user_body = (
                    f"שלום,\n\nהמחלוקת שפתחת על הפנייה ב-legal-eye נפתרה - לטובתך.\n\n"
                    f"החלטת הצוות:\n{admin_note or '(ראה אזור אישי)'}\n\n"
                    f"(במודל תשלומים עתידי — סכום מוחזר יוחזר אליך)\n\nמזהה: {rid_short}"
                )
            elif resolution == "lawyer":
                user_body = (
                    f"שלום,\n\nהמחלוקת שפתחת על הפנייה ב-legal-eye נסקרה.\nלאחר בדיקה — הצוות החליט לטובת עורך הדין.\n\n"
                    f"החלטת הצוות:\n{admin_note or '(ראה אזור אישי)'}\n\nאם יש לך שאלות — ניתן לפתוח פנייה חדשה.\n\nמזהה: {rid_short}"
                )
            elif resolution == "partial":
                user_body = (
                    f"שלום,\n\nהמחלוקת שפתחת על הפנייה ב-legal-eye נפתרה בפתרון משולב.\n\n"
                    f"החלטת הצוות:\n{admin_note or '(ראה אזור אישי)'}\n\nמזהה: {rid_short}"
                )
            else:
                user_body = (
                    f"שלום,\n\nלאחר בדיקת המחלוקת — הפנייה שלך תשויך לעו\"ד אחר.\nלא נדרשת פעולה ממך. נחזור אליך בקרוב.\n\n"
                    f"החלטת הצוות:\n{admin_note or '(ראה אזור אישי)'}\n\nמזהה: {rid_short}"
                )
            _send_email_branded(
                record["user_email"], subject, user_body,
                title=title, cta_label="לאזור האישי ←",
                cta_url="https://legal-eye.1bigfam.com/my-matters/",
            )
        # Lawyer email
        if record.get("assigned_lawyer_email"):
            lname = record.get("assigned_lawyer_name", "")
            if resolution == "lawyer":
                lawyer_body = (
                    f"שלום עו\"ד {lname},\n\nהמחלוקת שנפתחה על התוצר שהגשת ב-legal-eye נסקרה — לטובתך.\n\n"
                    f"החלטת הצוות:\n{admin_note or '(ראה אזור אישי)'}\n\n(במודל תשלומים עתידי — התשלום ישוחרר אליך)\n\nמזהה: {rid_short}"
                )
            elif resolution == "user":
                lawyer_body = (
                    f"שלום עו\"ד {lname},\n\nהמחלוקת שנפתחה על התוצר שהגשת נסקרה.\nלאחר בדיקה — הצוות החליט לטובת המשתמש.\n\n"
                    f"החלטת הצוות:\n{admin_note or '(ראה אזור אישי)'}\n\n(במודל תשלומים עתידי — התשלום לא ישוחרר אליך)\n\nמזהה: {rid_short}"
                )
            elif resolution == "partial":
                lawyer_body = (
                    f"שלום עו\"ד {lname},\n\nהמחלוקת נפתרה בפתרון משולב.\n\n"
                    f"החלטת הצוות:\n{admin_note or '(ראה אזור אישי)'}\n\nמזהה: {rid_short}"
                )
            else:
                lawyer_body = (
                    f"שלום עו\"ד {lname},\n\nלאחר בדיקת המחלוקת — הפנייה הזו תועבר לעורך דין אחר.\nאין צורך לפעול ביחס לפנייה זו.\n\n"
                    f"החלטת הצוות:\n{admin_note or '(ראה אזור אישי)'}\n\nמזהה: {rid_short}"
                )
            _send_email_branded(
                record["assigned_lawyer_email"], subject, lawyer_body,
                title=title, cta_label="לדשבורד ←",
                cta_url="https://legal-eye.1bigfam.com/lawyer-dashboard/",
            )

    # Daemon thread: a pending email send does not keep the process alive.
    threading.Thread(target=_worker, daemon=True).start()


# Request body for POST /v1/lawyer-request/{request_id}/resolve-dispute.
class _DisputeResolutionBody(BaseModel):  # type: ignore
    # Outcome code; the endpoint rejects anything outside these four values.
    resolution: str    # user | lawyer | partial | re_route
    # Optional admin note; stored on the record and quoted in notifications.
    note:       str = ""


@app.post("/v1/lawyer-request/{request_id}/resolve-dispute")
def lawyer_request_resolve_dispute(request_id: str, body: _DisputeResolutionBody, token: str = ""):  # type: ignore
    """Admin: resolve an open dispute. Sets status + resolution + notifies both.

    Args:
        request_id: ID of the disputed lawyer-request record.
        body: Resolution code (user | lawyer | partial | re_route) plus an
            optional admin note.
        token: Admin token; must equal the ``LE_ADMIN_TOKEN`` env variable.

    Returns:
        ``{"ok": True, "request_id", "resolution", "new_status"}`` on
        success; otherwise a ``JSONResponse`` with ``ok: False`` and status
        403 (bad token), 400 (invalid resolution / record not in
        ``disputed`` status) or 404 (no records file / unknown id).
    """
    import hmac
    import os
    import time
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing does not reveal how much
    # of the token matched. Compared as bytes because compare_digest()
    # rejects non-ASCII str arguments.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    if body.resolution not in ("user", "lawyer", "partial", "re_route"):
        return JSONResponse(status_code=400, content={"ok": False, "reason": "invalid_resolution"})
    if not _LAWYER_REQUEST_PATH.exists():
        return JSONResponse(status_code=404, content={"ok": False, "reason": "no_records"})
    target = None
    # The whole read-modify-write cycle happens under the lock so concurrent
    # writers cannot interleave with the rewrite.
    with _LAWYER_REQUEST_LOCK:
        items = []
        with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    items.append(json.loads(line))
                except Exception:
                    # Malformed line: skipped (and therefore not carried
                    # over into the rewritten file, as before).
                    continue
        for rec in items:
            # Guard against non-object JSON lines, which have no .get().
            if isinstance(rec, dict) and rec.get("id") == request_id:
                target = rec
                break
        if not target:
            return JSONResponse(status_code=404, content={"ok": False, "reason": "request_not_found"})
        if target.get("status") != "disputed":
            return JSONResponse(status_code=400, content={"ok": False, "reason": "not_disputed"})

        # One timestamp for the whole transition keeps the resolution and
        # update fields mutually consistent.
        now_ts  = time.time()
        now_iso = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime(now_ts))

        target["dispute_resolution"]      = body.resolution
        target["dispute_resolution_note"] = (body.note or "").strip()
        target["dispute_resolution_ts"]   = now_ts
        target["dispute_resolution_iso"]  = now_iso
        target["dispute_resolved_by"]     = "admin"

        # Status transition
        if body.resolution == "re_route":
            # Send back to admin queue, unassign current lawyer
            target["status"]               = "new"
            target["assigned_lawyer_id"]    = None
            target["assigned_lawyer_name"]  = None
            target["assigned_lawyer_email"] = None
            target["lawyer_response"]       = None
        else:
            # All other resolutions close the request
            target["status"] = "done"

        target["updated_ts"]  = now_ts
        target["updated_iso"] = now_iso

        # Write to a sibling temp file, then swap it in, so readers never
        # observe a half-written records file.
        tmp = _LAWYER_REQUEST_PATH.with_suffix(".jsonl.tmp")
        with open(tmp, "w", encoding="utf-8") as f:
            for rec in items:
                f.write(json.dumps(rec, ensure_ascii=False) + "\n")
        tmp.replace(_LAWYER_REQUEST_PATH)
    # Notify
    _notify_dispute_resolution_async(target)
    # Audit
    _audit_log(
        actor="admin", role="admin", action="dispute_resolved",
        target=request_id,
        meta={"resolution": body.resolution, "note": (body.note or "")[:200]},
    )
    return {"ok": True, "request_id": request_id, "resolution": body.resolution, "new_status": target["status"]}


def _send_admin_message_sync(to_addr: str, subject: str, body: str) -> bool:
    """v2.99.151 — Deliver a custom admin message through the branded email
    helper, blocking until it returns. The helper's result is passed through
    (True on send)."""
    # Fixed branding applied to every admin-authored message.
    branding = {
        "title":     "הודעה מצוות legal-eye",
        "cta_label": "לאזור האישי ←",
        "cta_url":   "https://legal-eye.1bigfam.com/my-matters/",
    }
    return _send_email_branded(to_addr, subject, body, **branding)


# Request body for POST /v1/lawyer-request/{request_id}/send-message.
class _AdminMessageBody(BaseModel):  # type: ignore
    # Who receives the message; the endpoint rejects any other value.
    recipient: str          # user | lawyer | both
    # Email subject; the endpoint requires it to be non-blank.
    subject:   str
    # Email text; the endpoint requires at least 5 characters after stripping.
    body:      str


@app.post("/v1/lawyer-request/{request_id}/send-message")
def lawyer_request_send_message(request_id: str, body: _AdminMessageBody, token: str = ""):  # type: ignore
    """Admin sends a custom email message to user / lawyer / both.
    Message is also logged on the record under admin_messages[].

    Args:
        request_id: ID of the lawyer-request record the message relates to.
        body: Recipient selector (user | lawyer | both), subject and text.
        token: Admin token; must equal the ``LE_ADMIN_TOKEN`` env variable.

    Returns:
        ``{"ok": True, "outcomes", "n_sent", "n_failed"}`` on success;
        otherwise a ``JSONResponse`` with ``ok: False`` and status 403 (bad
        token), 400 (invalid recipient / blank subject or too-short body /
        no email on record for the chosen recipient) or 404 (no records
        file / unknown id).
    """
    import hmac
    import os
    import time
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing does not reveal how much
    # of the token matched. Compared as bytes because compare_digest()
    # rejects non-ASCII str arguments.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    if body.recipient not in ("user", "lawyer", "both"):
        return JSONResponse(status_code=400, content={"ok": False, "reason": "invalid_recipient"})
    subj = (body.subject or "").strip()
    msg_body = (body.body or "").strip()
    if not subj or len(msg_body) < 5:
        return JSONResponse(status_code=400, content={"ok": False, "reason": "missing_subject_or_body"})
    if not _LAWYER_REQUEST_PATH.exists():
        return JSONResponse(status_code=404, content={"ok": False, "reason": "no_records"})
    target = None
    # NOTE(review): emails are sent synchronously while the lock is held, so
    # other writers of the records file wait for the sends to finish.
    with _LAWYER_REQUEST_LOCK:
        items = []
        with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    items.append(json.loads(line))
                except Exception:
                    # Malformed line: skipped (and therefore not carried
                    # over into the rewritten file, as before).
                    continue
        for rec in items:
            # Guard against non-object JSON lines, which have no .get().
            if isinstance(rec, dict) and rec.get("id") == request_id:
                target = rec
                break
        if not target:
            return JSONResponse(status_code=404, content={"ok": False, "reason": "request_not_found"})
        # Determine recipients
        recipients = []
        if body.recipient in ("user", "both") and target.get("user_email"):
            recipients.append(("user", target["user_email"]))
        if body.recipient in ("lawyer", "both") and target.get("assigned_lawyer_email"):
            recipients.append(("lawyer", target["assigned_lawyer_email"]))
        if not recipients:
            return JSONResponse(status_code=400, content={"ok": False, "reason": "no_valid_recipient_emails"})
        # Send + collect outcomes
        outcomes = []
        for role, addr in recipients:
            sent = _send_admin_message_sync(
                addr,
                f"[legal-eye] {subj}",
                f"{msg_body}\n\n---\nמזהה פנייה: {request_id[:8]}\n— legal-eye Admin",
            )
            outcomes.append({"role": role, "email": addr, "sent": sent})
        # Log onto record
        log_entry = {
            "ts":          time.time(),
            "iso":         time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()),
            "recipient":   body.recipient,
            "outcomes":    outcomes,
            "subject":     subj,
            "body":        msg_body,
        }
        target.setdefault("admin_messages", []).append(log_entry)
        # Rewrite via a sibling temp file, then swap it in, so readers never
        # observe a half-written records file.
        tmp = _LAWYER_REQUEST_PATH.with_suffix(".jsonl.tmp")
        with open(tmp, "w", encoding="utf-8") as f:
            for rec in items:
                f.write(json.dumps(rec, ensure_ascii=False) + "\n")
        tmp.replace(_LAWYER_REQUEST_PATH)
    # Tally once; both the audit entry and the response report the same counts.
    n_sent   = sum(1 for o in outcomes if o["sent"])
    n_failed = len(outcomes) - n_sent
    # v2.99.144 — audit
    _audit_log(
        actor="admin", role="admin", action="admin_message_sent",
        target=request_id,
        meta={
            "recipient": body.recipient,
            "subject": subj[:120],
            "n_sent":   n_sent,
            "n_failed": n_failed,
        },
    )
    return {
        "ok":       True,
        "outcomes": outcomes,
        "n_sent":   n_sent,
        "n_failed": n_failed,
    }


# Request body for POST /v1/lawyer-request/update.
class _LawyerRequestUpdateBody(BaseModel):  # type: ignore
    # ID of the record to update.
    id:     str
    # Target status; the endpoint rejects any other value.
    status: str           # new | contacted | done | spam
    # Optional admin note; stored as `admin_note` on the record when non-empty.
    note:   Optional[str] = ""


@app.post("/v1/lawyer-request/update")
def lawyer_request_update(req: _LawyerRequestUpdateBody, token: str = ""):  # type: ignore
    """v2.99.123 — Admin: update status of a waitlist record.
    Allowed statuses: new / contacted / done / spam.
    The JSONL file is rewritten through a temp file that replaces the
    original; a lock prevents concurrent writes.

    Args:
        req: Record id, target status and optional admin note.
        token: Admin token; must equal the ``LE_ADMIN_TOKEN`` env variable.

    Returns:
        ``{"ok": True, "id", "status", "notified"}`` on success, where
        ``notified`` reflects whether the record carries a ``user_email``;
        otherwise a ``JSONResponse`` with ``ok: False`` and status 403 (bad
        token), 400 (invalid status), 404 (no records file / unknown id) or
        500 (records file could not be read or rewritten).
    """
    import hmac
    import os
    import time
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing does not reveal how much
    # of the token matched. Compared as bytes because compare_digest()
    # rejects non-ASCII str arguments.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    allowed = {"new", "contacted", "done", "spam"}
    if req.status not in allowed:
        return JSONResponse(status_code=400, content={"ok": False, "reason": "invalid_status"})
    if not _LAWYER_REQUEST_PATH.exists():
        return JSONResponse(status_code=404, content={"ok": False, "reason": "no_records"})
    with _LAWYER_REQUEST_LOCK:
        items = []
        try:
            with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        items.append(json.loads(line))
                    except Exception:
                        # Malformed line: skipped (and therefore not carried
                        # over into the rewritten file, as before).
                        continue
        except Exception as e:
            return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
        # Find & update. Non-object JSON lines have no .get(), so skip them.
        updated_record = next(
            (rec for rec in items if isinstance(rec, dict) and rec.get("id") == req.id),
            None,
        )
        if updated_record is None:
            return JSONResponse(status_code=404, content={"ok": False, "reason": "id_not_found"})
        # Records written before the status field existed count as "new".
        old_status = updated_record.get("status", "new")
        updated_record["status"]      = req.status
        updated_record["updated_ts"]  = time.time()
        updated_record["updated_iso"] = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime())
        if req.note:
            updated_record["admin_note"] = req.note
        # Rewrite atomically
        try:
            tmp = _LAWYER_REQUEST_PATH.with_suffix(".jsonl.tmp")
            with open(tmp, "w", encoding="utf-8") as f:
                for rec in items:
                    f.write(json.dumps(rec, ensure_ascii=False) + "\n")
            tmp.replace(_LAWYER_REQUEST_PATH)
        except Exception as e:
            return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
    # v2.99.133 — Notify the user (async, non-blocking) if their status
    # changed to "contacted" or "done" and they have an email on record
    _notify_user_status_change_async(updated_record, old_status, req.status)
    # v2.99.144 — audit
    _audit_log(
        actor="admin", role="admin", action="request_status_update",
        target=req.id,
        meta={"old": old_status, "new": req.status, "note": req.note or ""},
    )
    return {"ok": True, "id": req.id, "status": req.status, "notified": bool(updated_record.get("user_email"))}


@app.get("/v1/lawyer-request/list")
def lawyer_request_list(token: str = ""):  # type: ignore
    """Admin: list all waitlist requests. Protected by a simple env-token.

    Args:
        token: Admin token; must equal the ``LE_ADMIN_TOKEN`` env variable.

    Returns:
        ``{"ok": True, "n", "items"}`` with every parseable record (an empty
        list when the records file does not exist yet); otherwise a
        ``JSONResponse`` with ``ok: False`` and status 403 (bad token) or
        500 (records file could not be read).
    """
    import hmac
    import os
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing does not reveal how much
    # of the token matched. Compared as bytes because compare_digest()
    # rejects non-ASCII str arguments.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    if not _LAWYER_REQUEST_PATH.exists():
        return {"ok": True, "n": 0, "items": []}
    items = []
    try:
        with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    items.append(json.loads(line))
                except Exception:
                    # Malformed line: leave it out of the listing.
                    continue
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
    return {"ok": True, "n": len(items), "items": items}


# ──────────────────────────────────────────────────────────────────────
# v2.99.125 — Lawyer onboarding v0. Lawyers register themselves so we
# can seed the pool. No Auth yet — admin manually verifies via license
# number + Bar Association lookup.
# ──────────────────────────────────────────────────────────────────────
# JSONL store of lawyer applications, one JSON object per line. The path is
# relative, so it resolves against the process working directory.
_LAWYER_APP_PATH = _wa_pl.Path("tau_rag/runtime/lawyer_applications.jsonl")
# Guards appends to the applications file (see lawyer_onboarding_submit).
_LAWYER_APP_LOCK = _wa_th.Lock()


def _notify_lawyer_application_async(record: dict) -> None:
    """Same webhook + email channels as lead notify, but framed for
    a lawyer joining the pool (different prefix in summaries).

    Both channels run in a daemon thread and are independent: the webhook is
    POSTed only when ``LE_LEAD_WEBHOOK_URL`` is set, the email is sent only
    when ``LE_LEAD_EMAIL_TO`` is set. Failures are printed, never raised.
    """
    import threading, os
    def _worker() -> None:
        # Webhook
        try:
            url = os.environ.get("LE_LEAD_WEBHOOK_URL", "").strip()
            if url:
                import urllib.request
                summary = (
                    f"⚖ עו\"ד חדש מבקש להצטרף ל-legal-eye\n"
                    f"שם: {record.get('full_name', '?')}\n"
                    f"רישיון: {record.get('license_number', '?')}\n"
                    f"אימייל: {record.get('email', '?')}\n"
                    f"טלפון: {record.get('phone', '?')}\n"
                    f"תחומים: {', '.join(record.get('domains', []))}\n"
                    f"ותק: {record.get('years_experience', '?')} שנים\n"
                    f"עיר: {record.get('city', '?')}\n"
                    f"מזהה: {record.get('id', '?')[:8]}"
                )
                # The same summary is sent under both "text" and "summary",
                # alongside the full record.
                payload = {"text": summary, "summary": summary, "record": record, "_type": "lawyer_application"}
                req = urllib.request.Request(
                    url, data=json.dumps(payload).encode("utf-8"),
                    headers={"Content-Type": "application/json"}, method="POST",
                )
                try:
                    with urllib.request.urlopen(req, timeout=10) as r:
                        print(f"[lawyer-app-notify] webhook OK ({r.status})")
                except Exception as e:
                    print(f"[lawyer-app-notify] webhook FAIL: {type(e).__name__}: {e}")
        except Exception as e:
            # Catches failures while building the summary/payload, so the
            # email step below still runs.
            print(f"[lawyer-app-notify] outer FAIL: {e}")
        # Email
        try:
            to_addr = os.environ.get("LE_LEAD_EMAIL_TO", "").strip()
            if to_addr:
                body = (
                    f"עו\"ד חדש מבקש להצטרף ל-legal-eye.\n\n"
                    f"מזהה: {record.get('id')}\n"
                    f"זמן: {record.get('iso')}\n\n"
                    f"שם מלא: {record.get('full_name', '?')}\n"
                    f"מס׳ רישיון: {record.get('license_number', '?')}\n"
                    f"אימייל: {record.get('email', '?')}\n"
                    f"טלפון: {record.get('phone', '?')}\n"
                    f"עיר: {record.get('city', '?')} (עבודה מרחוק: {'כן' if record.get('remote_ok') else 'לא'})\n\n"
                    f"תחומי התמחות: {', '.join(record.get('domains', []))}\n"
                    f"ותק: {record.get('years_experience', '?')} שנים\n"
                    f"שפות: {', '.join(record.get('languages', []))}\n\n"
                    f"ביוגרפיה:\n{record.get('bio', '(לא צוין)')}\n\n"
                    f"למה לרצות להצטרף:\n{record.get('motivation', '(לא צוין)')}"
                )
                subject = f"[legal-eye] עו״ד חדש · {record.get('full_name', '?')} · {record.get('id', '')[:8]}"
                ok = _send_email_branded(
                    to_addr, subject, body,
                    title="⚖ עו\"ד חדש מבקש להצטרף",
                    cta_label="לאימות עורכי הדין ←",
                    cta_url="https://legal-eye.1bigfam.com/admin-leads/lawyer-applications.html",
                )
                # Only a successful send is logged here; a falsy result
                # prints nothing at this level.
                if ok:
                    print(f"[lawyer-app-notify] email OK → {to_addr}")
        except Exception as e:
            print(f"[lawyer-app-notify] email outer FAIL: {e}")
    # Daemon thread: pending notifications do not keep the process alive.
    threading.Thread(target=_worker, daemon=True).start()


# Request body for POST /v1/lawyer-onboarding/submit.
# full_name, license_number, email and phone must be non-blank; the endpoint
# checks this itself after stripping whitespace.
class _LawyerApplicationBody(BaseModel):  # type: ignore
    full_name:        str
    license_number:   str                          # non-digits are stripped before validation
    email:            str
    phone:            str
    domains:          List[str]                    # multi-select
    years_experience: Optional[int] = 0
    languages:        Optional[List[str]] = None    # default: ['hebrew']
    city:             Optional[str] = ""
    remote_ok:        Optional[bool] = True
    bio:              Optional[str] = ""
    motivation:       Optional[str] = ""
    accept_terms:     bool                          # must be true
    invite_code:      Optional[str] = ""            # v2.99.150 — required if LE_BETA_GATED=1


@app.post("/v1/lawyer-onboarding/submit")
def lawyer_onboarding_submit(req: _LawyerApplicationBody):  # type: ignore
    """v2.99.125 — Lawyer joins the pool. No Auth — admin verifies
    manually against Bar Association registry.
    v2.99.150 — If LE_BETA_GATED=1, requires a valid invite code.

    Validates the application, rejects a repeat submission with the same
    license number + email, appends the record to the applications JSONL
    file, then notifies the admin channels and writes an audit entry.

    Returns:
        ``{"ok": True, "id", "message"}`` on success; otherwise a
        ``JSONResponse`` with ``ok: False`` and status 400 (invite / terms /
        missing or invalid field), 409 (already applied) or 500 (unexpected
        error).
    """
    import os, time, uuid
    try:
        # Beta gating (flag read once so the validate and consume steps agree)
        beta_gated = os.environ.get("LE_BETA_GATED", "").strip() in ("1", "true", "yes")
        if beta_gated:
            inv = _validate_invite_code(req.invite_code or "")
            if not inv.get("ok"):
                return JSONResponse(status_code=400, content={
                    "ok": False, "reason": f"invite_{inv.get('reason','invalid')}",
                })
        if not req.accept_terms:
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "terms_not_accepted",
            })
        # Required fields
        for field, val in [
            ("full_name", req.full_name),
            ("license_number", req.license_number),
            ("email", req.email),
            ("phone", req.phone),
        ]:
            if not (val or "").strip():
                return JSONResponse(status_code=400, content={
                    "ok": False, "reason": f"missing_{field}",
                })
        # Validate on the cleaned list so whitespace-only entries cannot
        # produce an application with no domains at all.
        domains = [d.strip() for d in (req.domains or []) if d and d.strip()]
        if not domains:
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "missing_domains",
            })
        # Validate email shape
        em = req.email.strip()
        if "@" not in em or "." not in em.split("@")[-1]:
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "invalid_email",
            })
        # Validate license: keep digits only, accept 3-8 of them
        # (Israeli Bar uses 4-6).
        lic = _re.sub(r"\D+", "", req.license_number)
        if len(lic) < 3 or len(lic) > 8:
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "invalid_license_number",
            })
        record = {
            "id":                str(uuid.uuid4()),
            "ts":                time.time(),
            "iso":                time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()),
            "full_name":         req.full_name.strip(),
            "license_number":    lic,
            "email":             em,
            "phone":             req.phone.strip(),
            "domains":           domains,
            "years_experience":  int(req.years_experience or 0),
            "languages":         (req.languages or ["hebrew"]),
            "city":              (req.city or "").strip(),
            "remote_ok":         bool(req.remote_ok),
            "bio":               (req.bio or "").strip(),
            "motivation":        (req.motivation or "").strip(),
            "status":            "submitted",
        }
        _LAWYER_APP_PATH.parent.mkdir(parents=True, exist_ok=True)
        # Duplicate check and append share one critical section; otherwise
        # two concurrent submissions could both pass the check and both be
        # written.
        with _LAWYER_APP_LOCK:
            # Check duplicate (same license + email = duplicate)
            existing = []
            if _LAWYER_APP_PATH.exists():
                try:
                    with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
                        for line in f:
                            line = line.strip()
                            if not line:
                                continue
                            try:
                                prev = json.loads(line)
                            except Exception:
                                continue
                            # Only JSON objects can be application records.
                            if isinstance(prev, dict):
                                existing.append(prev)
                except Exception as read_err:
                    # Best-effort: an unreadable file must not block a new
                    # application, but leave a trace of the failure.
                    print(f"[lawyer-app] duplicate check skipped: {type(read_err).__name__}: {read_err}")
            for prev in existing:
                if prev.get("license_number") == lic and prev.get("email") == em:
                    return JSONResponse(status_code=409, content={
                        "ok": False, "reason": "already_applied",
                        "id":  prev.get("id"),
                        "status": prev.get("status"),
                    })
            # Append
            with open(_LAWYER_APP_PATH, "a", encoding="utf-8") as f:
                f.write(json.dumps(record, ensure_ascii=False) + "\n")
        # v2.99.150 — consume invite if beta-gated
        if beta_gated and req.invite_code:
            _consume_invite_code(req.invite_code, em)
            # NOTE(review): set after the record was written, so the invite
            # code reaches the notification payload but not the stored line.
            record["invite_code"] = req.invite_code.strip().upper()
        _notify_lawyer_application_async(record)
        # v2.99.144 — audit
        _audit_log(
            actor=em, role="lawyer", action="lawyer_application_submitted",
            target=record["id"],
            meta={"license": lic, "domains": record["domains"], "years": record["years_experience"]},
        )
        print(f"[lawyer-app] new #{record['id'][:8]} {record['full_name']} "
              f"lic={lic} domains={record['domains']}")
        return {
            "ok":       True,
            "id":       record["id"],
            "message":  "בקשתך התקבלה. נוודא את פרטי הרישיון מול לשכת עורכי הדין ונחזור אליך תוך 5 ימי עסקים.",
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}",
        })


# ──────────────────────────────────────────────────────────────────────
# v2.99.155 — Public lawyer profile.
# Returns the lawyer's bio / specialties / credentials WITHOUT contact
# details (email/phone always masked). For verified lawyers only.
# Slug format: <name-slug>-<id_prefix> for nice URLs e.g.
#   /lawyer-profile/?slug=dani-cohen-abc12345
# Accepts either ?id=<full_uuid> or ?slug=<name-slug>-<id_prefix>.
# ──────────────────────────────────────────────────────────────────────
def _slugify_he(name: str) -> str:
    """Build a URL-safe slug from a (possibly Hebrew) name.

    Whitespace and underscores become hyphens; anything outside the Hebrew
    block, Latin letters, digits and hyphens is dropped; hyphen runs are
    collapsed and trimmed. An empty result falls back to "lawyer"."""
    import re as _re
    # Applied in order: separators → hyphen, strip disallowed chars,
    # collapse repeated hyphens.
    passes = (
        (r'[\s_]+', '-'),
        (r'[^֐-׿a-zA-Z0-9\-]', ''),
        (r'-+', '-'),
    )
    slug = (name or "").strip()
    for pattern, replacement in passes:
        slug = _re.sub(pattern, replacement, slug)
    slug = slug.strip('-')
    return slug if slug else "lawyer"


def _lawyer_public_slug(rec: dict) -> str:
    """Canonical public slug for a lawyer record: the slugified full name,
    a hyphen, then the first 8 characters of the record id."""
    name_part = _slugify_he(rec.get('full_name', ''))
    id_prefix = (rec.get('id', '') or '')[:8]
    return f"{name_part}-{id_prefix}"


def _lawyer_review_stats(lawyer_id: str) -> dict:
    """v2.99.189 — Compute review aggregation from lawyer_requests.jsonl.

    Scans every request whose ``assigned_lawyer_id`` equals ``lawyer_id`` and
    averages the numeric ``review_rating`` values in the range 1..5.
    Malformed lines, non-object lines, and missing, boolean or out-of-range
    ratings are ignored.

    Args:
        lawyer_id: ID of the lawyer; an empty value short-circuits.

    Returns:
        ``{"avg": <mean rounded to 1 decimal>, "count": <n>}``, or
        ``{"avg": None, "count": 0}`` when there are no usable ratings. A
        read failure is logged and whatever was aggregated so far is used.
    """
    if not lawyer_id or not _LAWYER_REQUEST_PATH.exists():
        return {"avg": None, "count": 0}
    total = 0
    n = 0
    try:
        with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                except Exception:
                    continue
                # A non-object line used to raise on .get() and silently end
                # the scan early; skip it and keep aggregating instead.
                if not isinstance(rec, dict):
                    continue
                if rec.get("assigned_lawyer_id") != lawyer_id:
                    continue
                rating = rec.get("review_rating")
                # bool is a subclass of int, so True would otherwise be
                # counted as a rating of 1.
                if isinstance(rating, bool) or not isinstance(rating, (int, float)):
                    continue
                if 1 <= rating <= 5:
                    total += rating
                    n += 1
    except Exception as e:
        # Best-effort: review stats must never break the caller.
        print(f"[lawyer-review-stats] read FAIL: {type(e).__name__}: {e}")
    if n == 0:
        return {"avg": None, "count": 0}
    return {"avg": round(total / n, 1), "count": n}


def _lawyer_public_view(rec: dict) -> dict:
    """Project a lawyer record onto its public-safe fields.

    Email is never included. The phone number is exposed only as
    ``whatsapp_phone`` and only when the record has ``whatsapp_public_optin``
    set; otherwise that field is an empty string."""
    stats = _lawyer_review_stats(rec.get("id", ""))    # v2.99.189

    # v2.99.204 — opt-in WhatsApp link. Digits only; a leading 0 on a number
    # of 9+ digits is swapped for the 972 prefix, numbers already starting
    # with 972 pass through, shorter inputs yield no link.
    whatsapp = ""
    if rec.get("whatsapp_public_optin"):
        only_digits = "".join(filter(str.isdigit, (rec.get("phone") or "").strip()))
        if only_digits.startswith("972"):
            whatsapp = only_digits
        elif len(only_digits) >= 9:
            if only_digits.startswith("0"):
                whatsapp = "972" + only_digits[1:]
            else:
                whatsapp = only_digits

    public = {}
    public["id"]               = rec.get("id", "")
    public["slug"]             = _lawyer_public_slug(rec)
    public["full_name"]        = rec.get("full_name", "")
    public["license_number"]   = rec.get("license_number", "")
    public["domains"]          = rec.get("domains", []) or []
    public["years_experience"] = rec.get("years_experience", 0)
    public["languages"]        = rec.get("languages", []) or []
    public["city"]             = rec.get("city", "")
    public["remote_ok"]        = bool(rec.get("remote_ok", False))
    public["bio"]              = rec.get("bio", "")
    public["verified"]         = rec.get("status") == "verified"
    public["joined_iso"]       = rec.get("iso", "")
    public["photo_url"]        = rec.get("photo_url", "")    # v2.99.178
    public["review_avg"]       = stats["avg"]                # v2.99.189
    public["review_count"]     = stats["count"]
    public["whatsapp_phone"]   = whatsapp                    # v2.99.204
    return public


@app.get("/sitemap.xml")
def sitemap_xml():  # type: ignore
    """v2.99.157 — Dynamic sitemap. Static public URLs + one URL per
    verified lawyer profile. Served via Vercel rewrite from
    legal-eye.1bigfam.com/sitemap.xml.

    Returns an ``application/xml`` response cached for one hour. Lawyer
    enumeration is best-effort: if the applications file cannot be read,
    the failure is logged and the static URLs are still served.
    """
    from fastapi.responses import Response
    from urllib.parse import quote
    import time
    base = "https://legal-eye.1bigfam.com"
    today = time.strftime("%Y-%m-%d")

    # Static URLs — homepage + marketing + content
    # Each entry is (loc, lastmod, changefreq, priority).
    static_urls = [
        (f"{base}/",                       today,         "weekly",  "1.0"),
        (f"{base}/landing",                today,         "weekly",  "0.9"),
        (f"{base}/eval",                   today,         "weekly",  "0.9"),
        (f"{base}/compare",                "2026-05-12",  "monthly", "0.8"),
        (f"{base}/founder",                "2026-05-12",  "monthly", "0.7"),
        (f"{base}/doctrines",              "2026-05-13",  "monthly", "0.8"),
        (f"{base}/blog",                   "2026-05-13",  "weekly",  "0.8"),
        (f"{base}/blog/legal-ai-pitfalls", "2026-05-13",  "monthly", "0.7"),
        (f"{base}/blog/how-to-read-legal-ai-eval", "2026-05-13", "monthly", "0.7"),
        (f"{base}/blog/rag-101",           "2026-05-13",  "monthly", "0.7"),
        (f"{base}/blog/850-docs-no-change","2026-05-13",  "monthly", "0.7"),
        (f"{base}/blog/rental-contract-mistakes","2026-05-25","monthly","0.8"),
        (f"{base}/blog/severance-rights-after-dismissal","2026-05-26","monthly","0.8"),
        (f"{base}/blog/consumer-rights-israel","2026-05-27","monthly","0.8"),
        (f"{base}/blog/statute-of-limitations-israel","2026-05-28","monthly","0.85"),
        (f"{base}/blog/what-to-do-when-sued","2026-05-29","monthly","0.85"),
        (f"{base}/docs/api",               "2026-05-13",  "monthly", "0.6"),
        (f"{base}/for-lawyers/",           today,         "weekly",  "0.9"),
        (f"{base}/lawyers/",               today,         "daily",   "0.9"),
        (f"{base}/faq/",                   today,         "weekly",  "0.8"),
        (f"{base}/about/",                 today,         "monthly", "0.7"),
        (f"{base}/contact/",               today,         "monthly", "0.7"),
        (f"{base}/pricing/",               today,         "weekly",  "0.8"),
        (f"{base}/press/",                 today,         "monthly", "0.6"),
        (f"{base}/status/",                today,         "daily",   "0.5"),
        (f"{base}/changelog/",             today,         "weekly",  "0.6"),
        (f"{base}/glossary/",              today,         "monthly", "0.8"),
        (f"{base}/calculator/",            today,         "monthly", "0.85"),
        (f"{base}/how-it-works/",          today,         "monthly", "0.9"),
        (f"{base}/reviews/",               today,         "weekly",  "0.75"),
        (f"{base}/templates/",             today,         "monthly", "0.9"),
        (f"{base}/quiz/",                  today,         "monthly", "0.85"),
        (f"{base}/precedents/",            today,         "monthly", "0.85"),
        (f"{base}/lawyer-cost-guide/",     today,         "monthly", "0.9"),
        (f"{base}/timeline/",              today,         "monthly", "0.8"),
        (f"{base}/start/",                 today,         "monthly", "0.95"),
        (f"{base}/israeli-laws/",          today,         "monthly", "0.85"),
        (f"{base}/our-corpus/",            today,         "monthly", "0.9"),
        (f"{base}/roadmap/",               today,         "monthly", "0.8"),
        (f"{base}/security/",              today,         "monthly", "0.75"),
        (f"{base}/en/",                    today,         "monthly", "0.7"),
        (f"{base}/methodology/",           today,         "monthly", "0.85"),
        (f"{base}/contribute/",            today,         "monthly", "0.75"),
        (f"{base}/now/",                   today,         "weekly",  "0.8"),
        (f"{base}/quick-reference/",       today,         "monthly", "0.85"),
        (f"{base}/api/",                   today,         "monthly", "0.85"),
        (f"{base}/api/playground/",        today,         "monthly", "0.8"),
        (f"{base}/verify-citation/",       today,         "monthly", "0.85"),
        (f"{base}/try/",                   today,         "weekly",  "0.9"),
        (f"{base}/case-brief/",            today,         "monthly", "0.8"),
        (f"{base}/deadline-calc/",         today,         "monthly", "0.9"),
        (f"{base}/tools/",                 today,         "weekly",  "0.95"),
        (f"{base}/redline/",               today,         "monthly", "0.85"),
        (f"{base}/sitemap/",               today,         "weekly",  "0.8"),
        (f"{base}/poster/",                today,         "monthly", "0.7"),
        (f"{base}/learn/",                 today,         "weekly",  "0.9"),
        (f"{base}/learn/day-1/",           today,         "monthly", "0.75"),
        (f"{base}/learn/day-2/",           today,         "monthly", "0.75"),
        (f"{base}/learn/day-3/",           today,         "monthly", "0.75"),
        (f"{base}/learn/day-4/",           today,         "monthly", "0.75"),
        (f"{base}/learn/day-5/",           today,         "monthly", "0.75"),
        (f"{base}/learn/day-6/",           today,         "monthly", "0.75"),
        (f"{base}/learn/day-7/",           today,         "monthly", "0.75"),
        (f"{base}/poster-citations/",      today,         "monthly", "0.7"),
        (f"{base}/posters/",               today,         "weekly",  "0.85"),
        (f"{base}/share/",                 today,         "monthly", "0.7"),
        (f"{base}/poster-doctrines/",      today,         "monthly", "0.7"),
        (f"{base}/qr/",                    today,         "monthly", "0.75"),
        (f"{base}/cases/",                 today,         "weekly",  "0.85"),
        (f"{base}/cases/raviv-bayit-yules/", today,       "monthly", "0.8"),
        (f"{base}/cases/roker-salomon/",   today,         "monthly", "0.8"),
        (f"{base}/cases/paz-gas-gazit-hadarom/", today,   "monthly", "0.8"),
        (f"{base}/cases/bank-mizrahi-migdal/", today,     "monthly", "0.85"),
        (f"{base}/cases/alice-miller-defense/", today,    "monthly", "0.85"),
        (f"{base}/cases/yedioth-kraus/",   today,         "monthly", "0.85"),
        (f"{base}/cases/aes-systems-saar/", today,        "monthly", "0.85"),
        (f"{base}/cases/vaknin-bet-shemesh/", today,      "monthly", "0.85"),
        (f"{base}/blog/non-compete-hi-tech-israel/", today, "monthly", "0.8"),
        (f"{base}/blog/negligence-claim-israel/", today, "monthly", "0.8"),
        (f"{base}/perf/",                   today,         "monthly", "0.7"),
        (f"{base}/limitations-check/",      today,         "monthly", "0.85"),
        (f"{base}/components/",             today,         "monthly", "0.7"),
        (f"{base}/og-test/",                today,         "monthly", "0.6"),
        (f"{base}/anonymize/",             today,         "monthly", "0.85"),
        (f"{base}/numbers/",               today,         "weekly",  "0.85"),
        (f"{base}/keyboard/",              today,         "monthly", "0.65"),
        (f"{base}/badge/",                 today,         "monthly", "0.7"),
        (f"{base}/lawyer-faq/",            today,         "monthly", "0.85"),
        (f"{base}/ai-glossary/",           today,         "monthly", "0.8"),
        (f"{base}/lawyers/onboard.html",   "2026-05-21",  "monthly", "0.6"),
        # v2.99.200 — Domain landing pages (SEO funnel per-area)
        (f"{base}/lawyers/employment/",    today,         "weekly",  "0.85"),
        (f"{base}/lawyers/contracts/",     today,         "weekly",  "0.85"),
        (f"{base}/lawyers/consumer/",      today,         "weekly",  "0.85"),
        (f"{base}/lawyers/tenancy/",       today,         "weekly",  "0.85"),
        (f"{base}/legal/tos.html",         "2026-05-12",  "yearly",  "0.3"),
        (f"{base}/legal/privacy.html",     "2026-05-12",  "yearly",  "0.3"),
    ]

    # Dynamic — one URL per verified lawyer
    lawyer_urls = []
    if _LAWYER_APP_PATH.exists():
        try:
            with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        rec = json.loads(line)
                    except (ValueError, RecursionError):
                        # Malformed line: skip it, keep enumerating.
                        continue
                    # A non-object line (list, number, ...) must not abort
                    # the enumeration of every remaining lawyer.
                    if not isinstance(rec, dict):
                        continue
                    if rec.get("status") != "verified":
                        continue
                    slug = _lawyer_public_slug(rec)
                    # Date part of the record's timestamp; today if absent.
                    iso = str(rec.get("iso", "") or "")[:10] or today
                    lawyer_urls.append(
                        (f"{base}/lawyer-profile/?slug={quote(slug)}",
                         iso, "monthly", "0.7")
                    )
        except Exception as e:
            print(f"[sitemap] lawyer enum FAIL: {e}")

    lines = ['<?xml version="1.0" encoding="UTF-8"?>',
             '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">']
    for loc, lastmod, freq, prio in static_urls + lawyer_urls:
        # XML-safe loc (& must be &amp;)
        loc_safe = loc.replace("&", "&amp;")
        lines.append(
            f"  <url><loc>{loc_safe}</loc>"
            f"<lastmod>{lastmod}</lastmod>"
            f"<changefreq>{freq}</changefreq>"
            f"<priority>{prio}</priority></url>"
        )
    lines.append('</urlset>')
    xml = "\n".join(lines)
    return Response(
        content=xml, media_type="application/xml; charset=utf-8",
        headers={"Cache-Control": "public, max-age=3600, s-maxage=3600"},
    )


@app.get("/robots.txt")
def robots_txt():  # type: ignore
    """v2.99.157 — Dynamic robots.txt.

    Lists the publicly crawlable paths, disallows the admin/private ones,
    and advertises the dynamic sitemap.xml. Served as UTF-8 plain text
    with a one-hour cache lifetime.
    """
    from fastapi.responses import Response

    crawlable = (
        "/", "/landing", "/eval", "/compare", "/doctrines", "/founder",
        "/blog/", "/docs/api", "/legal/", "/for-lawyers/", "/lawyers/",
        "/lawyer-profile/", "/faq/", "/about/", "/contact/", "/pricing/",
        "/press/", "/brand/", "/status/", "/changelog/",
    )
    private = (
        "/admin", "/admin-founding50/", "/admin-leads/", "/admin-analytics/",
        "/docs/setup/", "/my-matters/", "/lawyer-dashboard/", "/v1/",
        "/static/",
    )

    rows = [
        "# legal-eye — Legal Intelligence Platform",
        "# legal-eye.1bigfam.com · Hebrew legal RAG, verbatim-from-precedent",
        "",
        "User-agent: *",
    ]
    rows.extend(f"Allow: {path}" for path in crawlable)
    rows.append("")
    rows.append("# Internal / admin / private paths")
    rows.extend(f"Disallow: {path}" for path in private)
    rows += [
        "",
        "# Be a good citizen",
        "Crawl-delay: 1",
        "",
        "Sitemap: https://legal-eye.1bigfam.com/sitemap.xml",
    ]
    # The document ends with a single trailing newline.
    body = "\n".join(rows) + "\n"

    cache_headers = {"Cache-Control": "public, max-age=3600"}
    return Response(
        content=body,
        media_type="text/plain; charset=utf-8",
        headers=cache_headers,
    )


@app.get("/v1/lawyer/profile")
def lawyer_profile(id: str = "", slug: str = ""):  # type: ignore
    """v2.99.155 — Public lawyer profile. Returns only public-safe fields.

    Lookup by either full id (`?id=`) or slug (`?slug=name-id_prefix`).
    The first record in file order that matches the id, the exact slug,
    or whose id starts with the slug's 8-character trailing segment wins.
    Only returns verified lawyers.

    Responses:
        200 ``{"ok": True, "lawyer": {...}}`` for a verified match.
        400 when neither ``id`` nor ``slug`` is supplied.
        404 when nothing matches or the match is not verified.
        500 when the applications file cannot be read.
    """
    if not (id or slug):
        return JSONResponse(status_code=400, content={"ok": False, "reason": "id_or_slug_required"})
    if not _LAWYER_APP_PATH.exists():
        return JSONResponse(status_code=404, content={"ok": False, "reason": "not_found"})

    # Loop-invariant: the trailing slug segment doubles as an id prefix,
    # but only when it is exactly 8 characters long.
    id_prefix = ""
    if slug and "-" in slug:
        tail = slug.rsplit("-", 1)[-1]
        if len(tail) == 8:
            id_prefix = tail

    target = None
    try:
        with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                except (ValueError, RecursionError):
                    continue
                # Skip non-object lines instead of failing the whole lookup.
                if not isinstance(rec, dict):
                    continue
                # Match by id or slug
                if id and rec.get("id") == id:
                    target = rec
                    break
                if slug and _lawyer_public_slug(rec) == slug:
                    target = rec
                    break
                # Also accept just id-prefix tail of slug as a fallback
                if id_prefix and str(rec.get("id") or "").startswith(id_prefix):
                    target = rec
                    break
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": f"{type(e).__name__}: {e}"})
    if not target:
        return JSONResponse(status_code=404, content={"ok": False, "reason": "not_found"})
    if target.get("status") != "verified":
        return JSONResponse(status_code=404, content={"ok": False, "reason": "not_verified",
                                                       "hint": "Profile is only public after admin verification."})
    return {"ok": True, "lawyer": _lawyer_public_view(target)}


@app.get("/v1/lawyer/directory")
def lawyer_directory(domain: str = "", city: str = "", limit: int = 50):  # type: ignore
    """v2.99.155 — Public directory of verified lawyers. Optional filters.

    Args:
        domain: when non-empty, keep only lawyers whose ``domains`` list
            contains this exact value.
        city: when non-empty, keep only lawyers whose ``city`` equals this
            value after stripping surrounding whitespace on both sides.
        limit: maximum number of items returned, clamped to 1..200.

    Returns:
        ``{"ok": True, "n": <count>, "items": [...]}`` with public-safe
        views sorted by years of experience (descending), or a 500 JSON
        response when the applications file cannot be read.
    """
    if not _LAWYER_APP_PATH.exists():
        return {"ok": True, "n": 0, "items": []}
    items = []
    wanted_city = city.strip()
    try:
        with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                except (ValueError, RecursionError):
                    continue
                # Ignore non-object lines rather than failing the request.
                if not isinstance(rec, dict):
                    continue
                if rec.get("status") != "verified":
                    continue
                if domain and domain not in (rec.get("domains", []) or []):
                    continue
                if city and (rec.get("city", "") or "").strip() != wanted_city:
                    continue
                items.append(_lawyer_public_view(rec))
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})

    def _years(item):
        """Numeric years of experience; anything non-numeric counts as 0."""
        years = item.get("years_experience")
        return years if isinstance(years, (int, float)) else 0

    # Sort: most experienced first. A negated key (rather than reverse=True)
    # keeps the original tie order; the sort runs outside the try above, so
    # a stray non-numeric value must not raise here.
    items.sort(key=lambda x: -_years(x))
    items = items[:max(1, min(limit, 200))]
    return {"ok": True, "n": len(items), "items": items}


@app.get("/v1/public/reviews")
def public_reviews(limit: int = 12):  # type: ignore
    """v2.99.212 — Public reviews feed.

    Returns the most recent client reviews (any request record carrying a
    numeric ``review_rating`` between 1 and 5) with the rating and the
    review text truncated to 280 characters. The reviewer is anonymized
    to their first name followed by four asterisks. Each item also
    carries the assigned lawyer's name, slug and first domain when that
    lawyer is verified. The response is cacheable for 5 minutes.

    Args:
        limit: number of reviews to return, clamped to 1..50.

    Returns:
        JSON with ``n`` (items returned), ``total_count`` (all rated
        records), ``review_avg`` (mean rating rounded to 2 decimals, or
        ``None`` when there are none) and ``items``.
    """
    import json as _json
    from fastapi.responses import JSONResponse as _JR

    def _iter_records(path):
        """Yield every JSON object stored one-per-line in *path*."""
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = _json.loads(line)
                except (ValueError, RecursionError):
                    continue
                if isinstance(rec, dict):
                    yield rec

    limit = max(1, min(int(limit), 50))
    out_items = []
    avg = None
    total_count = 0

    # Build lawyer_id → {name, slug, domains} map for verified lawyers only
    lawyer_map = {}
    if _LAWYER_APP_PATH.exists():
        try:
            for rec in _iter_records(_LAWYER_APP_PATH):
                if rec.get("status") != "verified":
                    continue
                lid = rec.get("id", "")
                if lid:
                    lawyer_map[lid] = {
                        "name": rec.get("full_name", ""),
                        "slug": _lawyer_public_slug(rec),
                        "domains": rec.get("domains", []) or [],
                    }
        except Exception as e:
            # Best-effort: reviews are still served without lawyer links.
            print(f"[public-reviews] lawyer map FAIL: {e}")

    # Walk request log to collect every record that carries a valid rating
    rated = []
    if _LAWYER_REQUEST_PATH.exists():
        try:
            for rec in _iter_records(_LAWYER_REQUEST_PATH):
                rating = rec.get("review_rating")
                if not (isinstance(rating, (int, float)) and 1 <= rating <= 5):
                    continue
                total_count += 1
                rated.append(rec)
        except Exception as e:
            # Best-effort: keep whatever was collected before the failure.
            print(f"[public-reviews] request scan FAIL: {e}")

    if rated:
        avg = round(sum(r["review_rating"] for r in rated) / len(rated), 2)

    # Sort by review_iso desc (fall back to iso). Coerced to str so that a
    # stray non-string timestamp cannot raise during comparison.
    rated.sort(key=lambda r: str(r.get("review_iso") or r.get("iso") or ""), reverse=True)
    for rec in rated[:limit]:
        lawyer = lawyer_map.get(rec.get("assigned_lawyer_id", ""), {})
        requester_name = str(rec.get("name") or "").strip()
        # Anonymize: first name followed by "****"; generic label if unnamed
        if requester_name:
            anon = requester_name.split()[0] + " " + ("*" * 4)
        else:
            anon = "לקוח אנונימי"
        text = str(rec.get("review_text") or "").strip()
        out_items.append({
            "rating":        rec.get("review_rating"),
            "text":          text[:280],
            "iso":           str(rec.get("review_iso") or rec.get("iso") or "")[:10],
            "reviewer":      anon,
            "lawyer_name":   lawyer.get("name", ""),
            "lawyer_slug":   lawyer.get("slug", ""),
            "lawyer_domain": (lawyer.get("domains") or [None])[0],
        })

    return _JR(
        content={
            "ok": True,
            "n":          len(out_items),
            "total_count": total_count,
            "review_avg": avg,
            "items":      out_items,
        },
        headers={
            "Cache-Control": "public, max-age=300, s-maxage=300",
            "Access-Control-Allow-Origin": "*",
        },
    )


@app.get("/v1/public/stats")
def public_platform_stats():  # type: ignore
    """v2.99.202 — Public stats widget.

    Returns aggregate counts safe for public display: verified/pending
    lawyer counts, distinct practice domains among verified lawyers,
    total Q&A rows, review count and average rating, active newsletter
    subscribers and days since launch. Every section is best-effort: a
    failure is logged and leaves that section at its default value.
    The response is cacheable for 5 minutes.
    """
    import json as _json, datetime as _dt
    from fastapi.responses import JSONResponse as _JR

    def _iter_records(path):
        """Yield every JSON object stored one-per-line in *path* (if any)."""
        if not path.exists():
            return
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = _json.loads(line)
                except (ValueError, RecursionError):
                    continue
                if isinstance(rec, dict):
                    yield rec

    # Static project launch date (Day 0 = 2026-04-07; legal-eye public Day 48)
    LAUNCH_DATE = _dt.date(2026, 4, 7)
    days_running = max(1, (_dt.date.today() - LAUNCH_DATE).days)

    out = {
        "ok": True,
        "days_running": days_running,
        "lawyers_verified": 0,
        "lawyers_pending": 0,
        "lawyer_domains": 0,
        "questions_answered": 0,
        "reviews_count": 0,
        "review_avg": None,
        "newsletter_subscribers": 0,
        # Timezone-aware replacement for the deprecated utcnow(); the
        # emitted format (seconds precision, trailing "Z") is unchanged.
        "last_updated": _dt.datetime.now(_dt.timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    }

    # Verified lawyers + domains coverage
    try:
        domains_seen = set()
        for rec in _iter_records(_LAWYER_APP_PATH):
            status = rec.get("status", "")
            if status == "verified":
                out["lawyers_verified"] += 1
                domains_seen.update(rec.get("domains") or [])
            elif status in ("pending", "submitted"):
                out["lawyers_pending"] += 1
        out["lawyer_domains"] = len(domains_seen)
    except Exception as e:
        print(f"[public-stats] lawyer scan FAIL: {e}")

    # Questions answered (lawyer_qa row count)
    try:
        conn = _lawyer_qa_db()
        cur = conn.execute("SELECT COUNT(*) FROM lawyer_qa")
        out["questions_answered"] = int(cur.fetchone()[0] or 0)
    except Exception as e:
        print(f"[public-stats] qa count FAIL: {e}")

    # Reviews aggregate (from the request log's review_rating field)
    try:
        ratings = []
        for rec in _iter_records(_LAWYER_REQUEST_PATH):
            rating = rec.get("review_rating")
            if isinstance(rating, (int, float)) and 1 <= rating <= 5:
                ratings.append(float(rating))
        if ratings:
            out["reviews_count"] = len(ratings)
            out["review_avg"] = round(sum(ratings) / len(ratings), 2)
    except Exception as e:
        print(f"[public-stats] review scan FAIL: {e}")

    # Newsletter subscribers (active only)
    try:
        for rec in _iter_records(_NEWSLETTER_PATH):
            if rec.get("status") == "active":
                out["newsletter_subscribers"] += 1
    except Exception as e:
        print(f"[public-stats] newsletter scan FAIL: {e}")

    return _JR(
        content=out,
        headers={
            "Cache-Control": "public, max-age=300, s-maxage=300",
            "Access-Control-Allow-Origin": "*",
        },
    )


@app.get("/v1/lawyer-onboarding/list")
def lawyer_onboarding_list(token: str = "", with_stats: bool = True):  # type: ignore
    """Admin: list all lawyer applications.
    v2.99.147 — by default also inlines stats per lawyer (KPIs).

    Args:
        token: must equal the ``LE_ADMIN_TOKEN`` environment variable;
            access is always refused when that variable is unset.
        with_stats: when true, attach a ``stats`` dict to every record.

    Returns:
        ``{"ok": True, "n": <count>, "items": [...]}``; 403 on a bad
        token; 500 when the applications file cannot be read.
    """
    import hmac
    import os
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing does not leak the token.
    # Compared as bytes because compare_digest rejects non-ASCII str input.
    if not admin_token or not hmac.compare_digest(
        token.encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    if not _LAWYER_APP_PATH.exists():
        return {"ok": True, "n": 0, "items": []}
    items = []
    try:
        with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    items.append(json.loads(line))
                except (ValueError, RecursionError):
                    # Malformed line: leave it out of the listing.
                    continue
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
    if with_stats:
        try:
            from ..matching import stats_for_all_lawyers
            all_stats = stats_for_all_lawyers()
            for rec in items:
                # Only object records can carry stats; one odd line must
                # not abort the stats pass for everyone after it.
                if isinstance(rec, dict):
                    rec["stats"] = all_stats.get(rec.get("id", ""), {})
        except Exception as e:
            print(f"[lawyer-list] stats compute FAIL: {e}")
    return {"ok": True, "n": len(items), "items": items}


# Request body for POST /v1/admin/lawyer-request/note.
class _LawyerRequestNoteBody(BaseModel):  # type: ignore
    # id of the lawyer-request record the note is appended to
    id:    str
    # free-text note; the handler strips it and stores at most 1000 chars
    note:  str


@app.post("/v1/admin/lawyer-request/note")
def lawyer_request_note(req: _LawyerRequestNoteBody, token: str = ""):  # type: ignore
    """v2.99.196 — Admin: append a timestamped CRM note to a lawyer
    request record. Persistent, append-only. Same pattern as
    /v1/admin/lawyer-application/note but for requests.

    The request log is rewritten through a temp file under
    ``_LAWYER_REQUEST_LOCK``. Lines that cannot be parsed as a JSON
    object are carried over verbatim instead of being dropped, so adding
    a note never destroys unrelated data.

    Returns:
        ``{"ok": True, "note": {...}}`` on success; 403 on a bad token;
        400 when the note is shorter than 2 characters after stripping;
        404 when the log or the record id does not exist; 500 when the
        log cannot be read.
    """
    import hmac, os, time, uuid
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing does not leak the token.
    # Compared as bytes because compare_digest rejects non-ASCII str input.
    if not admin_token or not hmac.compare_digest(
        token.encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    note_text = (req.note or "").strip()
    if len(note_text) < 2:
        return JSONResponse(status_code=400, content={"ok": False, "reason": "note_empty"})
    if not _LAWYER_REQUEST_PATH.exists():
        return JSONResponse(status_code=404, content={"ok": False, "reason": "no_records"})

    note_entry = {
        "id":   str(uuid.uuid4())[:8],
        "ts":   time.time(),
        "iso":  time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()),
        "text": note_text[:1000],
    }

    with _LAWYER_REQUEST_LOCK:
        # Each row is either a parsed record (dict) or the raw stripped
        # line when it is not a JSON object; raw rows are written back as-is.
        rows = []
        try:
            with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        parsed = json.loads(line)
                    except (ValueError, RecursionError):
                        parsed = None
                    rows.append(parsed if isinstance(parsed, dict) else line)
        except Exception as e:
            return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})

        target = next(
            (r for r in rows if isinstance(r, dict) and r.get("id") == req.id),
            None,
        )
        if target is None:
            return JSONResponse(status_code=404, content={"ok": False, "reason": "id_not_found"})
        # Start a fresh history when the field is missing or not a list.
        if not isinstance(target.get("notes_history"), list):
            target["notes_history"] = []
        target["notes_history"].append(note_entry)

        # Write to a sibling temp file, then swap it in with one rename.
        tmp = _LAWYER_REQUEST_PATH.with_suffix(".jsonl.tmp")
        with open(tmp, "w", encoding="utf-8") as f:
            for row in rows:
                text = json.dumps(row, ensure_ascii=False) if isinstance(row, dict) else row
                f.write(text + "\n")
        tmp.replace(_LAWYER_REQUEST_PATH)

    _audit_log(actor="admin", role="admin", action="request_note_added",
               target=req.id, meta={"note_id": note_entry["id"], "len": len(note_text)})
    return {"ok": True, "note": note_entry}


# Request body for POST /v1/admin/lawyer-application/note.
class _LawyerAppNoteBody(BaseModel):  # type: ignore
    # id of the lawyer application record the note is appended to
    id:    str
    # free-text note; the handler strips it and stores at most 1000 chars
    note:  str


@app.post("/v1/admin/lawyer-application/note")
def lawyer_application_note(req: _LawyerAppNoteBody, token: str = ""):  # type: ignore
    """v2.99.194 — Admin: append a timestamped note to a lawyer's record.
    Persistent across status changes (unlike admin_note which is overwritten).
    Useful for CRM-style outreach tracking.

    The applications file is rewritten through a temp file under
    ``_LAWYER_APP_LOCK``. Lines that cannot be parsed as a JSON object
    are carried over verbatim instead of being dropped, so adding a note
    never destroys unrelated data.

    Returns:
        ``{"ok": True, "note": {...}}`` on success; 403 on a bad token;
        400 when the note is shorter than 2 characters after stripping;
        404 when the file or the record id does not exist; 500 when the
        file cannot be read.
    """
    import hmac, os, time, uuid
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing does not leak the token.
    # Compared as bytes because compare_digest rejects non-ASCII str input.
    if not admin_token or not hmac.compare_digest(
        token.encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    note_text = (req.note or "").strip()
    if len(note_text) < 2:
        return JSONResponse(status_code=400, content={"ok": False, "reason": "note_empty"})
    if not _LAWYER_APP_PATH.exists():
        return JSONResponse(status_code=404, content={"ok": False, "reason": "no_records"})

    note_entry = {
        "id":   str(uuid.uuid4())[:8],
        "ts":   time.time(),
        "iso":  time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()),
        "text": note_text[:1000],
    }

    with _LAWYER_APP_LOCK:
        # Each row is either a parsed record (dict) or the raw stripped
        # line when it is not a JSON object; raw rows are written back as-is.
        rows = []
        try:
            with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        parsed = json.loads(line)
                    except (ValueError, RecursionError):
                        parsed = None
                    rows.append(parsed if isinstance(parsed, dict) else line)
        except Exception as e:
            return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})

        target = next(
            (r for r in rows if isinstance(r, dict) and r.get("id") == req.id),
            None,
        )
        if target is None:
            return JSONResponse(status_code=404, content={"ok": False, "reason": "id_not_found"})
        # Start a fresh history when the field is missing or not a list.
        if not isinstance(target.get("notes_history"), list):
            target["notes_history"] = []
        target["notes_history"].append(note_entry)

        # Write to a sibling temp file, then swap it in with one rename.
        tmp = _LAWYER_APP_PATH.with_suffix(".jsonl.tmp")
        with open(tmp, "w", encoding="utf-8") as f:
            for row in rows:
                text = json.dumps(row, ensure_ascii=False) if isinstance(row, dict) else row
                f.write(text + "\n")
        tmp.replace(_LAWYER_APP_PATH)

    _audit_log(actor="admin", role="admin", action="lawyer_note_added",
               target=req.id, meta={"note_id": note_entry["id"], "len": len(note_text)})
    return {"ok": True, "note": note_entry}


# Request body for POST /v1/lawyer-onboarding/update (admin status change).
class _LawyerAppUpdateBody(BaseModel):  # type: ignore
    # id of the lawyer application record to update
    id:           str
    # new status; the handler rejects anything outside the set listed here
    status:       str           # submitted | under_review | verified | rejected
    # optional admin note accompanying the change; defaults to empty
    admin_note:   Optional[str] = ""


# v2.99.178 — Lawyer profile photos
# Directory holding one photo file per lawyer, named "{lawyer_id}.{ext}".
# The path is relative, so it resolves against the process working directory.
_LAWYER_PHOTO_DIR = _wa_pl.Path("tau_rag/runtime/lawyer_photos")
# Lower-case filename extensions accepted on upload and probed when serving.
_ALLOWED_PHOTO_EXTS = {"jpg", "jpeg", "png", "webp"}
# Upload size ceiling in bytes; larger uploads are rejected.
_MAX_PHOTO_BYTES = 4 * 1024 * 1024   # 4MB


@app.post("/v1/lawyer-onboarding/photo")
async def lawyer_photo_upload(request: Request, lawyer_id: str = ""):  # type: ignore
    """v2.99.178 — Upload/replace a lawyer's profile photo.

    Auth: signed-in lawyer (via session) updating their own record,
    OR admin via ?token=. Accepted extensions are those in
    ``_ALLOWED_PHOTO_EXTS``; maximum size is ``_MAX_PHOTO_BYTES``.
    Stored on disk at ``_LAWYER_PHOTO_DIR/{lawyer_id}.{ext}``.
    Updates the lawyer record's photo_url field.

    The record lookup, photo write and JSONL rewrite all happen under
    ``_LAWYER_APP_LOCK`` so a concurrent update to the applications file
    cannot be overwritten with stale data. The new photo is written
    before stale ones are removed, so a failed write keeps the old photo.

    Returns:
        ``{"ok": True, "photo_url": ..., "size": ...}`` on success;
        401 when unauthenticated; 400 for a missing id/photo, oversized
        file or bad extension; 403 when a lawyer targets another
        lawyer's record; 404 when the record does not exist; 500 on I/O
        failure.
    """
    import hmac, os, time
    # Auth: either admin token or self-update via signed-in session
    is_admin = False
    sess = None
    try:
        admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
        q_token = request.query_params.get("token", "")
        # Constant-time comparison so timing does not leak the token;
        # bytes because compare_digest rejects non-ASCII str input.
        if admin_token and hmac.compare_digest(
            q_token.encode("utf-8"), admin_token.encode("utf-8")
        ):
            is_admin = True
    except Exception:
        is_admin = False
    if not is_admin:
        try:
            from ..auth import session_from_header
            sess = session_from_header(request.headers.get("authorization"))
        except Exception:
            # Treated as "no session"; rejected just below.
            sess = None
    if not (is_admin or sess):
        return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})

    if not lawyer_id:
        return JSONResponse(status_code=400, content={"ok": False, "reason": "missing_lawyer_id"})

    # Read multipart file
    try:
        form = await request.form()
        file = form.get("photo")
        if not file or not hasattr(file, "filename"):
            return JSONResponse(status_code=400, content={"ok": False, "reason": "missing_photo"})
        # Read one byte past the limit at most: enough to detect an
        # oversized upload without loading all of it into memory.
        content = await file.read(_MAX_PHOTO_BYTES + 1)
        if len(content) > _MAX_PHOTO_BYTES:
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "file_too_large",
                "max_bytes": _MAX_PHOTO_BYTES,
            })
        # Determine extension
        fname = (file.filename or "").lower()
        ext = fname.rsplit(".", 1)[-1] if "." in fname else ""
        if ext not in _ALLOWED_PHOTO_EXTS:
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "invalid_extension",
                "allowed": list(_ALLOWED_PHOTO_EXTS),
            })
    except Exception as e:
        return JSONResponse(status_code=400, content={
            "ok": False, "error": f"{type(e).__name__}: {e}",
        })

    if not _LAWYER_APP_PATH.exists():
        return JSONResponse(status_code=404, content={"ok": False, "reason": "lawyer_not_found"})

    photo_url = f"/v1/lawyer/photo/{lawyer_id}"
    with _LAWYER_APP_LOCK:
        # Locate the lawyer record. Each row is a parsed record (dict) or
        # the raw stripped line when it is not a JSON object; raw rows are
        # written back unchanged so the rewrite never drops data.
        rows = []
        try:
            with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        parsed = json.loads(line)
                    except (ValueError, RecursionError):
                        parsed = None
                    rows.append(parsed if isinstance(parsed, dict) else line)
        except Exception as e:
            return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})

        target = next(
            (r for r in rows if isinstance(r, dict) and r.get("id") == lawyer_id),
            None,
        )
        if not target:
            return JSONResponse(status_code=404, content={"ok": False, "reason": "lawyer_not_found"})

        # If self-update path, verify email matches signed-in session.
        # An empty session email never matches, so a session without an
        # email cannot claim a record whose email is also empty.
        if not is_admin and sess:
            sess_email = (sess.get("email", "") or "").lower()
            rec_email = (target.get("email", "") or "").lower()
            if not sess_email or rec_email != sess_email:
                return JSONResponse(status_code=403, content={"ok": False, "reason": "not_your_record"})

        # Save photo first; only then drop photos left from an earlier
        # upload with a different extension.
        target_path = _LAWYER_PHOTO_DIR / f"{lawyer_id}.{ext}"
        try:
            _LAWYER_PHOTO_DIR.mkdir(parents=True, exist_ok=True)
            target_path.write_bytes(content)
        except Exception as e:
            return JSONResponse(status_code=500, content={"ok": False, "error": f"write: {e}"})
        for old in _LAWYER_PHOTO_DIR.glob(f"{lawyer_id}.*"):
            if old == target_path:
                continue
            try:
                old.unlink()
            except OSError:
                # Leftover file is harmless; keep going.
                pass

        # Update lawyer record with photo_url
        target["photo_url"] = photo_url
        target["photo_ext"] = ext
        target["photo_updated_ts"] = time.time()

        # Atomic rewrite of JSONL: write a sibling temp file, then rename.
        tmp = _LAWYER_APP_PATH.with_suffix(".jsonl.tmp")
        with open(tmp, "w", encoding="utf-8") as f:
            for row in rows:
                text = json.dumps(row, ensure_ascii=False) if isinstance(row, dict) else row
                f.write(text + "\n")
        tmp.replace(_LAWYER_APP_PATH)

    _audit_log(
        actor=target.get("email", "") if not is_admin else "admin",
        role="lawyer" if not is_admin else "admin",
        action="lawyer_photo_uploaded",
        target=lawyer_id,
        meta={"ext": ext, "size": len(content)},
    )

    return {"ok": True, "photo_url": photo_url, "size": len(content)}


@app.get("/v1/lawyer/photo/{lawyer_id}")
def lawyer_photo_get(lawyer_id: str):  # type: ignore
    """v2.99.178 — Serve a lawyer's profile photo. Public (verified lawyers only).

    Responds 404 when the applications file is missing, when no record with
    this id has status "verified", or when no photo file exists for the id.
    """
    from fastapi.responses import FileResponse
    if not _LAWYER_APP_PATH.exists():
        return JSONResponse(status_code=404, content={"ok": False, "reason": "not_found"})
    # Verify lawyer is verified (don't leak photos of pending applications)
    is_verified = False
    try:
        with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                except ValueError:
                    # Corrupt line (json.JSONDecodeError is a ValueError): skip it.
                    continue
                if not isinstance(rec, dict):
                    # A non-object JSON value cannot be a lawyer record; skipping
                    # it keeps one stray line from hiding every later record.
                    continue
                if rec.get("id") == lawyer_id and rec.get("status") == "verified":
                    is_verified = True
                    break
    except Exception:
        # Deliberate best-effort: an unreadable applications file is reported
        # as "not found" below rather than as a server error.
        pass
    if not is_verified:
        return JSONResponse(status_code=404, content={"ok": False, "reason": "not_found"})
    # Find the file: at most one "<lawyer_id>.<ext>" is expected on disk.
    media_types = {
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "png": "image/png",
        "webp": "image/webp",
    }
    for ext in _ALLOWED_PHOTO_EXTS:
        p = _LAWYER_PHOTO_DIR / f"{lawyer_id}.{ext}"
        if p.exists():
            # Fall back to a generic type instead of raising KeyError if an
            # allowed extension is ever added without a mapping here.
            media_type = media_types.get(ext, "application/octet-stream")
            return FileResponse(p, media_type=media_type, headers={
                "Cache-Control": "public, max-age=86400",
            })
    return JSONResponse(status_code=404, content={"ok": False, "reason": "no_photo"})


@app.post("/v1/lawyer-onboarding/update")
def lawyer_onboarding_update(req: _LawyerAppUpdateBody, token: str = ""):  # type: ignore
    """Admin: update a lawyer application status.

    Requires `token` to equal the LE_ADMIN_TOKEN environment variable. Sets
    status, update timestamps and (when given) the admin note on the record
    whose id is `req.id`, rewrites the applications file, and audits the change.
    """
    import hmac
    import os
    import time
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Fail closed when no admin token is configured. The comparison is
    # constant-time so response timing does not reveal how much of a guess
    # matched; bytes are compared because compare_digest rejects non-ASCII
    # str, and "surrogatepass" keeps the encoding step from ever raising.
    if not admin_token or not hmac.compare_digest(
        token.encode("utf-8", "surrogatepass"),
        admin_token.encode("utf-8", "surrogatepass"),
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    allowed = {"submitted", "under_review", "verified", "rejected"}
    if req.status not in allowed:
        return JSONResponse(status_code=400, content={"ok": False, "reason": "invalid_status"})
    if not _LAWYER_APP_PATH.exists():
        return JSONResponse(status_code=404, content={"ok": False, "reason": "no_records"})
    with _LAWYER_APP_LOCK:
        items = []
        try:
            with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        items.append(json.loads(line))
                    except ValueError:
                        # Corrupt line (json.JSONDecodeError is a ValueError).
                        continue
        except Exception as e:
            return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
        found = False
        for rec in items:
            # Non-object lines are kept in `items` (so the rewrite preserves
            # them) but can never be the target record.
            if isinstance(rec, dict) and rec.get("id") == req.id:
                rec["status"]      = req.status
                rec["updated_ts"]  = time.time()
                rec["updated_iso"] = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime())
                if req.admin_note:
                    rec["admin_note"] = req.admin_note
                found = True
                break
        if not found:
            return JSONResponse(status_code=404, content={"ok": False, "reason": "id_not_found"})
        try:
            # Write a sibling temp file, then swap it over the real file.
            tmp = _LAWYER_APP_PATH.with_suffix(".jsonl.tmp")
            with open(tmp, "w", encoding="utf-8") as f:
                for rec in items:
                    f.write(json.dumps(rec, ensure_ascii=False) + "\n")
            tmp.replace(_LAWYER_APP_PATH)
        except Exception as e:
            return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
    # v2.99.144 — audit
    _audit_log(
        actor="admin", role="admin", action="lawyer_application_status_update",
        target=req.id,
        meta={"new_status": req.status, "admin_note": (req.admin_note or "")[:200]},
    )
    return {"ok": True, "id": req.id, "status": req.status}


# ──────────────────────────────────────────────────────────────────────
# v2.99.130 — Auth v0 (magic-link email)
# ──────────────────────────────────────────────────────────────────────
# Request body for POST /v1/auth/request-link.
class _AuthRequestLinkBody(BaseModel):  # type: ignore
    email: str  # address the handler passes to request_magic_link()


def _send_magic_link_email(email: str, link: str) -> bool:
    """v2.99.151 — E-mail a sign-in link through the branded e-mail helper.

    The Hebrew body tells the recipient the link is valid for 15 minutes and
    repeats `link` as plain text below the call-to-action button.

    Returns whatever `_send_email_branded` returned (truthy when sent).
    """
    paragraphs = [
        "שלום,\n\n",
        "ביקשת להיכנס ל-legal-eye. לחץ על הכפתור למטה כדי להמשיך.\n\n",
        "הקישור תקף ל-15 דקות.\n",
        "אם לא ביקשת להיכנס — אפשר פשוט להתעלם מהאימייל.\n\n",
        "אם הכפתור לא עובד, העתק את הקישור הבא לדפדפן:\n",
        f"{link}",
    ]
    delivered = _send_email_branded(
        email,
        "legal-eye · קישור כניסה",
        "".join(paragraphs),
        title="קישור כניסה ל-legal-eye",
        cta_label="היכנס לחשבון ←",
        cta_url=link,
    )
    if not delivered:
        print(f"[auth] magic link not sent (SMTP unconfigured?) → {email}")
        return delivered
    print(f"[auth] magic link sent → {email}")
    return delivered


@app.post("/v1/auth/request-link")
def auth_request_link(req: _AuthRequestLinkBody, request: Request):  # type: ignore
    """Generate a magic link and email it. If the email could not be sent,
    the link itself is returned in the response (intended for dev use)."""
    import os
    try:
        from ..auth import request_magic_link
        issued = request_magic_link(req.email)
        if not issued["ok"]:
            return JSONResponse(status_code=400, content=issued)
        # The link targets the public client app: LE_PUBLIC_URL when set,
        # otherwise the known public domain. The incoming request's host is
        # not used.
        base_url = (
            os.environ.get("LE_PUBLIC_URL", "").strip().rstrip("/")
            or "https://legal-eye.1bigfam.com"
        )
        link = f"{base_url}/?auth_token={issued['token']}"
        sent = _send_magic_link_email(issued["email"], link)
        # Always report success — don't leak which email is registered
        payload = {
            "ok":         True,
            "message":    "אם האימייל תקין — שלחנו קישור כניסה. בדוק את תיבת הדואר (וגם ספאם).",
            "email_sent": sent,
        }
        if not sent:
            # NOTE(review): this branch runs on ANY send failure, not only in
            # dev. Whoever called the endpoint then receives a working login
            # link for the requested address. Consider gating it behind an
            # explicit opt-in setting before relying on it in production.
            payload.update(
                dev_link=link,
                dev_note="SMTP not configured — manual link returned for testing",
            )
        return payload
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}",
        })


@app.get("/v1/auth/verify")
def auth_verify(token: str):  # type: ignore
    """Consume a magic token and return the verification result
    (400 with the result body when verification fails)."""
    try:
        from ..auth import verify_magic_link
        outcome = verify_magic_link(token)
        if outcome["ok"]:
            # v2.99.144 — audit every successful sign-in
            user = outcome["user"]
            _audit_log(
                actor=user["email"], role="user", action="auth_signin",
                target=user["id"], meta={},
            )
            return outcome
        return JSONResponse(status_code=400, content=outcome)
    except Exception as exc:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(exc).__name__}: {exc}",
        })


@app.get("/v1/auth/me")
def auth_me(request: Request):  # type: ignore
    """Return the current user for the session named by the Authorization header."""
    try:
        from ..auth import session_from_header
        session = session_from_header(request.headers.get("authorization"))
        if session:
            return {
                "ok":     True,
                "user":   {"id": session["user_id"], "email": session["email"]},
                "expires_ts": session["expires_ts"],
            }
        # No session could be resolved from the header.
        return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})
    except Exception as exc:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(exc).__name__}: {exc}",
        })


@app.post("/v1/auth/logout")
def auth_logout(request: Request):  # type: ignore
    """Invalidate the session named by the Authorization header, if any."""
    try:
        from ..auth import logout, session_from_header
        session = session_from_header(request.headers.get("authorization"))
        if session:
            logout(session["session_token"])
            return {"ok": True}
        # Nothing to invalidate — still reported as success.
        return {"ok": True, "already_logged_out": True}
    except Exception as exc:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(exc).__name__}: {exc}",
        })


@app.get("/v1/lawyer/me")
def lawyer_me(request: Request):  # type: ignore
    """v2.99.134 — If the signed-in user's email matches a lawyer application,
    return that lawyer's record + status. Otherwise return is_lawyer=false."""
    try:
        from ..auth import session_from_header
        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})
        email = sess["email"]
        if not _LAWYER_APP_PATH.exists():
            return {"ok": True, "is_lawyer": False, "email": email}
        with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                except ValueError:
                    # Corrupt line (json.JSONDecodeError is a ValueError): skip it.
                    continue
                if not isinstance(rec, dict):
                    # A stray non-object line must not turn the lookup into a 500.
                    continue
                # NOTE(review): exact, case-sensitive match. The photo-upload
                # self-update check in this file lowercases both sides —
                # confirm which comparison is intended.
                if rec.get("email") == email:
                    return {
                        "ok":        True,
                        "is_lawyer": True,
                        "email":     email,
                        "lawyer":    rec,
                    }
        return {"ok": True, "is_lawyer": False, "email": email}
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})


# Request body for POST /v1/lawyer/me/availability.
class _LawyerAvailabilityBody(BaseModel):  # type: ignore
    status: str    # one of: available | busy | unavailable (enforced by the handler, which answers 400 otherwise)


# Request body for POST /v1/lawyer/me/whatsapp.
class _LawyerWhatsappBody(BaseModel):  # type: ignore
    """v2.99.204 — opt-in to publish WhatsApp link on public profile."""
    public: bool  # True = opt in, False = opt out; stored as `whatsapp_public_optin` on the lawyer record


@app.post("/v1/lawyer/me/availability")
def lawyer_set_availability(body: _LawyerAvailabilityBody, request: Request):  # type: ignore
    """v2.99.139 — Verified lawyer toggles their availability.

    Updates the record whose email equals the session email, rewrites the
    applications file, and audits the change.
    """
    import time
    try:
        from ..auth import session_from_header
        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})
        if body.status not in ("available", "busy", "unavailable"):
            return JSONResponse(status_code=400, content={"ok": False, "reason": "invalid_status"})
        if not _LAWYER_APP_PATH.exists():
            return JSONResponse(status_code=404, content={"ok": False, "reason": "no_applications"})
        # Read-modify-write of the whole file happens under the lock.
        with _LAWYER_APP_LOCK:
            items = []
            with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        items.append(json.loads(line))
                    except ValueError:
                        # Corrupt line (json.JSONDecodeError is a ValueError).
                        continue
            found = False
            for rec in items:
                # Non-object lines stay in `items` (the rewrite preserves
                # them) but can never be this lawyer's record.
                if not isinstance(rec, dict) or rec.get("email") != sess["email"]:
                    continue
                if rec.get("status") != "verified":
                    return JSONResponse(status_code=403, content={"ok": False, "reason": "not_verified"})
                rec["availability_status"]   = body.status
                rec["availability_changed_ts"]  = time.time()
                rec["availability_changed_iso"] = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime())
                found = True
                break
            if not found:
                return JSONResponse(status_code=404, content={"ok": False, "reason": "not_a_lawyer"})
            # Write a sibling temp file, then swap it over the real file.
            tmp = _LAWYER_APP_PATH.with_suffix(".jsonl.tmp")
            with open(tmp, "w", encoding="utf-8") as f:
                for rec in items:
                    f.write(json.dumps(rec, ensure_ascii=False) + "\n")
            tmp.replace(_LAWYER_APP_PATH)
        # v2.99.144 — audit
        _audit_log(
            actor=sess["email"], role="lawyer", action="availability_set",
            target=sess["email"], meta={"status": body.status},
        )
        return {"ok": True, "status": body.status}
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})


@app.post("/v1/lawyer/me/whatsapp")
def lawyer_set_whatsapp_public(body: _LawyerWhatsappBody, request: Request):  # type: ignore
    """v2.99.204 — Verified lawyer opts in/out of showing WhatsApp on public profile.
    Opting in requires `phone` to be set on the lawyer record."""
    import time
    try:
        from ..auth import session_from_header
        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})
        if not _LAWYER_APP_PATH.exists():
            return JSONResponse(status_code=404, content={"ok": False, "reason": "no_applications"})
        # Read-modify-write of the whole file happens under the lock.
        with _LAWYER_APP_LOCK:
            items = []
            with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        items.append(json.loads(line))
                    except ValueError:
                        # Corrupt line (json.JSONDecodeError is a ValueError).
                        continue
            found = False
            for rec in items:
                # Non-object lines stay in `items` (the rewrite preserves
                # them) but can never be this lawyer's record.
                if not isinstance(rec, dict) or rec.get("email") != sess["email"]:
                    continue
                if rec.get("status") != "verified":
                    return JSONResponse(status_code=403, content={"ok": False, "reason": "not_verified"})
                # Opting in is refused without a phone number; opting out is always allowed.
                if body.public and not (rec.get("phone") or "").strip():
                    return JSONResponse(status_code=400, content={"ok": False, "reason": "no_phone_on_file"})
                rec["whatsapp_public_optin"] = bool(body.public)
                rec["whatsapp_changed_iso"]  = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime())
                found = True
                break
            if not found:
                return JSONResponse(status_code=404, content={"ok": False, "reason": "not_a_lawyer"})
            # Write a sibling temp file, then swap it over the real file.
            tmp = _LAWYER_APP_PATH.with_suffix(".jsonl.tmp")
            with open(tmp, "w", encoding="utf-8") as f:
                for rec in items:
                    f.write(json.dumps(rec, ensure_ascii=False) + "\n")
            tmp.replace(_LAWYER_APP_PATH)
        _audit_log(
            actor=sess["email"], role="lawyer", action="whatsapp_public_set",
            target=sess["email"], meta={"public": bool(body.public)},
        )
        return {"ok": True, "public": bool(body.public)}
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})


@app.get("/v1/lawyer/me/stats")
def lawyer_me_stats(request: Request):  # type: ignore
    """v2.99.146 — KPIs for the signed-in lawyer (403 when the stats lookup reports not-ok)."""
    try:
        from ..auth import session_from_header
        session = session_from_header(request.headers.get("authorization"))
        if not session:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})
        from ..matching import stats_for_lawyer
        stats = stats_for_lawyer(session["email"])
        # A not-ok result is passed through unchanged as the 403 body.
        if stats.get("ok"):
            return stats
        return JSONResponse(status_code=403, content=stats)
    except Exception as exc:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(exc)})


@app.get("/v1/lawyer/me/requests")
def lawyer_me_requests(request: Request):  # type: ignore
    """v2.99.135 — Requests assigned to this lawyer (any response state).

    Contact details are masked on every request the lawyer has not accepted.
    """
    try:
        from ..auth import session_from_header
        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})
        # Verify they're a verified lawyer
        if not _LAWYER_APP_PATH.exists():
            return {"ok": True, "n": 0, "items": [], "reason": "no_applications"}
        lawyer = None
        with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
            for line in f:
                try:
                    rec = json.loads(line.strip())
                except ValueError:
                    # Blank or corrupt line (json.JSONDecodeError is a ValueError).
                    continue
                # A stray non-object line must not turn the lookup into a 500.
                if isinstance(rec, dict) and rec.get("email") == sess["email"]:
                    lawyer = rec
                    break
        if not lawyer:
            return JSONResponse(status_code=403, content={"ok": False, "reason": "not_a_lawyer"})
        if lawyer.get("status") != "verified":
            # Known applicant, not yet verified: empty list plus their current status.
            return {
                "ok":          True,
                "n":           0,
                "items":       [],
                "reason":      "lawyer_not_verified",
                "your_status": lawyer.get("status", "submitted"),
            }
        from ..matching import requests_for_lawyer
        items = requests_for_lawyer(sess["email"])
        # Strip user_email + contact details — lawyer doesn't see those until accepted
        for it in items:
            if it.get("lawyer_response") != "accepted":
                # Mask PII for not-yet-accepted requests.
                # NOTE(review): this mutates the dicts returned by
                # requests_for_lawyer in place — confirm they are fresh copies.
                it["contact"] = "***"
                it["user_email"] = None
                it["name"] = ""
        return {"ok": True, "n": len(items), "items": items}
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})


# ── Admin: matching ──
@app.get("/v1/lawyer-request/{request_id}/match-candidates")
def lawyer_request_match_candidates(request_id: str, token: str = "", top: int = 3):  # type: ignore
    """Admin: score verified lawyers as candidates for this request."""
    import hmac
    import os
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Fail closed when no admin token is configured. The comparison is
    # constant-time so response timing does not reveal how much of a guess
    # matched; bytes are compared because compare_digest rejects non-ASCII
    # str, and "surrogatepass" keeps the encoding step from ever raising.
    if not admin_token or not hmac.compare_digest(
        token.encode("utf-8", "surrogatepass"),
        admin_token.encode("utf-8", "surrogatepass"),
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    try:
        from ..matching import match_candidates
        # `top` is forwarded unchanged to the matcher.
        return match_candidates(request_id, top=top)
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})


def _notify_lawyer_offered_async(record: dict, lawyer_email: str, lawyer_name: str, n_total: int) -> None:
    """v2.99.141 — Tell a lawyer that a request was OFFERED to them alongside others.

    Creates the in-app notification synchronously, then sends the branded
    e-mail from a daemon thread. The e-mail carries only the domain, urgency
    and the first 300 characters of the question, with race-to-accept framing.

    Args:
        record: lawyer-request dict; reads "id", "domain", "question", "urgent".
        lawyer_email: recipient address; nothing happens when it is empty.
        lawyer_name: name used in the greeting.
        n_total: number of lawyers the request was offered to, this one included.
    """
    import threading
    if not lawyer_email:
        return
    # Hebrew label for the domain; unknown domains fall back to the raw key.
    domain_key = record.get("domain", "")
    domain_he = {
        "employment_law": "דיני עבודה", "contracts": "דיני חוזים",
        "consumer": "דיני צרכנות", "tenancy": "שכירות / מקרקעין",
    }.get(domain_key, domain_key)
    # v2.99.143 — in-app notification
    _create_notification(
        lawyer_email,
        kind="lawyer_offered",
        title=f"⚡ הצעה תחרותית · {n_total} עו\"ד",
        body=f"תחום: {domain_he or '(לא סווג)'} · ראשון לקבל זוכה",
        link="/lawyer-dashboard/",
        related_request_id=record.get("id", ""),
    )

    def _worker():
        # `or ""` also covers a key that is present but None, so len() below
        # cannot raise and silently kill this thread before the e-mail goes out.
        question = record.get("question") or ""
        q_short = question[:300]
        ellipsis = "..." if len(question) > 300 else ""
        urgent = bool(record.get("urgent"))
        # Short display id; tolerate a missing / None / non-string id.
        rid = str(record.get("id") or "?")[:8]
        body = (
            f"שלום עו\"ד {lawyer_name},\n\n"
            f"פנייה חדשה הוצעה לך ב-legal-eye — יחד עם {n_total - 1} עו\"ד נוספים.\n"
            f"⚡ ראשון לקבל זוכה. האחרים מקבלים הודעת ביטול.\n\n"
            f"תחום: {domain_he or '(לא סווג)'}\n"
            f"דחיפות: {'⚠ דחוף' if urgent else 'רגיל'}\n"
            f"מועמדים: {n_total} עו\"ד (כולל אותך)\n\n"
            f"השאלה של הלקוח:\n"
            f"\"{q_short}{ellipsis}\"\n\n"
            f"אם הפנייה לא מתאימה — לחץ \"דחה\" וההצעה תופנה לאחרים.\n\n"
            f"מזהה: {rid}"
        )
        subject = (
            f"[legal-eye] {'⚠ פנייה דחופה' if urgent else 'פנייה חדשה'} · {domain_he or '?'} · "
            f"({n_total} מועמדים) · {rid}"
        )
        ok = _send_email_branded(
            lawyer_email, subject, body,
            title=f"⚡ הצעה תחרותית · {n_total} עו\"ד",
            cta_label="קבל את הפנייה ←",
            cta_url="https://legal-eye.1bigfam.com/lawyer-dashboard/",
        )
        if ok:
            print(f"[lawyer-offered] sent → {lawyer_email}")
    threading.Thread(target=_worker, daemon=True).start()


def _notify_user_lawyer_declined_async(record: dict, kind: str) -> None:
    """v2.99.145 — Tell the requesting user that a lawyer declined.

    `kind` selects the wording:
      'all_declined' — every candidate of a multi-offer race declined
      anything else  — treated as a declined 1-1 assignment ('direct')

    Creates the in-app notification synchronously, then sends the e-mail from
    a daemon thread. Does nothing when the record has no `user_email`.
    """
    import threading
    if not record.get("user_email"):
        return
    everyone_declined = kind == "all_declined"

    # In-app first (always)
    title = (
        "מחפשים עו\"ד אחר עבורך"
        if everyone_declined
        else "השיוך התעדכן — מחפשים עו\"ד אחר"
    )
    body_short = (
        "כל העו\"ד שעמדו לרשותך לא היו זמינים. אנחנו כעת מחפשים נוספים — נחזור אליך בקרוב."
        if everyone_declined
        else "עו\"ד שהיינו בקשר איתו לא יכול לקחת את הפנייה. אנחנו כעת מחפשים חלופה."
    )
    _create_notification(
        record["user_email"],
        kind="lawyer_declined",
        title=title,
        body=body_short,
        link="/my-matters/",
        related_request_id=record.get("id", ""),
    )

    def _send_email():
        rid = record.get('id', '?')[:8]
        if everyone_declined:
            subject = "legal-eye · מחפשים עו\"ד אחר עבורך"
            title_email = "מחפשים עו\"ד אחר עבורך"
            body_email = (
                "שלום,\n\nכל העו\"ד שעמדו לרשותך בפנייה ב-legal-eye לא היו זמינים כרגע.\n"
                "אנחנו כעת מחפשים עו\"ד אחרים — נחזור אליך תוך 24 שעות.\n\n"
                f"לא נדרשת ממך פעולה.\n\nמזהה: {rid}"
            )
        else:
            subject = "legal-eye · השיוך התעדכן — מחפשים עו\"ד אחר"
            title_email = "השיוך התעדכן"
            body_email = (
                "שלום,\n\nעו\"ד שהיינו בקשר איתו על הפנייה שלך לא יכול לקחת אותה.\n"
                "אנחנו כעת מחפשים עו\"ד מתאים אחר — נחזור אליך תוך 24 שעות.\n\n"
                f"לא נדרשת ממך פעולה. תהליך תקין בעת התאמה.\n\nמזהה: {rid}"
            )
        delivered = _send_email_branded(
            record["user_email"], subject, body_email, title=title_email,
            cta_label="לאזור האישי ←",
            cta_url="https://legal-eye.1bigfam.com/my-matters/",
        )
        if delivered:
            print(f"[user-decline-notify] {kind} → {record['user_email']}")

    threading.Thread(target=_send_email, daemon=True).start()


def _notify_lawyer_offer_cancelled_async(record: dict, lawyer_email: str, lawyer_name: str, winner_name: str) -> None:
    """v2.99.141 — Tell a lawyer the offered request went to someone else.

    Creates the in-app notification synchronously, then sends the e-mail from
    a daemon thread. Does nothing when `lawyer_email` is empty. `winner_name`
    is accepted but not used in either message.
    """
    import threading
    if not lawyer_email:
        return

    # v2.99.143 — in-app notification
    _create_notification(
        lawyer_email,
        kind="offer_cancelled",
        title="ההצעה בוטלה — שובצה לעו\"ד אחר",
        body="לא נדרשת פעולה. הצוות יציע לך פניות נוספות בקרוב.",
        link="/lawyer-dashboard/",
        related_request_id=record.get("id", ""),
    )

    def _send_email():
        rid = record.get('id','?')[:8]
        text = (
            f"שלום עו\"ד {lawyer_name},\n\n"
            f"הפנייה שהוצעה לך (מזהה {rid}) שובצה כבר לעו\"ד אחר.\n"
            "אין צורך לפעול — הצוות יציע לך פניות נוספות בקרוב."
        )
        delivered = _send_email_branded(
            lawyer_email,
            "legal-eye · ההצעה בוטלה — שובצה לעו״ד אחר",
            text,
            title="ההצעה בוטלה — שובצה לעו\"ד אחר",
            cta_label="לדשבורד ←",
            cta_url="https://legal-eye.1bigfam.com/lawyer-dashboard/",
        )
        if delivered:
            print(f"[lawyer-cancelled] sent → {lawyer_email}")

    threading.Thread(target=_send_email, daemon=True).start()


# Request body for POST /v1/lawyer-request/{request_id}/offer-to-lawyers.
class _OfferBody(BaseModel):  # type: ignore
    lawyer_ids: List[str]  # ids of the lawyers to offer the request to; forwarded to offer_to_lawyers()


@app.post("/v1/lawyer-request/{request_id}/offer-to-lawyers")
def lawyer_request_offer_to_lawyers(request_id: str, body: _OfferBody, token: str = ""):  # type: ignore
    """Admin: offer the same request to N lawyers; first to accept wins."""
    import hmac
    import os
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Fail closed when no admin token is configured. The comparison is
    # constant-time so response timing does not reveal how much of a guess
    # matched; bytes are compared because compare_digest rejects non-ASCII
    # str, and "surrogatepass" keeps the encoding step from ever raising.
    if not admin_token or not hmac.compare_digest(
        token.encode("utf-8", "surrogatepass"),
        admin_token.encode("utf-8", "surrogatepass"),
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    try:
        from ..matching import offer_to_lawyers
        result = offer_to_lawyers(request_id, body.lawyer_ids)
        if not result.get("ok"):
            return JSONResponse(status_code=400, content=result)
        # Fire offer notifications to all offered lawyers. The request record
        # is looked up once instead of re-reading the file for every lawyer.
        lawyers = result.get("lawyers", [])
        if lawyers:
            record = next(
                (r for r in _read_jsonl(_LAWYER_REQUEST_PATH) if r.get("id") == request_id),
                None,
            )
            if record is not None:
                for lw in lawyers:
                    _notify_lawyer_offered_async(record, lw["email"], lw["name"], result["n_offered"])
        # v2.99.144 — audit
        _audit_log(
            actor="admin", role="admin", action="lawyers_offered",
            target=request_id,
            meta={"n_offered": result["n_offered"], "lawyer_ids": body.lawyer_ids},
        )
        return result
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})


# Request body for POST /v1/lawyer-request/{request_id}/assign.
class _AssignBody(BaseModel):  # type: ignore
    lawyer_id: str  # id of the lawyer to assign; forwarded to assign_lawyer()


@app.post("/v1/lawyer-request/{request_id}/assign")
def lawyer_request_assign(request_id: str, body: _AssignBody, token: str = ""):  # type: ignore
    """Admin: assign a request to a specific lawyer."""
    import hmac
    import os
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Fail closed when no admin token is configured. The comparison is
    # constant-time so response timing does not reveal how much of a guess
    # matched; bytes are compared because compare_digest rejects non-ASCII
    # str, and "surrogatepass" keeps the encoding step from ever raising.
    if not admin_token or not hmac.compare_digest(
        token.encode("utf-8", "surrogatepass"),
        admin_token.encode("utf-8", "surrogatepass"),
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    try:
        from ..matching import assign_lawyer
        result = assign_lawyer(request_id, body.lawyer_id)
        if not result.get("ok"):
            return JSONResponse(status_code=400, content=result)
        # Notify user (status change to "contacted") + lawyer (new assignment).
        # The record is re-read from disk so the notifications see the
        # assigned_lawyer_* fields as stored after the assignment.
        for r in _read_jsonl(_LAWYER_REQUEST_PATH):
            if r.get("id") == request_id:
                # User: only if status actually transitioned new→contacted
                if result.get("user_email") and result.get("old_status") == "new":
                    _notify_user_status_change_async(r, "new", "contacted")
                # Lawyer: every time an assignment is made
                _notify_lawyer_assigned_async(
                    r,
                    r.get("assigned_lawyer_email", ""),
                    r.get("assigned_lawyer_name", ""),
                )
                break
        # v2.99.144 — audit
        _audit_log(
            actor="admin", role="admin", action="lawyer_assigned",
            target=request_id,
            meta={"lawyer_id": body.lawyer_id, "lawyer_name": result.get("lawyer_name", "")},
        )
        return result
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})


# ──────────────────────────────────────────────────────────────────────
# v2.99.137 — Deliverable lifecycle. After a lawyer accepts an assigned
# request, they submit a deliverable (title + body text + optional file).
# Status: contacted → submitted_for_approval → approved | disputed
# ──────────────────────────────────────────────────────────────────────
def _notify_auto_release_async(record: dict) -> None:
    """v2.99.138 — Notify both parties that a deliverable auto-released.

    For each of `user_email` and `assigned_lawyer_email` present on the
    record, an in-app notification is created synchronously and a branded
    e-mail is sent from a daemon thread. Does nothing when both are missing.
    """
    import threading
    if not record.get("user_email") and not record.get("assigned_lawyer_email"):
        return

    request_id = record.get("id", "")
    # v2.99.143 — in-app notifications
    if record.get("user_email"):
        _create_notification(
            record["user_email"],
            kind="auto_release",
            title="⏱ הפנייה נסגרה אוטומטית (72 שעות)",
            body="חלפו 72 שעות מאז הגשת התוצר. לא נדרשת פעולה.",
            link="/my-matters/",
            related_request_id=request_id,
        )
    if record.get("assigned_lawyer_email"):
        _create_notification(
            record["assigned_lawyer_email"],
            kind="auto_release",
            title="⏱ התוצר שלך שוחרר אוטומטית",
            body="הלקוח לא הגיב תוך 72 שעות.",
            link="/lawyer-dashboard/",
            related_request_id=request_id,
        )

    def _send_emails():
        rid = record.get("id", "?")[:8]
        lawyer_name = record.get('assigned_lawyer_name','')

        user_address = record.get("user_email")
        if user_address:
            user_text = (
                "שלום,\n\n"
                f"חלפו 72 שעות מאז שעו\"ד {lawyer_name} הגיש לך את התוצר ב-legal-eye. "
                "מאחר שלא הגעת לאישור או למחלוקת, הפנייה נסגרה אוטומטית.\n\n"
                f"אם יש לך שאלות המשך — אפשר לפתוח פנייה חדשה.\n\nמזהה: {rid}"
            )
            _send_email_branded(
                user_address,
                "legal-eye · הפנייה נסגרה אוטומטית (72 שעות)",
                user_text,
                title="⏱ הפנייה נסגרה אוטומטית (72 שעות)",
                cta_label="לאזור האישי ←",
                cta_url="https://legal-eye.1bigfam.com/my-matters/",
            )

        lawyer_address = record.get("assigned_lawyer_email")
        if lawyer_address:
            lawyer_text = (
                f"שלום עו\"ד {lawyer_name},\n\n"
                f"חלפו 72 שעות מאז שהגשת את התוצר על פנייה {rid}.\n"
                "הלקוח לא הגיב — הפנייה נסגרה אוטומטית (auto-release).\n\n"
                f"(תשלום ישוחרר אוטומטית בעתיד הקרוב, כשמודול התשלומים יושק)\n\nמזהה: {rid}"
            )
            _send_email_branded(
                lawyer_address,
                "legal-eye · התוצר שלך שוחרר אוטומטית",
                lawyer_text,
                title="⏱ התוצר שלך שוחרר אוטומטית",
                cta_label="לדשבורד ←",
                cta_url="https://legal-eye.1bigfam.com/lawyer-dashboard/",
            )

    threading.Thread(target=_send_emails, daemon=True).start()


def _scan_auto_release() -> int:
    """Scan lawyer_requests.jsonl. Auto-approve any record where:
    - status == submitted_for_approval
    - deliverable_ts is at least 72h old

    Released records get status "done" and `auto_approved`, and the file is
    rewritten under the request lock. Notifications and audit entries are
    emitted afterwards, outside the lock, each guarded on its own so one
    failure neither skips the remaining records nor hides the fact that
    records were released.

    Returns the number of records auto-released (0 when the file is missing
    or the read/rewrite step failed).
    """
    import time
    if not _LAWYER_REQUEST_PATH.exists():
        return 0
    cutoff = time.time() - (72 * 60 * 60)
    to_notify = []
    try:
        with _LAWYER_REQUEST_LOCK:
            items = []
            with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        items.append(json.loads(line))
                    except ValueError:
                        # Corrupt line (json.JSONDecodeError is a ValueError).
                        continue
            for rec in items:
                # Non-object lines stay in `items` (the rewrite preserves
                # them) but are never candidates.
                if not isinstance(rec, dict):
                    continue
                if rec.get("status") != "submitted_for_approval":
                    continue
                ts = rec.get("deliverable_ts")
                # A missing, zero or non-numeric timestamp is skipped so one
                # malformed record cannot make every scan fail.
                if not ts or not isinstance(ts, (int, float)):
                    continue
                if ts > cutoff:
                    continue
                # Auto-release
                now = time.time()
                iso = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime())
                rec["status"]         = "done"
                rec["auto_approved"]  = True
                rec["approval_ts"]    = now
                rec["approval_iso"]   = iso
                rec["updated_ts"]     = now
                rec["updated_iso"]    = iso
                to_notify.append(rec)
            if to_notify:
                # Write a sibling temp file, then swap it over the real file.
                tmp = _LAWYER_REQUEST_PATH.with_suffix(".jsonl.tmp")
                with open(tmp, "w", encoding="utf-8") as f:
                    for rec in items:
                        f.write(json.dumps(rec, ensure_ascii=False) + "\n")
                tmp.replace(_LAWYER_REQUEST_PATH)
    except Exception as e:
        # Nothing was persisted (or the state is unknown): report no releases.
        print(f"[auto-release] scan FAIL: {type(e).__name__}: {e}")
        return 0

    # Notify outside the lock. These records are already stored as "done" and
    # will not be picked up by a later scan, so a failure here must not stop
    # the loop.
    for rec in to_notify:
        try:
            _notify_auto_release_async(rec)
        except Exception as e:
            print(f"[auto-release] notify FAIL for {rec.get('id', '')}: {type(e).__name__}: {e}")
        try:
            # v2.99.144 — audit
            _audit_log(
                actor="system", role="system", action="auto_release",
                target=rec.get("id", ""),
                meta={"deliverable_age_hours": round((time.time() - rec["deliverable_ts"]) / 3600, 1)},
            )
        except Exception as e:
            print(f"[auto-release] audit FAIL for {rec.get('id', '')}: {type(e).__name__}: {e}")
    n = len(to_notify)
    if n:
        print(f"[auto-release] {n} record(s) auto-approved after 72h")
    return n


def _start_auto_release_thread() -> None:
    """Spawn the daemon thread that periodically runs ``_scan_auto_release``.

    The worker sleeps 60 seconds before its first scan, then repeats the
    scan every 30 minutes for as long as the process lives. An exception
    raised by a scan is printed and does not end the loop.
    """
    import threading
    import time

    boot_delay_s = 60          # give the app time to finish booting
    scan_interval_s = 30 * 60  # 30 min

    def _run_forever():
        time.sleep(boot_delay_s)
        while True:
            try:
                _scan_auto_release()
            except Exception as exc:
                print(f"[auto-release] loop FAIL: {exc}")
            time.sleep(scan_interval_s)

    worker = threading.Thread(target=_run_forever, daemon=True, name="auto-release")
    worker.start()
    print("[auto-release] background thread started (30-min interval)")


# Kick off on module import: the auto-release background thread is started
# as a side effect of importing this module (no explicit startup hook).
_start_auto_release_thread()


@app.post("/v1/admin/auto-release-now")
def admin_auto_release_now(token: str = ""):  # type: ignore
    """Admin: trigger an auto-release scan manually (for testing).

    Args:
        token: Admin token supplied by the caller; must equal the
            ``LE_ADMIN_TOKEN`` environment variable.

    Returns:
        ``{"ok": True, "auto_released": n}`` where ``n`` is the value
        returned by ``_scan_auto_release``, or a 403 JSON response when the
        environment token is unset or the supplied token does not match.
    """
    import hmac
    import os
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing does not reveal how much of
    # the token matched. Compared as bytes because compare_digest rejects
    # non-ASCII str arguments.
    authorized = bool(admin_token) and hmac.compare_digest(
        token.encode("utf-8"), admin_token.encode("utf-8")
    )
    if not authorized:
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    n = _scan_auto_release()
    return {"ok": True, "auto_released": n}


def _notify_user_deliverable_submitted_async(record: dict, lawyer_name: str) -> None:
    """Tell the requesting user that their lawyer has submitted a deliverable.

    An in-app notification is created synchronously; the branded email is
    then sent from a daemon thread. Nothing happens when the record has no
    ``user_email``.
    """
    import threading

    if not record.get("user_email"):
        return

    # The same headline is used for the in-app notification and the email.
    headline = f"⚖ עו\"ד {lawyer_name} הגיש לך תוצר"

    # v2.99.143 — in-app notification
    _create_notification(
        record["user_email"],
        kind="deliverable_submitted",
        title=headline,
        body="בדוק ואשר/ערער תוך 72 שעות, או שחרור אוטומטי.",
        link="/my-matters/",
        related_request_id=record.get("id", ""),
    )

    def _send_email():
        short_id = record.get('id', '?')[:8]
        message = "".join((
            "שלום,\n\n",
            f"עו\"ד {lawyer_name} סיים את העבודה על הפנייה שלך ב-legal-eye.\n\n",
            "בדוק את התשובה ואשר תוך 72 שעות, או פתח מחלוקת אם יש בעיה.\n",
            "לאחר 72 שעות ללא תגובה — התשלום ישוחרר אוטומטית.\n\n",
            f"מזהה: {short_id}",
        ))
        delivered = _send_email_branded(
            record["user_email"],
            "legal-eye · התשובה המשפטית שלך מוכנה",
            message,
            title=headline,
            cta_label="קרא את התשובה ←",
            cta_url="https://legal-eye.1bigfam.com/my-matters/",
        )
        if delivered:
            print(f"[deliverable-notify] sent → {record['user_email']}")

    threading.Thread(target=_send_email, daemon=True).start()


def _notify_lawyer_user_action_async(record: dict, action: str, reason: str = "") -> None:
    """Tell the assigned lawyer that the user approved or disputed the deliverable.

    ``action == "approve"`` produces the approval wording; any other value is
    treated as a dispute and carries ``reason``. The in-app notification is
    created synchronously and the branded email goes out from a daemon
    thread. Nothing happens when the record has no ``assigned_lawyer_email``.
    """
    import threading

    if not record.get("assigned_lawyer_email"):
        return

    # v2.99.143 — in-app notification
    if action == "approve":
        note_kind = "deliverable_approved"
        note_title = "✓ הלקוח אישר את התוצר שלך"
        note_body = "הפנייה נסגרה."
    else:
        note_kind = "deliverable_disputed"
        note_title = "⚠ הלקוח פתח מחלוקת"
        note_body = (reason or "(לא צוין)")[:200]
    _create_notification(
        record["assigned_lawyer_email"],
        kind=note_kind,
        title=note_title,
        body=note_body,
        link="/lawyer-dashboard/",
        related_request_id=record.get("id", ""),
    )

    def _send_email():
        lawyer_name = record.get("assigned_lawyer_name", "")
        greeting = f"שלום עו\"ד {lawyer_name},\n\n"
        short_id = record.get('id', '?')[:8]
        if action == "approve":
            subject = "legal-eye · התוצר שלך אושר ✓"
            headline = "✓ הלקוח אישר את התוצר שלך"
            message = "".join((
                greeting,
                "הלקוח אישר את התוצר שהגשת. הפנייה נסגרה.\n\n",
                f"מזהה: {short_id}\n\n",
                "(תשלום ישוחרר אוטומטית בעתיד הקרוב, כשמודול התשלומים יושק)",
            ))
        else:
            subject = "legal-eye · ⚠ מחלוקת על התוצר"
            headline = "⚠ הלקוח פתח מחלוקת"
            message = "".join((
                greeting,
                "הלקוח פתח מחלוקת על התוצר שהגשת. הצוות יבדוק ויחזור אליך.\n\n",
                f"סיבה מהלקוח:\n{reason or '(לא צוין)'}\n\n",
                f"מזהה: {short_id}",
            ))
        delivered = _send_email_branded(
            record["assigned_lawyer_email"], subject, message, title=headline,
            cta_label="לדשבורד ←",
            cta_url="https://legal-eye.1bigfam.com/lawyer-dashboard/",
        )
        if delivered:
            print(f"[lawyer-action-notify] {action} → {record['assigned_lawyer_email']}")

    threading.Thread(target=_send_email, daemon=True).start()


@app.post("/v1/lawyer/me/request/submit-deliverable")
async def lawyer_submit_deliverable(  # type: ignore
    request:    Request,
    request_id: str        = Form(...),
    title:      str        = Form(...),
    body:       str        = Form(...),
    file:       Optional[UploadFile] = File(None),
):
    """Lawyer submits the deliverable for an accepted request.

    The caller is identified from the ``authorization`` header. The request
    record must exist, be assigned to the caller's email, have
    ``lawyer_response == "accepted"`` and not already be in
    ``submitted_for_approval``. An optional attachment is written under
    ``_DELIVERABLE_DIR / request_id``. On success the record is updated and
    the requests file rewritten, the user is notified and an audit entry is
    logged.

    Args:
        request: Incoming request; only the ``authorization`` header is read.
        request_id: Id of the lawyer-request record.
        title: Deliverable title; must be non-blank.
        body: Deliverable text; at least 10 characters after stripping.
        file: Optional attachment. Its extension must be in
            ``_DOC_ALLOWED_EXT`` and its size at most ``_DOC_MAX_BYTES``;
            an empty file is ignored.

    Returns:
        A success dict, or a ``JSONResponse`` with status 400/401/403/404/413
        and a ``reason`` code. Unexpected errors yield a 500 with ``error``.
    """
    import time
    try:
        from ..auth import session_from_header
        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})
        if not title.strip() or len(body.strip()) < 10:
            return JSONResponse(status_code=400, content={"ok": False, "reason": "title_or_body_too_short"})
        # Find request + verify it's assigned to this lawyer
        if not _LAWYER_REQUEST_PATH.exists():
            return JSONResponse(status_code=404, content={"ok": False, "reason": "no_records"})

        # Read the upload BEFORE taking _LAWYER_REQUEST_LOCK. The lock is
        # acquired with a plain blocking `with`; awaiting while holding it
        # lets another request scheduled on the same event loop block on the
        # lock and stall the loop. Only the read moves here — every
        # validation response is still produced in the original order below.
        has_upload = file is not None and bool(file.filename)
        orig_name = ""
        ext = ""
        content = b""
        if has_upload:
            orig_name = (file.filename or "deliverable").strip()
            ext = ("." + orig_name.rsplit(".", 1)[-1].lower()) if "." in orig_name else ""
            if ext in _DOC_ALLOWED_EXT:
                content = await file.read()

        with _LAWYER_REQUEST_LOCK:
            items = []
            with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        items.append(json.loads(line))
                    except Exception:
                        # Malformed line: skip it (as before), but no longer
                        # swallow KeyboardInterrupt/SystemExit.
                        continue
            target = None
            for rec in items:
                if rec.get("id") == request_id:
                    target = rec
                    break
            if not target:
                return JSONResponse(status_code=404, content={"ok": False, "reason": "request_not_found"})
            if target.get("assigned_lawyer_email") != sess["email"]:
                return JSONResponse(status_code=403, content={"ok": False, "reason": "not_assigned_to_you"})
            if target.get("lawyer_response") != "accepted":
                return JSONResponse(status_code=400, content={"ok": False, "reason": "request_not_accepted_yet"})
            if target.get("status") == "submitted_for_approval":
                return JSONResponse(status_code=400, content={"ok": False, "reason": "deliverable_already_submitted"})
            # Optional file
            deliverable_filename = None
            deliverable_size = None
            deliverable_mime = None
            if has_upload:
                if ext not in _DOC_ALLOWED_EXT:
                    return JSONResponse(status_code=400, content={
                        "ok": False, "reason": "unsupported_filetype",
                        "allowed": sorted(_DOC_ALLOWED_EXT),
                    })
                if len(content) > _DOC_MAX_BYTES:
                    return JSONResponse(status_code=413, content={"ok": False, "reason": "file_too_large"})
                if content:
                    # Collapse anything outside word chars, '-', '.', '(' and ')'
                    # to '_' so the stored name cannot contain path separators.
                    safe_name = _re.sub(r"[^\w\-.()]+", "_", orig_name)[:120] or ("deliverable" + ext)
                    deliverable_dir = _DELIVERABLE_DIR / request_id
                    deliverable_dir.mkdir(parents=True, exist_ok=True)
                    with open(deliverable_dir / safe_name, "wb") as f:
                        f.write(content)
                    deliverable_filename = safe_name
                    deliverable_size = len(content)
                    deliverable_mime = file.content_type or "application/octet-stream"
            # Save deliverable fields onto the record
            target["deliverable_title"]     = title.strip()
            target["deliverable_body"]      = body.strip()
            target["deliverable_ts"]        = time.time()
            target["deliverable_iso"]       = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime())
            target["deliverable_filename"]  = deliverable_filename
            target["deliverable_size"]      = deliverable_size
            target["deliverable_mime"]      = deliverable_mime
            target["status"]                = "submitted_for_approval"
            target["updated_ts"]            = time.time()
            target["updated_iso"]           = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime())
            # Rewrite via a temp file + replace so readers never see a
            # half-written requests file.
            tmp = _LAWYER_REQUEST_PATH.with_suffix(".jsonl.tmp")
            with open(tmp, "w", encoding="utf-8") as f:
                for rec in items:
                    f.write(json.dumps(rec, ensure_ascii=False) + "\n")
            tmp.replace(_LAWYER_REQUEST_PATH)
        # Notify user (async)
        _notify_user_deliverable_submitted_async(target, target.get("assigned_lawyer_name", ""))
        # v2.99.144 — audit
        _audit_log(
            actor=sess["email"], role="lawyer", action="deliverable_submitted",
            target=request_id,
            meta={"title": title.strip()[:120], "has_attachment": bool(deliverable_filename)},
        )
        return {
            "ok":               True,
            "request_id":       request_id,
            "status":           "submitted_for_approval",
            "has_attachment":   bool(deliverable_filename),
            "attachment_size":  deliverable_size,
            "message":          "התוצר הוגש. הלקוח קיבל אימייל ויאשר/יערער תוך 72 שעות.",
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}",
        })


@app.get("/v1/user/me/request/{request_id}/deliverable-file")
def user_get_deliverable_file(request_id: str, request: Request):  # type: ignore
    """User downloads their lawyer's deliverable file. Auth-gated by ownership.

    The record must match both ``request_id`` and the session's ``user_id``;
    a record owned by someone else is reported the same way as a missing one.

    Returns:
        A ``FileResponse`` for the stored attachment, or a ``JSONResponse``:
        401 when unauthenticated, 404 when there is no requests file, no
        matching record, no attachment on the record, or the file is missing
        on disk, and 500 on unexpected errors.
    """
    try:
        from ..auth import session_from_header
        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})
        if not _LAWYER_REQUEST_PATH.exists():
            return JSONResponse(status_code=404, content={"ok": False, "reason": "no_records"})
        target = None
        with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                except Exception:
                    # Malformed line: skip it, without also swallowing
                    # KeyboardInterrupt/SystemExit as a bare except would.
                    continue
                if rec.get("id") == request_id and rec.get("user_id") == sess["user_id"]:
                    target = rec
                    break
        if not target:
            return JSONResponse(status_code=404, content={"ok": False, "reason": "not_found_or_not_yours"})
        fname = target.get("deliverable_filename")
        if not fname:
            return JSONResponse(status_code=404, content={"ok": False, "reason": "no_file"})
        path = _DELIVERABLE_DIR / request_id / fname
        if not path.exists():
            return JSONResponse(status_code=404, content={"ok": False, "reason": "file_missing_on_disk"})
        from fastapi.responses import FileResponse as _FileResponse
        return _FileResponse(
            path,
            media_type=target.get("deliverable_mime") or "application/octet-stream",
            filename=fname,
        )
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})


# JSON body of POST /v1/user/me/request/approval.
class _UserApprovalBody(BaseModel):  # type: ignore
    request_id: str                    # id of the lawyer-request record to act on
    action:     str         # approve | dispute (anything else is rejected with 400)
    reason:     Optional[str] = ""     # stored as the dispute reason when action is "dispute"
    rating:     Optional[int] = None   # v2.99.189 — 1-5 stars (approval only); out-of-range values are ignored
    review:     Optional[str] = ""     # v2.99.189 — optional text review; saved (first 600 chars) only with a valid rating


@app.post("/v1/user/me/request/approval")
def user_approve_or_dispute(body: _UserApprovalBody, request: Request):  # type: ignore
    """User approves the lawyer's deliverable, or opens a dispute.

    The record must belong to the session's ``user_id`` and currently be in
    ``submitted_for_approval``. Approval sets status ``done`` (optionally
    storing a 1-5 rating and review text); dispute sets status ``disputed``
    and stores the reason. The lawyer is then notified and an audit entry
    is logged.

    Returns:
        ``{"ok": True, "request_id": ..., "new_status": ...}`` on success, or
        a ``JSONResponse``: 400 for an invalid action or no pending
        deliverable, 401 when unauthenticated, 403 when the request belongs
        to another user, 404 when the requests file or the record is
        missing, and 500 on unexpected errors.
    """
    import time
    try:
        from ..auth import session_from_header
        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})
        if body.action not in ("approve", "dispute"):
            return JSONResponse(status_code=400, content={"ok": False, "reason": "invalid_action"})
        # Without this check a missing requests file surfaced as a generic
        # 500 (FileNotFoundError) instead of the 404 the sibling endpoints use.
        if not _LAWYER_REQUEST_PATH.exists():
            return JSONResponse(status_code=404, content={"ok": False, "reason": "no_records"})
        with _LAWYER_REQUEST_LOCK:
            items = []
            with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        items.append(json.loads(line))
                    except Exception:
                        # Malformed line: skip it, without also swallowing
                        # KeyboardInterrupt/SystemExit.
                        continue
            target = None
            for rec in items:
                if rec.get("id") == body.request_id:
                    target = rec
                    break
            if not target:
                return JSONResponse(status_code=404, content={"ok": False, "reason": "request_not_found"})
            if target.get("user_id") != sess["user_id"]:
                return JSONResponse(status_code=403, content={"ok": False, "reason": "not_your_request"})
            if target.get("status") != "submitted_for_approval":
                return JSONResponse(status_code=400, content={"ok": False, "reason": "no_pending_deliverable"})
            target["status"]      = "done" if body.action == "approve" else "disputed"
            target["updated_ts"]  = time.time()
            target["updated_iso"] = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime())
            if body.action == "approve":
                target["approval_ts"]  = time.time()
                target["approval_iso"] = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime())
                # v2.99.189 — optional rating + review. A rating outside 1-5
                # is ignored (and the review text with it), not rejected.
                if body.rating is not None:
                    try:
                        r = int(body.rating)
                    except (TypeError, ValueError):
                        r = None
                    if r is not None and 1 <= r <= 5:
                        target["review_rating"] = r
                        target["review_text"]   = (body.review or "")[:600]
                        target["review_ts"]     = time.time()
            else:
                target["dispute_reason"] = (body.reason or "").strip()
                target["dispute_ts"]     = time.time()
                target["dispute_iso"]    = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime())
            # Rewrite via a temp file + replace so readers never see a
            # half-written requests file.
            tmp = _LAWYER_REQUEST_PATH.with_suffix(".jsonl.tmp")
            with open(tmp, "w", encoding="utf-8") as f:
                for rec in items:
                    f.write(json.dumps(rec, ensure_ascii=False) + "\n")
            tmp.replace(_LAWYER_REQUEST_PATH)
        # Notify lawyer
        _notify_lawyer_user_action_async(target, body.action, body.reason or "")
        # v2.99.144 — audit
        _audit_log(
            actor=sess["email"], role="user", action=f"deliverable_{body.action}",
            target=body.request_id,
            meta={"reason": (body.reason or "")[:200] if body.action == "dispute" else None},
        )
        return {"ok": True, "request_id": body.request_id, "new_status": target["status"]}
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})


# JSON body of POST /v1/lawyer/me/request/respond.
class _LawyerRespondBody(BaseModel):  # type: ignore
    request_id: str          # id of the lawyer-request record being answered
    action:     str          # accept | decline — passed through to matching.lawyer_respond unchanged


@app.post("/v1/lawyer/me/request/respond")
def lawyer_respond_endpoint(body: _LawyerRespondBody, request: Request):  # type: ignore
    """Lawyer accepts or declines an assignment (or competitive offer).

    The decision itself is delegated to ``matching.lawyer_respond``; this
    endpoint then sends the follow-up notifications indicated by flags in
    the result and writes an audit entry. A result without ``ok`` is
    returned as a 400; unexpected errors as a 500.
    """
    try:
        from ..auth import session_from_header
        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})

        from ..matching import lawyer_respond
        outcome = lawyer_respond(body.request_id, sess["email"], body.action)
        if not outcome.get("ok"):
            return JSONResponse(status_code=400, content=outcome)

        def _current_record():
            # First stored record whose id matches this request, else None.
            return next(
                (row for row in _read_jsonl(_LAWYER_REQUEST_PATH)
                 if row.get("id") == body.request_id),
                None,
            )

        # v2.99.141 — If this was a race-win, fire cancellation emails to the
        # other lawyers + notify user that they got a lawyer (status change)
        if outcome.get("won_race"):
            for loser in (outcome.get("cancelled_others") or []):
                _notify_lawyer_offer_cancelled_async(
                    {"id": body.request_id},
                    loser["email"], loser.get("name", ""),
                    sess["email"],
                )
            # Also: user notify (status went new→contacted)
            won_rec = _current_record()
            if won_rec is not None and won_rec.get("user_email"):
                _notify_user_status_change_async(won_rec, "new", "contacted")

        # v2.99.145 — Notify user when a lawyer declines:
        #   - direct 1-1 decline → "lawyer can't take it, finding alternative"
        #   - all candidates declined in race → "no one was available, retrying"
        if outcome.get("is_direct_decline") or outcome.get("all_declined_after_race"):
            if outcome.get("all_declined_after_race"):
                decline_kind = "all_declined"
            else:
                decline_kind = "direct"
            declined_rec = _current_record()
            if declined_rec is not None:
                _notify_user_lawyer_declined_async(declined_rec, decline_kind)

        # v2.99.144 — audit
        audit_meta = {
            "is_offered_flow": outcome.get("is_offered_flow", False),
            "won_race": outcome.get("won_race", False),
            "n_cancelled": len(outcome.get("cancelled_others") or []),
        }
        _audit_log(
            actor=sess["email"], role="lawyer", action=f"lawyer_response_{body.action}",
            target=body.request_id,
            meta=audit_meta,
        )
        return outcome
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})


def _read_jsonl(path):
    """Best-effort reader for a JSONL file (used for lawyer_requests).

    Args:
        path: File to read, as a ``pathlib.Path`` or a plain path string.

    Returns:
        The parsed JSON value of every non-blank line, in file order. Lines
        that are not valid JSON are skipped. A missing file yields ``[]``.
        If reading fails part-way, the values parsed so far are returned and
        the failure is printed; this helper never raises for I/O errors.
    """
    import json as _j
    from pathlib import Path as _Path

    # Accept str as well as Path; the original required an object with .exists().
    path = _Path(path)
    if not path.exists():
        return []
    out = []
    try:
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    out.append(_j.loads(line))
                except (ValueError, RecursionError):
                    # Malformed (or absurdly nested) line: skip it. Narrower
                    # than the old bare except, which also caught
                    # KeyboardInterrupt/SystemExit.
                    continue
    except Exception as e:
        # Deliberate best-effort: keep what was read, but leave a trace
        # instead of swallowing the error silently.
        print(f"[read-jsonl] read FAIL ({path}): {type(e).__name__}: {e}")
    return out


@app.get("/v1/user/me/requests")
def user_my_requests(request: Request):  # type: ignore
    """Return the signed-in user's own lawyer-request submissions.

    Records are filtered by the session's ``user_id``, stripped of
    ``admin_note`` and sorted newest first by ``ts``.
    v2.99.155 — Each item now includes `lawyer_public_slug` if a lawyer
    is assigned (and verified), so the UI can link to the lawyer's profile
    page.

    Returns:
        ``{"ok": True, "n": <count>, "items": [...]}``; a 401 JSON response
        when unauthenticated and a 500 on unexpected errors.
    """
    try:
        from ..auth import session_from_header
        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})
        user_id = sess["user_id"]
        if not _LAWYER_REQUEST_PATH.exists():
            return {"ok": True, "n": 0, "items": []}
        items = []
        with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                except Exception:
                    # Malformed line: skip it, without also swallowing
                    # KeyboardInterrupt/SystemExit.
                    continue
                if rec.get("user_id") == user_id:
                    rec.pop("admin_note", None)  # internal note; not exposed to the user
                    items.append(rec)
        # `or 0` also covers an explicit null "ts", which would otherwise make
        # the comparison raise TypeError and turn the whole listing into a 500.
        items.sort(key=lambda r: r.get("ts") or 0, reverse=True)

        # v2.99.155 — Enrich each item with the assigned lawyer's public slug.
        lawyer_ids = {it.get("assigned_lawyer_id") for it in items if it.get("assigned_lawyer_id")}
        slug_by_id: Dict[str, str] = {}
        if lawyer_ids and _LAWYER_APP_PATH.exists():
            try:
                with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
                    for line in f:
                        line = line.strip()
                        if not line:
                            continue
                        try:
                            lrec = json.loads(line)
                        except Exception:
                            continue
                        if lrec.get("id") in lawyer_ids and lrec.get("status") == "verified":
                            slug_by_id[lrec["id"]] = _lawyer_public_slug(lrec)
            except Exception:
                # Slug enrichment is optional; the listing is still returned
                # without it if the lawyer file cannot be processed.
                pass
        for it in items:
            lid = it.get("assigned_lawyer_id")
            if lid and lid in slug_by_id:
                it["lawyer_public_slug"] = slug_by_id[lid]

        return {"ok": True, "n": len(items), "items": items}
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})


@app.get("/v1/auth/users")
def auth_users_list(token: str = ""):  # type: ignore
    """Admin: list all users.

    Args:
        token: Admin token supplied by the caller; must equal the
            ``LE_ADMIN_TOKEN`` environment variable.

    Returns:
        ``{"ok": True, "n": <count>, "items": <auth.list_users() result>}``;
        a 403 JSON response when the environment token is unset or the
        supplied token does not match, and a 500 if listing fails.
    """
    import hmac
    import os
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing does not reveal how much of
    # the token matched. Compared as bytes because compare_digest rejects
    # non-ASCII str arguments.
    authorized = bool(admin_token) and hmac.compare_digest(
        token.encode("utf-8"), admin_token.encode("utf-8")
    )
    if not authorized:
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    try:
        from ..auth import list_users
        users = list_users()
        return {"ok": True, "n": len(users), "items": users}
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})


# JSON body of POST /v1/triage/classify.
class _TriageRequest(BaseModel):  # type: ignore
    text: str  # the user's question; passed to triage.classify_question, and its first 500 chars are logged


# v2.99.128 — Triage logging for future tuning
# _TRIAGE_LOG_PATH is the JSONL file triage calls are appended to (one JSON
# object per line); _TRIAGE_LOG_LOCK guards appends and the review rewrite.
# NOTE(review): `_wa_pl` / `_wa_th` are aliases bound elsewhere in this file —
# presumably pathlib and threading; confirm.
_TRIAGE_LOG_PATH = _wa_pl.Path("tau_rag/runtime/triage_log.jsonl")
_TRIAGE_LOG_LOCK = _wa_th.Lock()


def _log_triage_async(text: str, result: dict) -> None:
    """Record one triage call in the triage JSONL log without blocking the caller.

    The entry is built and appended from a daemon thread. Successful results
    contribute their classification fields; failed ones only their
    ``reason``. Any error while logging is printed and otherwise ignored.
    """
    import threading, time, uuid

    # Result keys copied verbatim into the entry when the triage succeeded.
    copied_fields = (
        "triage_category",
        "domain",
        "domain_supported",
        "risk_level",
        "urgent",
        "ai_answer_allowed",
        "lawyer_recommended",
        "signals",
    )

    def _append_entry():
        try:
            _TRIAGE_LOG_PATH.parent.mkdir(parents=True, exist_ok=True)
            entry = {
                "id":   str(uuid.uuid4())[:12],
                "ts":   time.time(),
                "iso":  time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()),
                "text": (text or "")[:500],   # truncate to keep file lean
                "ok":   bool(result.get("ok")),
            }
            if not result.get("ok"):
                entry["reason"] = result.get("reason")
            else:
                for field in copied_fields:
                    entry[field] = result.get(field)
            serialized = json.dumps(entry, ensure_ascii=False) + "\n"
            with _TRIAGE_LOG_LOCK:
                with open(_TRIAGE_LOG_PATH, "a", encoding="utf-8") as fh:
                    fh.write(serialized)
        except Exception as exc:
            print(f"[triage-log] failed: {type(exc).__name__}: {exc}")

    threading.Thread(target=_append_entry, daemon=True).start()


@app.post("/v1/triage/classify")
def triage_classify(req: _TriageRequest):  # type: ignore
    """v2.99.119 — Rule-based Hebrew legal triage. No LLM.
    v2.99.128 — Logs anonymized text + result for future tuning.

    Classifies the submitted Hebrew question and returns the classifier's
    result, which carries:
      triage_category, domain, risk_level, ai_answer_allowed,
      lawyer_recommended, urgent, missing_facts, next_actions.
    Any failure is reported as a 500 JSON response.
    """
    try:
        from ..triage import classify_question
        question = req.text or ""
        verdict = classify_question(question)
        # Fire-and-forget: logging happens on a background thread.
        _log_triage_async(question, verdict)
        return verdict
    except Exception as exc:
        failure = {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
        return JSONResponse(status_code=500, content=failure)


@app.get("/v1/triage/log")
def triage_log(token: str = "", limit: int = 200):  # type: ignore
    """Admin: review triage classifications for accuracy.

    Args:
        token: Admin token supplied by the caller; must equal the
            ``LE_ADMIN_TOKEN`` environment variable.
        limit: Maximum number of entries to return; clamped to 1..2000.

    Returns:
        ``{"ok": True, "n_total", "n_returned", "items"}`` with the newest
        entries first (or an empty listing when no log file exists); a 403
        JSON response on a bad token and a 500 if the log cannot be read.
    """
    import hmac
    import os
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing does not reveal how much of
    # the token matched. Compared as bytes because compare_digest rejects
    # non-ASCII str arguments.
    authorized = bool(admin_token) and hmac.compare_digest(
        token.encode("utf-8"), admin_token.encode("utf-8")
    )
    if not authorized:
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    if not _TRIAGE_LOG_PATH.exists():
        return {"ok": True, "n": 0, "items": []}
    items = []
    try:
        with open(_TRIAGE_LOG_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    items.append(json.loads(line))
                except Exception:
                    # Malformed line: skip it, without also swallowing
                    # KeyboardInterrupt/SystemExit.
                    continue
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
    # Return newest first, capped (entries are appended, so file order is oldest first)
    items.reverse()
    capped = items[:max(1, min(limit, 2000))]
    return {"ok": True, "n_total": len(items), "n_returned": len(capped), "items": capped}


# JSON body of POST /v1/triage/log/review.
class _TriageReviewBody(BaseModel):  # type: ignore
    id:                str                                  # id of the triage log entry to annotate
    verdict:           str                                  # correct | wrong | flag
    expected_category: Optional[str] = ""                   # stored as review_expected_category
    note:              Optional[str] = ""                   # stored as review_note


@app.post("/v1/triage/log/review")
def triage_log_review(req: _TriageReviewBody, token: str = ""):  # type: ignore
    """Admin: mark a triage log entry as correct/wrong with optional expected category + note.

    The first entry whose ``id`` matches is annotated with
    ``review_verdict``, ``review_expected_category``, ``review_note`` and
    ``review_ts``, and the log file is rewritten under ``_TRIAGE_LOG_LOCK``.

    Args:
        req: Entry id, verdict (``correct`` / ``wrong`` / ``flag``) and the
            optional expected category and note.
        token: Admin token supplied by the caller; must equal the
            ``LE_ADMIN_TOKEN`` environment variable.

    Returns:
        ``{"ok": True, "id": ..., "verdict": ...}`` on success, or a
        ``JSONResponse``: 400 for an invalid verdict, 403 on a bad token,
        404 when the log file or the entry is missing, 500 on I/O errors.
    """
    import hmac
    import os, time
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing does not reveal how much of
    # the token matched. Compared as bytes because compare_digest rejects
    # non-ASCII str arguments.
    authorized = bool(admin_token) and hmac.compare_digest(
        token.encode("utf-8"), admin_token.encode("utf-8")
    )
    if not authorized:
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    allowed = {"correct", "wrong", "flag"}
    if req.verdict not in allowed:
        return JSONResponse(status_code=400, content={"ok": False, "reason": "invalid_verdict"})
    if not _TRIAGE_LOG_PATH.exists():
        return JSONResponse(status_code=404, content={"ok": False, "reason": "no_records"})
    with _TRIAGE_LOG_LOCK:
        items = []
        try:
            with open(_TRIAGE_LOG_PATH, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        items.append(json.loads(line))
                    except Exception:
                        # Malformed line: skip it, without also swallowing
                        # KeyboardInterrupt/SystemExit.
                        continue
        except Exception as e:
            return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
        found = False
        for rec in items:
            if rec.get("id") == req.id:
                rec["review_verdict"]           = req.verdict
                rec["review_expected_category"] = req.expected_category or ""
                rec["review_note"]              = req.note or ""
                rec["review_ts"]                = time.time()
                found = True
                break
        if not found:
            return JSONResponse(status_code=404, content={"ok": False, "reason": "id_not_found"})
        try:
            # Rewrite via a temp file + replace so readers never see a
            # half-written log.
            tmp = _TRIAGE_LOG_PATH.with_suffix(".jsonl.tmp")
            with open(tmp, "w", encoding="utf-8") as f:
                for rec in items:
                    f.write(json.dumps(rec, ensure_ascii=False) + "\n")
            tmp.replace(_TRIAGE_LOG_PATH)
        except Exception as e:
            return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
    return {"ok": True, "id": req.id, "verdict": req.verdict}


@app.get("/v1/law/citers")
def law_citers(name: str, top: int = 30):  # type: ignore
    """v2.99.109 — Judgments in the corpus that cite this law.

    Resolves the input name to canonical via aliases, then collects all
    doc_ids that mention the canonical form OR any of its aliases.
    Returns each citer with court, date, and a snippet around the
    first match. Sorted by recency (newest first).

    Args:
        name: Law name as typed or linked; may be an alias or the canonical
            title. Blank input is rejected with 400.
        top: Maximum number of items to return (at least 1 is returned when
            any exist).

    Returns:
        A dict with ``name``, ``canonical``, ``n_total``, ``n_returned``,
        ``lookup_terms`` and ``items`` (each with ``doc_id``, ``citation``,
        ``court``, ``verdict_dt``, ``snippet``, ``relation``); a 400 JSON
        response for an empty name and a 500 on unexpected errors.
    """
    try:
        # Validate first so an empty name is answered with 400 without
        # touching the pipeline or the citation network at all.
        nm = (name or "").strip()
        if not nm:
            return JSONResponse(status_code=400, content={"ok": False, "reason": "empty_name"})

        from ..pipeline import get_pipeline
        from ..citation_network import get_or_build
        pipe = get_pipeline()
        cn = get_or_build(pipe)

        # Build the set of strings to look up in cited_by:
        # canonical, all aliases pointing TO canonical, plus the input.
        lookup_strs = {nm}
        alias_map = {}
        try:
            with open(_LAW_ALIAS_FILE, "r", encoding="utf-8") as f:
                alias_map = json.load(f)
        except Exception:
            # The alias file is optional; without it only the raw name
            # (and its year-stripped form) is looked up.
            pass
        # If input is an alias, jump to canonical and pick up other aliases for it
        canonical = alias_map.get(nm, nm)
        lookup_strs.add(canonical)
        for short, canon in alias_map.items():
            if canon == canonical:
                lookup_strs.add(short)
        # Also add year-stripped short of canonical
        _year_re = _re.compile(r",\s+ה?תש[א-ת][\"׳״]?[א-ת]?[-־–—]\d{4}\s*$")
        short_canon = _year_re.sub("", canonical).strip()
        if short_canon != canonical:
            lookup_strs.add(short_canon)

        # Collect citer doc_ids
        seen_ids = set()
        for s in lookup_strs:
            seen_ids.update(cn.cited_by.get(s, []) or [])
        # Also scan cited_by keys for fuzzy matches starting with the canonical-short
        # (catches "X, תש..." variants when listed under a different normalization)
        prefix_form = short_canon + ","
        for k in cn.cited_by:
            if k.startswith(prefix_form) or k == short_canon:
                seen_ids.update(cn.cited_by[k])

        # Build metadata for each (Document uses .id; some pipeline variants
        # have .doc_id — handle both)
        docs = (getattr(pipe, "_indexed_docs", None) or
                getattr(pipe, "_docs", None) or [])
        doc_by_id = {}
        for d in docs:
            did = getattr(d, "doc_id", None) or getattr(d, "id", None)
            if did:
                doc_by_id[did] = d

        # v2.99.112 — try SHORTEST forms first (longer canonical with year
        # suffix rarely appears verbatim). Sorted once, with the string
        # itself as tie-break so equal-length terms are tried in a fixed order.
        terms_shortest_first = sorted(lookup_strs, key=lambda s: (len(s), s))

        out = []
        # v2.99.111 — filter to judgments only (skip law/statute/wiki cross-refs
        # that get indexed under similar IDs)
        NON_JUDGMENT_PREFIXES = ("heb_law/", "heb_statute/", "heb_wikilaw/", "heb_kolzchut/")
        for did in seen_ids:
            if did.startswith(NON_JUDGMENT_PREFIXES):
                continue
            d = doc_by_id.get(did)
            if not d:
                continue
            md = getattr(d, "metadata", None) or {}
            # Skip if metadata flags this as a statute/law doc
            if md.get("kind") in ("statute", "law", "wikipedia", "kolzchut"):
                continue
            # v2.99.113 — d.text may be empty (lazy); use pipe.get_text fallback
            text = getattr(d, "text", "") or ""
            if not text and hasattr(pipe, "get_text"):
                try:
                    text = pipe.get_text(did) or ""
                except Exception:
                    text = ""
            cite = md.get("citation") or did
            # str() so a non-string date value cannot break the slice.
            dt = str(md.get("verdict_dt") or "")[:10]
            # Snippet: 80 chars before and 120 after the first matching term.
            snippet = ""
            matched_str = ""
            for s in terms_shortest_first:
                idx = text.find(s)
                if idx >= 0:
                    matched_str = s
                    start = max(0, idx - 80)
                    end   = min(len(text), idx + len(s) + 120)
                    snippet = ("…" if start > 0 else "") + text[start:end] + ("…" if end < len(text) else "")
                    break
            # v2.99.116 — law-specific sentiment (apply/interpret/limit/expand)
            rel = _classify_law_citation_context(text, matched_str) if matched_str else "apply"
            out.append({
                "doc_id":     did,
                "citation":   cite,
                "court":      md.get("court", ""),
                "verdict_dt": dt,
                "snippet":    snippet,
                "relation":   rel,
            })

        # Sort: newest year first; items without a numeric year go last.
        # (No court-rank ordering is applied.) The two preliminary stable
        # sorts make the order within a year deterministic — later date
        # first, then doc_id — instead of depending on set iteration order,
        # which also decided which items survived the `top` cut.
        def _year(r):
            s = (r.get("verdict_dt") or "")[:4]
            return -int(s) if s.isdigit() else 0
        out.sort(key=lambda r: r["doc_id"])
        out.sort(key=lambda r: r["verdict_dt"], reverse=True)
        out.sort(key=_year)
        n_total = len(out)
        out = out[:max(1, top)]

        return {
            "ok":           True,
            "name":         nm,
            "canonical":    canonical,
            "n_total":      n_total,
            "n_returned":   len(out),
            "lookup_terms": sorted(lookup_strs),
            "items":        out,
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": f"{type(e).__name__}: {e}"})


@app.get("/v1/laws/known-names")
def laws_known_names():  # type: ignore
    """v2.99.108 — Return short names for all cached laws so the client
    can linkify "סעיף N לחוק X" references in judgment text.

    Each item has:
      - canonical: the title stored on disk
      - aliases:   list of short names that map to it (from aliases.json
                   + an auto-derived year-stripped short form)

    Returns:
      ``{"ok": True, "n": <number of items>, "items": [...]}``. The list
      is empty when the law cache directory does not exist.
    """
    if not _LAW_CACHE_DIR.exists():
        return {"ok": True, "n": 0, "items": []}

    # Load aliases (short → canonical). Best-effort: a missing, unreadable
    # or syntactically broken file simply means "no aliases".
    loaded = None
    try:
        with open(_LAW_ALIAS_FILE, "r", encoding="utf-8") as f:
            loaded = json.load(f)
    except (OSError, ValueError):
        # OSError: file missing/unreadable. ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        loaded = None
    # A file that parses but is not a JSON object (e.g. a list) would
    # otherwise blow up on `.items()` below, outside the best-effort guard.
    alias_map = loaded if isinstance(loaded, dict) else {}

    # Reverse: canonical → list of short forms
    by_canonical: Dict[str, List[str]] = {}
    for short, canonical in alias_map.items():
        if not isinstance(canonical, str):
            # Non-string targets can never equal a cached law name (and
            # unhashable ones would raise in setdefault) — ignore them.
            continue
        by_canonical.setdefault(canonical, []).append(short)

    # Build response. The pattern strips a trailing ", <Hebrew year>-<4 digits>"
    # suffix from the canonical title to derive a short form.
    _year_re = _re.compile(r",\s+ה?תש[א-ת][\"׳״]?[א-ת]?[-־–—]\d{4}\s*$")
    items = []
    for c in _law_cache_list():
        canonical = c["name"]
        aliases = list(by_canonical.get(canonical, []))
        # Auto-add year-stripped short form (only if it differs from the
        # canonical title and is not already listed).
        short = _year_re.sub("", canonical).strip()
        if short != canonical and short not in aliases:
            aliases.append(short)
        items.append({"canonical": canonical, "aliases": aliases})
    return {"ok": True, "n": len(items), "items": items}


@app.get("/v1/laws/search")
def laws_search(q: str, top: int = 30):  # type: ignore
    """v2.99.107 — Full-text search across all cached laws.

    Iterates over cached law JSONs (currently ~18, ~13K paragraphs), finds
    paragraphs containing the query, returns hits with law name, section
    title, paragraph num/text, and a context snippet.

    No external service — pure CPU-bound grep over the bundled cache.
    Typical latency: <100ms for 13K paragraphs.

    Args:
      q:   Search text; surrounding whitespace is stripped. Must be at
           least 2 characters after stripping.
      top: Maximum number of hits to return. Values below 1 behave like 1
           (the scan stops after the first hit).

    Returns:
      ``{"ok": False, "reason": "query_too_short"}`` for a short query,
      otherwise ``{"ok": True, "n", "n_searched", "n_laws", "query",
      "items"}``. ``n_searched`` / ``n_laws`` count only what was scanned
      before the hit limit was reached.
    """
    q_norm = (q or "").strip()
    if len(q_norm) < 2:
        return {"ok": False, "reason": "query_too_short"}
    if not _LAW_CACHE_DIR.exists():
        # Same key set as the normal response so clients need no special case.
        return {"ok": True, "n": 0, "n_searched": 0, "n_laws": 0,
                "query": q_norm, "items": []}

    limit = max(1, top)
    q_lower = q_norm.lower()
    hits = []
    n_paras_searched = 0
    n_laws_searched  = 0
    for p in sorted(_LAW_CACHE_DIR.glob("*.json")):
        if p.name == "aliases.json":
            continue
        try:
            with open(p, "r", encoding="utf-8") as f:
                d = json.load(f)
        except (OSError, ValueError):
            # Unreadable / malformed cache file: skip it, keep searching.
            continue
        if not isinstance(d, dict):
            # Valid JSON but not a law object — treat like a malformed file
            # instead of letting `.get` raise and fail the whole search.
            continue
        n_laws_searched += 1
        law_name = d.get("name") or "?"
        for s in (d.get("sections") or []):
            sec_title = s.get("title") or ""
            for para in (s.get("paragraphs") or []):
                n_paras_searched += 1
                text = para.get("text") or ""
                # Exact match first; fall back to a case-insensitive match.
                # One find per variant replaces the original in/find pairs.
                idx = text.find(q_norm)
                if idx < 0:
                    idx = text.lower().find(q_lower)
                if idx < 0:
                    continue
                # Build snippet: ±60 chars around first match
                start = max(0, idx - 60)
                end   = min(len(text), idx + len(q_norm) + 60)
                snippet = ("…" if start > 0 else "") + text[start:end] + ("…" if end < len(text) else "")
                hits.append({
                    "law_name":     law_name,
                    "section":      sec_title,
                    "para_num":     para.get("num"),
                    "snippet":      snippet,
                    "text":         text[:400],  # truncate for payload size
                })
                if len(hits) >= limit:
                    break
            if len(hits) >= limit:
                break
        if len(hits) >= limit:
            break
    return {
        "ok":           True,
        "n":            len(hits),
        "n_searched":   n_paras_searched,
        "n_laws":       n_laws_searched,
        "query":        q_norm,
        "items":        hits,
    }


@app.get("/v1/law/by-name")
def law_by_name(name: str):  # type: ignore
    """Day 55 — fetch an Israeli law's full text from Wikisource and
    return it as structured sections.

    Wikisource has the canonical Hebrew text of every major Israeli
    law (חוק החוזים, חוק יסוד, פקודות, תקנות) under predictable
    titles. We search → fetch wikitext → split into chapters/sections.

    Args:
      name: Free-text law name, e.g. "חוק החוזים", "חוק יסוד כבוד האדם",
            "פקודת הנזיקין", "תקנות סדר הדין האזרחי".

    Returns:
      On success, a dict with keys ok, kind, name, source, source_url,
      n_sections, n_paragraphs, n_chars and sections. Each section is
      ``{"title", "kind", "paragraphs"}``; each paragraph carries "text"
      and, where one was parsed, "num" (section-title rows also carry
      ``is_section_title: True``). A cache hit returns whatever
      `_law_cache_get` produced, with ``from_cache`` set to True.

      Failures are returned (not raised) as JSONResponse:
        400 empty_name                  — blank `name`
        404 not_found_in_wikisource     — search returned no hits
        404 wikitext_empty              — page wikitext missing or < 200 chars
        500 parse_failed_no_paragraphs  — nothing parseable in the wikitext
        500 "<ExcType>: <msg>"          — any other exception
    """
    import re as _r
    import urllib.parse as _up
    try:
        import urllib.request as _ur
        q = (name or "").strip()
        if not q:
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "empty_name",
            })

        # v2.99.101 — Cache hit: serve from disk if previously fetched.
        cached = _law_cache_get(q)
        if cached:
            # NOTE(review): this mutates the object handed back by
            # _law_cache_get — confirm that is a fresh copy per call.
            cached["from_cache"] = True
            return cached

        # 1. Search Wikisource for matching titles. Israeli laws are
        # usually titled exactly as the law name (e.g. "חוק החוזים
        # (חלק כללי)"). Prefer title prefix matches.
        search_url = (
            "https://he.wikisource.org/w/api.php?action=query&format=json"
            "&list=search&srnamespace=0&srlimit=5&srsearch=" + _up.quote(q)
        )
        req = _ur.Request(search_url, headers={
            "User-Agent": "legal-eye-bot/1.0 (https://legal-eye.1bigfam.com)"
        })
        with _ur.urlopen(req, timeout=8) as r:
            search_data = json.loads(r.read().decode("utf-8", errors="replace"))
        hits = (search_data.get("query") or {}).get("search") or []
        if not hits:
            return JSONResponse(status_code=404, content={
                "ok": False, "reason": "not_found_in_wikisource",
                "query": q,
            })
        # Score hits: exact-prefix match wins
        def _score(h):
            t = h.get("title", "")
            if t.startswith(q): return 100
            if q in t: return 50
            return 0
        # list.sort is stable, so equally-scored hits keep the order the
        # search API returned them in.
        hits.sort(key=_score, reverse=True)
        title = hits[0]["title"]

        # 2. Fetch wikitext (NOT extract — Israeli laws use custom
        # `{{ח:סעיף}}` templates that the extract API discards).
        wt_url = (
            "https://he.wikisource.org/w/api.php?action=parse&format=json"
            "&prop=wikitext&page=" + _up.quote(title)
        )
        req = _ur.Request(wt_url, headers={
            "User-Agent": "legal-eye-bot/1.0 (https://legal-eye.1bigfam.com)"
        })
        with _ur.urlopen(req, timeout=10) as r:
            wt_data = json.loads(r.read().decode("utf-8", errors="replace"))
        # The wikitext payload is read from the "*" key when it is a dict;
        # any other shape is stringified as-is.
        wt_obj = (wt_data.get("parse") or {}).get("wikitext") or {}
        wikitext = wt_obj.get("*") if isinstance(wt_obj, dict) else str(wt_obj)
        # Anything under 200 chars is treated as "no usable page".
        if not wikitext or len(wikitext) < 200:
            return JSONResponse(status_code=404, content={
                "ok": False, "reason": "wikitext_empty",
                "title": title,
            })

        # 3. Parse Hebrew-law templates → markdown-ish intermediate form.
        # See https://he.wikisource.org/wiki/תבנית:ח:סעיף  for full template set.
        # NOTE(review): `extract_lines` is never read below — looks like a leftover.
        extract_lines = []
        # First pass: drop unused templates / structural noise
        wt = wikitext
        # Drop ENTIRE templates we don't care about (note-boxes, formatting).
        # `[^{}]*?` means only templates without nested braces are removed.
        for drop_tpl in ("ח:תיבה", "ח:מפריד", "ח:סוגר", "ח:סופר", "ח:התחלה",
                         "ח:פתיח-התחלה", "ח:מאגר", "ח:סוף", "ח:פתיח-סוף"):
            wt = _r.sub(r"\{\{" + drop_tpl + r"[^{}]*?\}\}", "", wt)
        # Drop internal HTML (table-of-contents divs)
        wt = _r.sub(r"<div[^>]*>", "", wt)
        wt = _r.sub(r"</div>", "", wt)
        wt = _r.sub(r"<br\s*/?>", "\n", wt)
        # Internal wiki-links: keep just the display text
        wt = _r.sub(r"\{\{ח:פנימי\|[^|}]*\|([^}]*)\}\}", r"\1", wt)
        wt = _r.sub(r"\{\{ח:פנימי\|([^}|]*)\}\}", r"\1", wt)
        # Plain [[link|text]] → text;  [[link]] → link
        wt = _r.sub(r"\[\[[^|\]]*\|([^\]]*)\]\]", r"\1", wt)
        wt = _r.sub(r"\[\[([^\]]*)\]\]", r"\1", wt)
        # Italic / bold markers (bold first, so ''' is not half-eaten by '')
        wt = _r.sub(r"'''([^']+)'''", r"\1", wt)
        wt = _r.sub(r"''([^']+)''", r"\1", wt)

        # Parse line by line. State carried across lines:
        #   cur_title / cur_kind — header of the section being accumulated
        #   cur_paras            — paragraphs collected for that section
        sections = []
        cur_title  = None
        cur_kind   = "preamble"
        cur_paras  = []
        pending_num = None    # set by {{ח:סעיף|N|TITLE}}, consumed by next {{ח:ת}}
        pending_subnum = None # set by {{ח:תת|(M)}}, consumed by following text
        title_line = None

        # Close the section being accumulated: append it to `sections`
        # (dropping paragraphs with empty text) and reset the paragraph
        # buffer. cur_title / cur_kind are left for the caller to overwrite.
        def _flush_section():
            nonlocal cur_paras, cur_title, cur_kind
            # Nothing accumulated and no header seen yet — nothing to emit.
            if cur_title is None and not cur_paras: return
            sections.append({
                "title":      cur_title,
                "kind":       cur_kind,
                "paragraphs": [p for p in cur_paras if p.get("text")],
            })
            cur_paras = []

        # Each branch below handles one template form and `continue`s;
        # the order of the checks is significant.
        for raw_line in wt.split("\n"):
            line = raw_line.strip()
            if not line:
                continue
            # {{ח:כותרת|TITLE}} — overall law title (use as canonical name)
            m = _r.match(r"\{\{ח:כותרת\|([^}]+)\}\}", line)
            if m:
                title_line = m.group(1).strip()
                continue
            # {{ח:קטע2|ANCHOR|TITLE}} — chapter header
            m = _r.match(r"\{\{ח:קטע2\|[^|]*\|([^}]+)\}\}", line)
            if m:
                _flush_section()
                cur_title = m.group(1).strip()
                cur_kind  = "chapter"
                pending_num = None
                pending_subnum = None
                continue
            # {{ח:קטע3|ANCHOR|TITLE}} — sub-chapter header (סימן)
            m = _r.match(r"\{\{ח:קטע3\|[^|]*\|([^}]+)\}\}", line)
            if m:
                _flush_section()
                cur_title = m.group(1).strip()
                cur_kind  = "subchapter"
                pending_num = None
                pending_subnum = None
                continue
            # {{ח:סעיף|N|TITLE}} — section header (numbered)
            m = _r.match(r"\{\{ח:סעיף\|([^|]+)\|([^}]+)\}\}", line)
            if m:
                pending_num = m.group(1).strip()
                # Add a "section title" line BEFORE first sub-paragraph
                cur_paras.append({
                    "num":  pending_num + ".",
                    "text": m.group(2).strip(),
                    "is_section_title": True,
                })
                pending_subnum = None
                continue
            # {{ח:תת|(MARK)}} — sub-paragraph marker on its own
            m = _r.match(r"\{\{ח:תת\|([^}]+)\}\}\s*(.*)", line)
            if m:
                pending_subnum = m.group(1).strip()
                rest = m.group(2).strip()
                # Text on the same line consumes the marker immediately;
                # otherwise it stays pending for the next {{ח:ת}} line.
                if rest:
                    cur_paras.append({
                        "num":  pending_subnum,
                        "text": _strip_remaining_templates(rest),
                    })
                    pending_subnum = None
                continue
            # {{ח:ת}} TEXT — plain paragraph (in current section)
            m = _r.match(r"\{\{ח:ת\}\}\s*(.*)", line)
            if m:
                rest = _strip_remaining_templates(m.group(1).strip())
                # A pending sub-paragraph mark takes precedence over the
                # pending section number.
                num = pending_subnum or pending_num
                if num:
                    cur_paras.append({"num": num, "text": rest})
                    pending_subnum = None  # consume
                    pending_num    = None  # consume (only first ת after סעיף)
                else:
                    cur_paras.append({"text": rest})
                continue
            # Bare text (no template prefix) — append to last paragraph if any
            if line and not line.startswith("{{") and not line.startswith("}}"):
                cleaned = _strip_remaining_templates(line)
                if cleaned and cur_paras:
                    cur_paras[-1]["text"] += " " + cleaned
                elif cleaned:
                    cur_paras.append({"text": cleaned})

        # Emit whatever section was still open when the text ended.
        _flush_section()

        n_total_paras = sum(len(s.get("paragraphs") or []) for s in sections)
        if n_total_paras == 0:
            return JSONResponse(status_code=500, content={
                "ok": False, "reason": "parse_failed_no_paragraphs",
                "title": title, "wikitext_len": len(wikitext),
            })

        # Prefer the title declared inside the page ({{ח:כותרת}}) over the
        # search-result page title.
        canonical_name = title_line or title
        response = {
            "ok":            True,
            "kind":          "law",
            "name":          canonical_name,
            "source":        "wikisource",
            "source_url":    "https://he.wikisource.org/wiki/" + _up.quote(title.replace(" ", "_")),
            "n_sections":    len(sections),
            "n_paragraphs":  n_total_paras,
            "n_chars":       len(wikitext),
            "sections":      sections,
        }
        # v2.99.101 — persist to disk cache (under canonical title + alias).
        # A cache-write failure is logged and does not fail the request.
        try:
            _law_cache_set(q, canonical_name, response)
        except Exception as e:
            print(f"[law-cache] save fail: {e}")
        return response
    except Exception as e:
        # Catch-all boundary: network, JSON and parsing errors all surface
        # as a 500 body rather than propagating.
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })


# v2.99.90 — Smart Insights: "Related cases" with sentiment detection.
# For a given cite (e.g., ע"א 4628/93) we collect the corpus cases that
# mention it, then inspect the text surrounding each mention to decide
# whether the citing case is AFFIRMING, DISTINGUISHING, or OVERTURNING
# the original ruling. The three patterns below are the keyword signals.
_REL_OVERTURN_RX = _re.compile(
    r"(?:ביטל(?:נו|תי|ה)?|הפך(?:נו|תי|ה)?|לבטל\b|לסטות\s+מ|"
    r"אינ[הו]\s+(?:עוד\s+)?הלכה|ההלכה.{0,40}שונתה|התרחק(?:נו|תי|ה)?\s+מ|"
    r"דחי[נתה]\s+(?:את\s+)?(?:ההלכה|הקביעה)|מבוטל(?:ת)?\s+ב?[זה]?[אה]?ת\s+פסק)"
)
_REL_DISTINGUISH_RX = _re.compile(
    r"(?:אבחנ(?:נו|תי|תה)|להבחין|שונה\s+מ|נסיבות\s+שונות|"
    r"לא\s+חל\b|אינ[הו]\s+חל|נסיבות\s+הענין\s+שונות|המקרה\s+שונה|"
    r"להבחין\s+בין)"
)
_REL_AFFIRM_RX = _re.compile(
    r"(?:אימצ(?:נו|תי|ה)|יישמ(?:נו|תי|ה)|חוזרת?|לפי\s+ההלכה|"
    r"כאמור\s+ב|בעקבות\s+הלכת|כפי\s+שנקבע|כפי\s+שהובהר|המבחן\s+שנקבע|"
    r"בהתאם\s+ל(?:הלכה|פסק)|מאשר(?:ת)?)"
)


def _classify_citation_context(text: str, cite: str) -> str:
    """Label how `text` treats `cite`, based on nearby keywords.

    Only the first occurrence of `cite` is considered. The inspected
    window is 150 characters before the occurrence through 150
    characters after its end.

    Returns one of "overturn", "distinguish", "affirm" or "neutral";
    "neutral" is also the answer when either argument is empty or the
    cite does not occur in the text.
    """
    if not (text and cite):
        return "neutral"
    pos = text.find(cite)
    if pos == -1:
        return "neutral"
    window = text[max(0, pos - 150): pos + len(cite) + 150]
    # Strongest signal first: the first pattern that fires decides.
    ranked_signals = (
        (_REL_OVERTURN_RX, "overturn"),
        (_REL_DISTINGUISH_RX, "distinguish"),
        (_REL_AFFIRM_RX, "affirm"),
    )
    for signal_rx, label in ranked_signals:
        if signal_rx.search(window):
            return label
    return "neutral"


# v2.99.116 — Law-specific classifier. The Hebrew wording around a law
# citation differs from the wording around a case-to-case citation, so
# it gets its own keyword patterns. A law reference counts as "apply"
# unless one of the interpretation / limitation / expansion signals
# below shows up near it.
_LAW_REL_INTERPRET_RX = _re.compile(
    r"(?:מפרש(?:ים|ת|ים\s+את)?|פרשנות(?:\s+ה?סעיף|\s+ה?חוק)?|תכלית(?:\s+ה?סעיף|\s+ה?חוק|\s+החקיקה)?|"
    r"המשמעות\s+של|כיצד\s+יש\s+ל[הת]ב[יו]ן|לפרש\s+את\s+ה?סעיף)"
)
_LAW_REL_LIMIT_RX = _re.compile(
    r"(?:אינו\s+חל\s+על|לא\s+חל\s+(?:כאן|בענייננו|במקרה)|מצומצם\s+ל|"
    r"אינ[הו]\s+פוגע[תים]?\s+ב|לא\s+חל[הת]?\s+על|הוראת\s+ה?סעיף\s+אינה\s+חלה|"
    r"חרי?ג\s+ל(?:סעיף|חוק)|מוצא\s+מתחולת)"
)
_LAW_REL_EXPAND_RX = _re.compile(
    r"(?:חל\s+גם\s+על|כולל\s+(?:גם|כל)|מורחב[תים]?\s+ל|הרחבת\s+ה?(?:תחולה|הסעיף)|"
    r"באופן\s+רחב|פרשנות\s+מרחיבה|חל\s+על\s+כל)"
)

def _classify_law_citation_context(text: str, match_str: str) -> str:
    """Label how `text` uses the law reference `match_str`.

    Only the first occurrence of `match_str` is considered. The inspected
    window is 150 characters before the occurrence through 200
    characters after its end.

    Returns "limit", "expand" or "interpret" when the matching signal is
    found (checked in that order), otherwise "apply" — which is also the
    answer for empty arguments or a reference that is not in the text.
    """
    if not (text and match_str):
        return "apply"
    pos = text.find(match_str)
    if pos == -1:
        return "apply"
    window = text[max(0, pos - 150): pos + len(match_str) + 200]
    # First pattern that fires decides; order is limit → expand → interpret.
    ranked_signals = (
        (_LAW_REL_LIMIT_RX, "limit"),
        (_LAW_REL_EXPAND_RX, "expand"),
        (_LAW_REL_INTERPRET_RX, "interpret"),
    )
    for signal_rx, label in ranked_signals:
        if signal_rx.search(window):
            return label
    return "apply"


@app.get("/v1/judgment/related")
def judgment_related(cite: str, top: int = 8):  # type: ignore
    """Sprint 1 — related cases for the open judgment.

    Args:
      cite: Citation text; the canonical prefix is extracted with
            `_JR_CASE_RX` when it matches, otherwise the stripped input
            is used as-is.
      top:  Maximum number of citing cases to return. Negative values are
            treated as 0.

    Returns (JSON object):
      ok:            True on success
      cite:          the cite prefix that was looked up
      n_citers:      total number of docs that cite this case
      n_cited_by_us: total number of citations this case makes
      citers:        up to `top` citing cases, each tagged with
                     relation: 'overturn'|'distinguish'|'affirm'|'neutral'
      cites_out:     first 20 citations this case makes ({text, kind})

      A blank `cite` yields a 400 JSONResponse (reason "empty_cite");
      any exception yields a 500 JSONResponse with the error text.
    """
    try:
        # Validate before touching the pipeline: building the citation
        # network for a request that is going to be rejected is wasted work.
        raw = (cite or "").strip()
        if not raw:
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "empty_cite",
            })
        # A negative `top` would make the final slice drop items from the
        # END of the list instead of limiting it.
        limit = max(0, top)

        from ..pipeline import get_pipeline
        from ..citation_network import get_or_build
        pipe = get_pipeline()
        cn = get_or_build(pipe)
        # Strip surrounding text → canonical cite prefix
        m = _JR_CASE_RX.search(raw)
        prefix = m.group(1).strip() if m else raw
        # Find docs that mention this cite
        citers_ids = cn.cited_by.get(prefix, []) or []
        # Reverse: docs this cite-id cites
        cite_doc = cn.doc_for_citation.get(prefix)
        cited_by_us = (cn.cites.get(cite_doc) or []) if cite_doc else []
        # Resolve metadata for each citer
        # v2.99.520 fix: Document.id (not .doc_id — that's Chunk's attribute).
        # Earlier code crashed with AttributeError on every call.
        docs = (getattr(pipe, "_indexed_docs", None) or
                getattr(pipe, "_docs", None) or [])
        doc_by_id = {getattr(d, "id", None) or getattr(d, "doc_id", None): d for d in docs}
        out_citers = []
        for did in citers_ids[:max(limit * 2, 16)]:  # over-fetch
            d = doc_by_id.get(did)
            if not d:
                continue
            md = getattr(d, "metadata", None) or {}
            text = getattr(d, "text", "") or ""
            # Locate the mention once and reuse the offset for the snippet.
            at = text.find(prefix)
            snippet = text[max(0, at - 80): at + 120] if at >= 0 else ""
            out_citers.append({
                "doc_id":     did,
                "citation":   md.get("citation") or did,
                "court":      md.get("court", ""),
                "verdict_dt": md.get("verdict_dt", "") or "",
                "relation":   _classify_citation_context(text, prefix),
                "snippet":    snippet,
            })

        # Sort: overturn first (most surprising), then by date desc
        REL_ORDER = {"overturn": 0, "distinguish": 1, "affirm": 2, "neutral": 3}

        def _rank(row):
            # str() guards against a non-string verdict_dt in metadata.
            year = str(row["verdict_dt"])[:4]
            return (REL_ORDER.get(row["relation"], 9),
                    -int(year) if year.isdigit() else 0)

        out_citers.sort(key=_rank)
        out_citers = out_citers[:limit]

        return {
            "ok":         True,
            "cite":       prefix,
            "n_citers":   len(citers_ids),
            "n_cited_by_us": len(cited_by_us),
            "citers":     out_citers,
            "cites_out":  [{"text": c.get("text"), "kind": c.get("kind")}
                            for c in cited_by_us[:20]],
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })


# Request body for POST /v1/verify_citations.
class _VerifyCitationsRequest(BaseModel):  # type: ignore
    # Raw citation strings to look up; the handler processes at most the
    # first 100 entries of this list.
    cites: List[str]


@app.post("/v1/verify_citations")
def verify_citations(body: _VerifyCitationsRequest):  # type: ignore
    """Day 54 — bulk citation existence check, powers the ChatGPT
    hallucination filter. For each cite, returns:
      - status='verified_corpus' + doc_id  (found in our corpus)
      - status='not_found_locally'         (extension should try web)
      - status='empty'                     (blank input entry)

    Designed to be fast: no parsing, no external fetches. Just a corpus
    index lookup per cite. Wikisource/Wikipedia/court.gov.il are checked
    by the extension itself (which has the helper privileges).

    Only the first 100 cites of a request are processed. Each cite is
    looked up under several spellings (as given, with Hebrew or ASCII
    quote marks, with whitespace removed); the first spelling found in
    the index wins.

    Returns ``{"results": [...]}``, or a 500 JSONResponse on any exception.
    """
    # Quote-mark folding tables. Written as \u escapes on purpose: the
    # characters are visually near-identical, and a literal that gets
    # normalised by an editor turns the replacement into a silent no-op.
    #   U+05F4 gershayim / U+05F3 geresh  — Hebrew abbreviation marks
    #   U+201C/U+201D, U+2018/U+2019      — typographic quotes
    ascii_to_hebrew = str.maketrans({'"': "\u05f4", "'": "\u05f3"})
    any_to_ascii = str.maketrans({
        "\u05f4": '"', "\u201c": '"', "\u201d": '"',
        "\u05f3": "'", "\u2018": "'", "\u2019": "'",
    })
    try:
        from ..pipeline import get_pipeline
        from ..citation_network import get_or_build
        pipe = get_pipeline()
        cn = get_or_build(pipe)

        results = []
        for raw_cite in (body.cites or [])[:100]:  # cap at 100 per request
            raw = (raw_cite or "").strip()
            if not raw:
                results.append({"cite": raw_cite, "status": "empty"})
                continue
            m = _JR_CASE_RX.search(raw)
            prefix = m.group(1).strip() if m else raw
            # dict.fromkeys dedupes while keeping order. The first three
            # spellings are the ones the endpoint has always tried, in
            # their original order, so existing matches resolve to the
            # same doc; the quote-folded variants are tried afterwards.
            candidates = dict.fromkeys((
                prefix,
                prefix.replace("'", "\u05f3"),
                _re.sub(r"\s+", "", prefix),
                prefix.translate(ascii_to_hebrew),
                prefix.translate(any_to_ascii),
            ))
            doc_id = None
            for c in candidates:
                doc_id = cn.doc_for_citation.get(c)
                if doc_id:
                    break
            if doc_id:
                results.append({
                    "cite":     raw_cite,
                    "prefix":   prefix,
                    "status":   "verified_corpus",
                    "doc_id":   doc_id,
                })
            else:
                results.append({
                    "cite":     raw_cite,
                    "prefix":   prefix,
                    "status":   "not_found_locally",
                })
        return {"results": results}
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })


@app.get("/v1/judgment/{doc_id:path}/render")
def judgment_reader_render(doc_id: str):  # type: ignore
    """Day 51 — structured view of one judgment for the reader modal.

    Loads the stored text for `doc_id`, removes the corpus header
    (leading brackets and the prose marker), splits the remainder into
    paragraphs and collects the distinct case citations it mentions.

    Returns a dict with ok, doc_id, citation, court, n_paragraphs,
    n_citations, n_chars, paragraphs (first 200), citations (first 60)
    and links; a 404 JSONResponse (reason "doc_not_found") when the
    document has no text; a 500 JSONResponse on any exception.
    """
    try:
        from urllib.parse import quote
        from ..pipeline import get_pipeline

        source_text = get_pipeline().get_text(doc_id) or ""
        if not source_text:
            return JSONResponse(status_code=404, content={
                "ok": False, "reason": "doc_not_found",
            })

        # Body = source minus the bracket header and the leading
        # "פסיקה — citation (court):" marker.
        body_text = _JR_BRACKET_HEADER_RE.sub("", source_text).lstrip()
        body_text = _JR_PROSE_MARKER_RE.sub("", body_text, count=1).lstrip()

        # The headline citation is taken from the untouched source, where
        # the header that was just stripped is still present.
        head_cite = _JR_CASE_RX.search(source_text)
        citation = head_cite.group(1) if head_cite else None

        # Court name comes from the bracket header (e.g. "[פסק דין עליון]").
        court_hit = _re.search(
            r"\[(פסק\s*דין\s*(?:עליון|מחוזי|השלום|הארצי\s*לעבודה|אזורי\s*לעבודה)[^\]]*)\]",
            source_text)
        court = court_hit.group(1) if court_hit else None

        # Paragraphs: non-empty pieces, with a leading number/letter
        # marker peeled off into "num" when one is present.
        paragraphs = []
        for piece in (part.strip() for part in _JR_PARA_RX.split(body_text)):
            if not piece:
                continue
            numbered = _re.match(r"^((?:\d+\.|[א-י][.)])\s)(.+)", piece, _re.DOTALL)
            if numbered is None:
                paragraphs.append({"num": None, "text": piece})
                continue
            marker, remainder = numbered.groups()
            paragraphs.append({"num": marker.strip(), "text": remainder.strip()})

        # Distinct citations in order of first appearance.
        citations = list(dict.fromkeys(
            hit.group(1).strip() for hit in _JR_CASE_RX.finditer(body_text)
        ))

        # "Open in" links, both routed through Google:
        # - nevo:   restricted to nevo.co.il, whose pages do contain the
        #           citation text.
        # - courts: unrestricted search. court.gov.il uses opaque file IDs,
        #           so a site: filter there finds nothing, while a plain
        #           search surfaces pador.co.il and other free sources.
        def _google(query):
            return f"https://www.google.com/search?q={quote(query)}"

        if citation:
            links = {
                "nevo":    _google(f'site:nevo.co.il "{citation}"'),
                "courts":  _google(f'"{citation}"'),
            }
        else:
            links = {"nevo": None, "courts": None}

        return {
            "ok":          True,
            "doc_id":      doc_id,
            "citation":    citation,
            "court":       court,
            "n_paragraphs": len(paragraphs),
            "n_citations": len(citations),
            "n_chars":     len(body_text),
            "paragraphs":  paragraphs[:200],  # cap — anything more is suspicious
            "citations":   citations[:60],
            "links":       links,
        }
    except Exception as exc:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(exc).__name__}: {exc}"
        })


# Request body for POST /v1/citation_graph.
class _CitationGraphRequest(BaseModel):  # type: ignore
    # Corpus doc_ids and/or raw case-citation strings; the handler
    # de-duplicates them before use.
    doc_ids: List[str] = []
    # Upper bound on how many of the de-duplicated inputs are kept as nodes.
    max_nodes: int = 7


@app.post("/v1/citation_graph")
def citation_graph_subgraph(body: _CitationGraphRequest):  # type: ignore
    """Day 49 Phase 2 — return a subgraph showing which cases in the
    provided list cite which others.

    Input:  doc_ids — list of corpus doc_ids OR raw case-citation strings
            (e.g. "ע\"א 4011/97"). max_nodes caps the result; a negative
            value is treated as 0.
    Output: { ok, nodes: [{id, label, kind}], edges: [{from, to}] }
            Every kept input becomes a node, resolved or not. Each
            (from, to) pair appears at most once. Any exception yields a
            500 JSONResponse with the error text.

    Used by the topic-dossier's citation-graph mini-SVG to visualize
    how the cases in the result relate.
    """
    try:
        from ..pipeline import get_pipeline
        from ..citation_network import get_or_build
        pipe = get_pipeline()
        cn = get_or_build(pipe)

        # Normalize input: dedupe + cap. Clamp the cap at 0 — a negative
        # slice bound would drop items from the END instead of limiting.
        node_cap = max(0, body.max_nodes)
        ids = list(dict.fromkeys(body.doc_ids))[:node_cap]
        if not ids:
            return {"ok": True, "nodes": [], "edges": []}

        # Resolve each input id → doc_id in the network.
        # Many inputs come as citation strings ("ע\"א 4011/97") not doc_ids;
        # use cn.doc_for_citation to map citation→doc_id.
        resolved = {}
        for i in ids:
            doc_id = cn.doc_for_citation.get(i) or (
                i if i in cn.cites else None)
            if doc_id:
                resolved[i] = doc_id

        # Build nodes (use the input string as label for readability)
        nodes = [{"id": i, "label": i, "kind": "case"} for i in ids]

        # Build edges: for each resolved doc, check its outgoing cites;
        # add an edge if the target is also in our input set.
        # Use both doc_id and citation-text matching since the inputs
        # might be a mix.
        inputs_set = set(ids)
        edges = []
        seen_edges = set()  # (from, to) pairs already emitted
        for src_input, src_doc in resolved.items():
            for c in cn.cites.get(src_doc, []):
                cite_text = c.get("text", "")
                # Target is in inputs by raw citation match, OR by
                # resolved doc-id match.
                tgt_input = None
                if cite_text in inputs_set:
                    tgt_input = cite_text
                else:
                    resolved_target = cn.doc_for_citation.get(cite_text)
                    if resolved_target:
                        # First other input that resolves to the same doc.
                        tgt_input = next(
                            (other_input
                             for other_input, other_doc in resolved.items()
                             if other_doc == resolved_target
                             and other_input != src_input),
                            None,
                        )
                if not tgt_input or tgt_input == src_input:
                    continue
                # A judgment may cite the same target several times; the
                # graph needs that relationship only once.
                pair = (src_input, tgt_input)
                if pair in seen_edges:
                    continue
                seen_edges.add(pair)
                edges.append({"from": src_input, "to": tgt_input})

        return {"ok": True, "nodes": nodes, "edges": edges}
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })