"""FastAPI app. Run: uvicorn tau_rag.api.fastapi_app:app --reload """ from __future__ import annotations from contextlib import asynccontextmanager from typing import Any, Dict, List, Optional try: from fastapi import FastAPI, HTTPException from fastapi.exceptions import RequestValidationError from pydantic import BaseModel except Exception as e: # pragma: no cover raise RuntimeError( "FastAPI not installed. `pip install tau-rag[api]`." ) from e from ..core.config import Config from ..core.types import Document, Query, Strategy from ..pipeline import Pipeline from .models import ( BatchQueryRequest, ChatRequest, DocumentBody, DocumentsRequest, FeedbackRequest, QueryPresetBody, QueryRequest, SearchRequest, ) from .routers import ( admin_content_router, admin_controls_router, admin_ops_router, admin_runtime_router, chat_router, documents_router, public_router, system_router, ) from .errors import ( ErrorCode, Limits, build_error_body, validate_query_text, validate_doc_list, validate_k, ) # ──────────────────────────────────────────────────────────────────── # Production gate — refuse to boot when external LLM keys are set. # legal-eye is verbatim-from-precedent (see CLAUDE.md "No external LLM"); # a leaked key in env would silently change answer behavior in code paths # that probe the env. # # v2.89 — INVERTED: this assertion is now **default-on**. A developer # who legitimately needs LLM keys in their local env (for unrelated # tools) can opt out with TAU_RAG_ALLOW_EXTERNAL_LLM=true. Production # environments (HF Space, etc.) MUST NOT set that flag. 
# ────────────────────────────────────────────────────────────────────
import os as _os  # noqa: E402

# Bypass flag for the production gate: only the literal values
# "1" / "true" / "yes" (case-insensitive) switch the gate off.
_LE_ALLOW_LLM = (
    (_os.environ.get("TAU_RAG_ALLOW_EXTERNAL_LLM") or "").lower()
    in ("1", "true", "yes")
)

# Refuse to import (and therefore to boot) when an external LLM key is
# present in the environment and the bypass flag is not set.
if not _LE_ALLOW_LLM and any(
    _os.environ.get(_key_name)
    for _key_name in ("ANTHROPIC_API_KEY", "OPENAI_API_KEY")
):
    raise RuntimeError(
        "legal-eye refuses to boot with ANTHROPIC_API_KEY or "
        "OPENAI_API_KEY in env — the product is verbatim-from-"
        "precedent and external LLM access is a brand/regulatory "
        "breaking change. Unset both keys, OR if you are running "
        "local dev with these keys present for unrelated tools, "
        "set TAU_RAG_ALLOW_EXTERNAL_LLM=true to bypass. Production "
        "environments must NEVER set the bypass flag."
    )

app = FastAPI(title="TAU-RAG", version="2.0.0")

# ---------------------------------------------------- CORS + security
from fastapi.middleware.cors import CORSMiddleware  # noqa: E402
from starlette.responses import Response  # for response_class=Response
from .security import cors_config_from_env, apply_security_headers  # noqa: E402

# CORS middleware is only installed when the env-derived config names at
# least one allowed origin.
_cors_cfg = cors_config_from_env()
if _cors_cfg["allow_origins"]:
    app.add_middleware(CORSMiddleware, **_cors_cfg)


@app.middleware("http")
async def _security_headers_middleware(request, call_next):
    """Run the downstream handler, then stamp security headers on its response."""
    resp = await call_next(request)
    apply_security_headers(resp.headers)
    return resp


# ---------------------------------------------------- global error handlers
def _rid_from(request) -> Optional[str]:
    """Return ``request.state.request_id`` or ``None`` if either attribute is missing."""
    state = getattr(request, "state", None)
    return getattr(state, "request_id", None)
def _maybe_translate_body(body, request) -> Dict[str, Any]:
    """v2.26 — translate ``error.message`` via the Accept-Language header.

    Best-effort: ``body`` is returned unchanged when it is not an error
    envelope (a dict with a dict under ``"error"``), when the request has
    no Accept-Language header, or when anything in the translation path
    raises. On success the message is replaced *in place* and the same
    dict object is returned.
    """
    try:
        if not isinstance(body, dict) or "error" not in body:
            return body
        err = body["error"]
        if not isinstance(err, dict):
            return body
        accept = (request.headers.get("accept-language")
                  if request is not None else None)
        if not accept:
            return body
        from ..middleware.i18n import get_i18n
        code = err.get("code")
        message = err.get("message", "")
        translated = get_i18n().translate(code, message, accept)
        if translated != message:
            err["message"] = translated
    except Exception:
        # Translation is cosmetic; never let it break error reporting.
        pass
    return body


# HTTP status -> canonical error code. Statuses not listed here fall back
# to INTERNAL_ERROR in the handler below.
_STATUS_TO_ERROR_CODE = {
    400: ErrorCode.VALIDATION_ERROR,
    401: ErrorCode.UNAUTHORIZED,
    403: ErrorCode.ADMIN_REQUIRED,
    404: ErrorCode.NOT_FOUND,
    413: ErrorCode.PAYLOAD_TOO_LARGE,
    422: ErrorCode.VALIDATION_ERROR,
    429: ErrorCode.RATE_LIMITED,
}


@app.exception_handler(HTTPException)
async def _http_exc_handler(request, exc: HTTPException):
    """Render an ``HTTPException`` in the canonical error envelope.

    A string ``detail`` becomes the message; a dict ``detail`` is passed
    through as ``details`` with the generic message "request failed".
    Headers carried by the exception are preserved and ``X-Request-ID``
    is added when the request has one.
    """
    rid = _rid_from(request)
    code = _STATUS_TO_ERROR_CODE.get(exc.status_code, ErrorCode.INTERNAL_ERROR)
    detail = exc.detail
    message = detail if isinstance(detail, str) else "request failed"
    details = detail if isinstance(detail, dict) else None
    body = build_error_body(code, message, rid, details)
    body = _maybe_translate_body(body, request)
    headers = dict(exc.headers or {})
    if rid:
        headers["X-Request-ID"] = rid
    return JSONResponse(status_code=exc.status_code, content=body,
                        headers=headers)


@app.exception_handler(RequestValidationError)
async def _validation_exc_handler(request, exc: RequestValidationError):
    """Render request-validation failures as a 422 in the canonical envelope."""
    # exc.errors() can contain values that json.dumps cannot serialize
    # (e.g. exception instances under "ctx", or bytes under "input").
    # Encode them first so that building the 422 cannot itself fail and
    # surface as a 500. FastAPI's built-in handler does the same.
    from fastapi.encoders import jsonable_encoder

    rid = _rid_from(request)
    body = build_error_body(
        ErrorCode.VALIDATION_ERROR,
        "request failed validation",
        rid,
        details={"errors": jsonable_encoder(exc.errors())},
    )
    body = _maybe_translate_body(body, request)
    headers = {"X-Request-ID": rid} if rid else {}
    return JSONResponse(status_code=422, content=body, headers=headers)


@app.exception_handler(Exception)
async def _unhandled_exc_handler(request, exc: Exception):
    """Catch-all: render any other exception as a 500 in the canonical envelope."""
    rid = _rid_from(request)
    # NOTE(review): the exception type and text (first 300 chars) are sent
    # to the client. Kept for compatibility; confirm this is acceptable
    # for production, since exception text can expose internals.
    body = build_error_body(
        ErrorCode.INTERNAL_ERROR,
        f"{type(exc).__name__}: {exc}"[:300],
        rid,
    )
    headers = {"X-Request-ID": rid} if rid else {}
    return JSONResponse(status_code=500, content=body, headers=headers)


# ----------------------------------------------------------------- middleware
from fastapi import Request, UploadFile, File, Form
from fastapi.responses import JSONResponse
from collections import Counter
from ..middleware import get_cache, get_limiter
from ..middleware.auth import get_auth
from ..middleware.ratelimit import RateLimitExceeded
from ..middleware.observability import (
    get_obs, generate_request_id, RequestLog, _hash_prefix,
)
from ..middleware.maintenance import get_maintenance
from ..middleware.pii_redaction import get_pii_redactor
from ..middleware.slow_queries import get_slow_tracker, SlowRecord
from ..middleware.quota import get_quota_tracker
from ..middleware.idempotency import get_idempotency_store
from ..middleware.request_timeout import get_timeout_guard
from ..middleware.body_limit import get_body_limit_guard
from ..middleware.deprecation import (
    get_deprecation_registry,
    build_headers as _build_dep_headers,
)
from ..middleware.cost import get_cost_tracker
from ..middleware.compression import get_compression_tracker
from ..middleware.ip_allowlist import get_ip_allowlist
from ..middleware.query_complexity import get_query_complexity
from ..middleware.key_labels import get_key_labels
from ..middleware.i18n import get_i18n
from ..middleware.slo import get_slo_tracker
from ..middleware.prom_histograms import get_prom_histograms
from ..middleware.cost_alerts import get_cost_alerts
from ..middleware.async_jobs import get_job_queue
from ..middleware.etag import (
    compute_etag as _compute_etag,
    matches_if_none_match as _matches_inm,
    get_etag_tracker,
)
from ..middleware.hmac_signing import get_hmac_signing
from ..middleware.traceparent import (
    get_traceparent_manager,
    parse_traceparent as _parse_tp,
    build_traceparent as _build_tp,
)
from ..middleware.baggage import (
    get_baggage_manager,
    parse_baggage as _parse_baggage,
    build_baggage as _build_baggage,
)
from ..middleware.autocomplete import get_autocomplete
import asyncio as _asyncio
import time as _time
# Also expose as plain `time` so module-level expressions and
# new handlers can use `time.time()` without per-function imports.
import time


@app.middleware("http")
async def auth_and_ratelimit_middleware(request: Request, call_next):
    """Gatekeeping + observability middleware wrapped around every request.

    Stages, in order (each rejecting stage logs via ``_log`` and returns a
    canonical error envelope carrying ``X-Request-ID``):

      0. request-id assignment and optional body capture
      1. Content-Length pre-check (413)
      2. maintenance mode (503), admin / API-key auth (401)
      3. per-key IP allowlist (403) and HMAC signature check (401)
      4. per-key daily quota (429) and rate limit (429)
      5. query-complexity check (400) and idempotency replay
      6. dispatch to the handler, optionally under a wall-clock timeout (504)
      7. response decoration (traceparent, baggage, deprecation headers),
         idempotency caching, final log row
    """
    t0 = _time.time()
    path = request.url.path
    protected = (path.startswith("/v1/generate")
                 or path.startswith("/v1/chat")
                 or path.startswith("/v1/documents"))
    admin_only = path.startswith("/v1/admin/")
    api_key = request.headers.get("x-api-key")
    # X-Request-ID — honor client-supplied or generate our own
    request_id = request.headers.get("x-request-id") or generate_request_id()
    # Stash on request.state so handlers can correlate if they want
    request.state.request_id = request_id

    # v1.99 — optional body capture for replay. Opt-in via env var
    # TAU_RAG_OBS_CAPTURE_BODY=1. Only captures bodies for the replay-
    # able endpoints (search/generate/chat) to keep log size bounded
    # and avoid picking up admin request bodies with keys.
    #
    # Two views of the body are kept:
    #   captured_body    — truncated, lossy-decoded, PII-redacted text;
    #                      for logs only.
    #   raw_request_body — the exact bytes the client sent; used wherever
    #                      correctness depends on the real payload (HMAC
    #                      verification, query-complexity scoring).
    captured_body: Optional[str] = None
    raw_request_body: Optional[bytes] = None
    _REPLAY_CAPTURE_PATHS = (
        "/v1/search", "/v1/generate", "/v1/chat",
    )
    # v2.24 — also trigger body capture if query-complexity scoring
    # is enabled, so the check has something to score.
    _needs_body = (
        _os.environ.get("TAU_RAG_OBS_CAPTURE_BODY") == "1"
        or get_query_complexity().is_enabled()
    )
    # NOTE(review): this read happens before the Content-Length pre-check
    # below, so an oversized body on these paths is read into memory
    # before it is rejected. Order kept as in the original.
    if (_needs_body and request.method == "POST"
            and any(path.startswith(p) for p in _REPLAY_CAPTURE_PATHS)):
        try:
            raw = await request.body()
            if raw is not None:
                raw_request_body = raw

                # Put the already-read body back on the request so the
                # downstream handler still sees it. Done before decoding /
                # redaction so a failure there cannot leave the handler
                # without a body.
                async def _receive() -> Dict[str, Any]:
                    return {"type": "http.request", "body": raw,
                            "more_body": False}
                request._receive = _receive  # type: ignore[attr-defined]

                # Truncate to 4KB — real queries are under 1KB, legal
                # texts rarely reach 2KB. Very-long payloads get flagged
                # but replay won't work on them (acceptable).
                captured_body = raw[:4096].decode("utf-8", errors="replace")
                # v2.8 — PII redaction. When TAU_RAG_PII_REDACT=1 (or
                # admin flipped via endpoint), scrub Israeli IDs, phone
                # numbers, emails, and CC-like digit runs from the
                # captured text BEFORE it hits the observability log,
                # JSONL file, stdout, or SSE tail. No-op when disabled.
                captured_body = get_pii_redactor().redact(captured_body)
        except Exception:
            # Never log a body we could not fully process (it might be
            # un-redacted).
            captured_body = None

    def _log(status: int, error: Optional[str] = None) -> None:
        """Record one request row and feed the secondary trackers."""
        extra: Dict[str, Any] = {}
        if error:
            extra["error"] = error
        if captured_body is not None:
            extra["body"] = captured_body
        # v2.25 — attach per-key labels (tenant, tier, region, ...) so
        # they auto-propagate to log rows / dashboards. No-op when
        # key has no labels configured.
        if api_key:
            try:
                labels = get_key_labels().get_labels(api_key)
                if labels:
                    extra["labels"] = labels
            except Exception:
                pass
        latency_ms = (_time.time() - t0) * 1000.0
        get_obs().record(RequestLog(
            ts=_time.time(),
            request_id=request_id,
            method=request.method,
            path=path,
            status=status,
            latency_ms=latency_ms,
            key_hash_prefix=_hash_prefix(api_key),
            client_ip=(request.client.host if request.client else None),
            user_agent=request.headers.get("user-agent"),
            event_type="request",
            extra=extra,
        ))
        # v2.9 — also feed the slow-query tracker. No-op if threshold=0
        # or if the request was fast enough.
        try:
            get_slow_tracker().maybe_record(SlowRecord(
                ts=_time.time(),
                request_id=request_id,
                method=request.method,
                path=path,
                status=status,
                latency_ms=latency_ms,
                error=error,
            ))
        except Exception:
            pass
        # v2.30 — feed every request into SLO samplers (latency /
        # availability).
        try:
            get_slo_tracker().record_request(status, latency_ms)
        except Exception:
            pass
        # v2.33 — observe in Prometheus histogram per path.
        try:
            get_prom_histograms().observe(path, latency_ms)
        except Exception:
            pass
        # v2.21 — cost tracking. Records calls and estimated tokens
        # per API key (hashed prefix). Successful, keyed requests only.
        if 200 <= status < 300 and api_key:
            try:
                # Size of the captured (truncated) text, 0 when capture
                # is off — unchanged from the original estimate.
                req_bytes = len(captured_body) if captured_body else 0
                get_cost_tracker().record(
                    raw_key=api_key,
                    path=path,
                    req_bytes=req_bytes,
                    # response size unknown at this middleware layer
                    # without consuming the body; estimate req only.
                    resp_bytes=0,
                )
                # v2.32 — check cost alert thresholds. Fires a webhook
                # via v1.71 dispatcher on first crossing of each
                # configured threshold.
                from ..middleware.cost import (
                    _hash_prefix as _cost_hash_prefix,
                )
                kp = _cost_hash_prefix(api_key)
                if kp:
                    usage = get_cost_tracker().usage_for_key(kp)
                    alerts = get_cost_alerts().check_and_alert(
                        kp, float(usage.get("cost_usd", 0.0)))
                    if alerts:
                        from ..middleware import (
                            get_webhook_dispatcher,
                        )
                        for a in alerts:
                            try:
                                get_webhook_dispatcher().dispatch(a)
                            except Exception:
                                pass
                            get_obs().audit(
                                "cost.alert_fired",
                                key_prefix=a["key_prefix"],
                                threshold_usd=a["threshold_usd"],
                                current_cost_usd=a["current_cost_usd"],
                            )
            except Exception:
                pass

    # v2.16 — request body size limit. Cheap Content-Length pre-check
    # so large bodies don't traverse auth/quota just to get rejected.
    _body_guard = get_body_limit_guard()
    if _body_guard.is_enabled():
        _body_guard.record_check()
        cl_header = request.headers.get("content-length")
        if cl_header:
            # Only the header parse is allowed to fail quietly; a
            # malformed Content-Length simply skips the pre-check.
            try:
                cl: Optional[int] = int(cl_header)
            except ValueError:
                cl = None
            if cl is not None and cl > _body_guard.max_bytes:
                _body_guard.record_reject()
                _log(413, error="body_too_large")
                return JSONResponse(
                    status_code=413,
                    headers={"X-Request-ID": request_id},
                    content=build_error_body(
                        ErrorCode.PAYLOAD_TOO_LARGE,
                        f"body too large: {cl} > {_body_guard.max_bytes}",
                        request_id=request_id,
                        details={"content_length": cl,
                                 "max_bytes": _body_guard.max_bytes},
                    ),
                )

    # 1. Auth check (only if TAU_RAG_REQUIRE_AUTH is set OR admin path)
    auth = get_auth()

    # v2.7 — maintenance / drain mode. Admin traffic always flows (so
    # operators can turn it off again); everyone else gets 503 +
    # Retry-After. Runs BEFORE rate limiting so a drained pod does not
    # count rejected requests against the limiter.
    # v2.11 — k8s probes (/livez, /readyz) must always reach the
    # handler so the probe reflects true readiness.
    _PROBE_PATHS = ("/livez", "/readyz")
    maint = get_maintenance()
    if (maint.is_enabled() and not admin_only
            and not auth.is_admin(api_key)
            and path not in _PROBE_PATHS):
        snap = maint.snapshot()
        _log(503, error="maintenance")
        return JSONResponse(
            status_code=503,
            headers={
                "Retry-After": str(int(snap["retry_after"])),
                "X-Request-ID": request_id,
            },
            content=build_error_body(
                ErrorCode.INTERNAL_ERROR,
                "service is in maintenance mode",
                request_id=request_id,
                details={
                    "reason": snap["reason"],
                    "retry_after": int(snap["retry_after"]),
                    "maintenance_since_sec": round(
                        snap["duration_sec"], 2),
                },
            ),
        )

    if admin_only:
        if not auth.is_admin(api_key):
            _log(401, error="admin_required")
            body = build_error_body(
                ErrorCode.ADMIN_REQUIRED,
                "admin scope required",
                request_id=request_id,
                details={"hint": "pass X-API-Key with admin scope"},
            )
            body = _maybe_translate_body(body, request)
            return JSONResponse(
                status_code=401,
                headers={"X-Request-ID": request_id},
                content=body,
            )
    elif protected and auth.required:
        scope = ("write" if request.method in ("POST", "PUT", "DELETE")
                 else "read")
        if not auth.validate(api_key, scope=scope):
            _log(401, error="unauthorized")
            body = build_error_body(
                ErrorCode.UNAUTHORIZED,
                "missing or invalid X-API-Key",
                request_id=request_id,
                details={"required_scope": scope},
            )
            body = _maybe_translate_body(body, request)
            return JSONResponse(
                status_code=401,
                headers={"X-Request-ID": request_id},
                content=body,
            )

    # v2.23 — per-API-key IP allowlist. Runs right after auth, before
    # quota/rate-limit: blocks stolen-key replay from unknown IPs
    # before they consume any per-key counters. No-op for keys with
    # no allowlist configured (opt-in per key).
    if api_key:
        client_ip_for_acl = (request.client.host
                             if request.client else None)
        if not get_ip_allowlist().is_allowed(api_key, client_ip_for_acl):
            _log(403, error="ip_not_allowed")
            return JSONResponse(
                status_code=403,
                headers={"X-Request-ID": request_id},
                content=build_error_body(
                    ErrorCode.UNAUTHORIZED,
                    "request IP not in allowlist for this key",
                    request_id=request_id,
                    details={"client_ip": client_ip_for_acl},
                ),
            )

    # v2.35 — HMAC signature verification. For keys registered in the
    # HMAC store, the client MUST present a valid X-Signature header
    # + X-Timestamp header. Unsigned keys pass through.
    if api_key:
        hmac_store = get_hmac_signing()
        from ..middleware.hmac_signing import _key_prefix as _hm_prefix
        _hm_pfx = _hm_prefix(api_key)
        if hmac_store.requires_signing(_hm_pfx):
            sig = request.headers.get("x-signature")
            ts = request.headers.get("x-timestamp")
            # The signature must be checked against the exact bytes the
            # client signed. The log-oriented `captured_body` is
            # truncated, lossy-decoded and possibly PII-redacted, so it
            # must never be used here.
            if raw_request_body is not None:
                raw_body = raw_request_body
            elif request.method == "POST":
                # Body was not captured above — read it now and put it
                # back for the downstream handler.
                try:
                    raw_body = await request.body()

                    async def _receive_hmac() -> Dict[str, Any]:
                        return {"type": "http.request", "body": raw_body,
                                "more_body": False}
                    request._receive = _receive_hmac  # type: ignore[attr-defined]
                except Exception:
                    raw_body = b""
            else:
                raw_body = b""
            ok, reason = hmac_store.verify(
                raw_key=api_key,
                method=request.method,
                path=path,
                body=raw_body,
                signature=sig,
                timestamp=ts,
            )
            if not ok:
                _log(401, error=f"hmac_{reason}")
                return JSONResponse(
                    status_code=401,
                    headers={"X-Request-ID": request_id},
                    content=build_error_body(
                        ErrorCode.UNAUTHORIZED,
                        f"HMAC verification failed: {reason}",
                        request_id=request_id,
                        details={"reason": reason},
                    ),
                )

    # 2. v2.12 — Per-API-key daily quota. Runs BEFORE rate limit so
    # the per-second limiter doesn't deduct tokens for requests that
    # will be rejected anyway. Skipped for requests without a key,
    # whitelisted keys / client IPs, and admin-only paths.
    if api_key and not admin_only:
        client_ip = request.client.host if request.client else None
        if (api_key not in get_limiter().whitelist
                and client_ip not in get_limiter().whitelist):
            qc = get_quota_tracker().check_and_increment(api_key)
            if not qc.ok:
                _log(429, error="quota_exceeded")
                return JSONResponse(
                    status_code=429,
                    headers={
                        "Retry-After": str(qc.reset_in_sec),
                        "X-Request-ID": request_id,
                        "X-Quota-Limit": str(qc.limit),
                        "X-Quota-Used": str(qc.used),
                    },
                    content=build_error_body(
                        ErrorCode.RATE_LIMITED,
                        f"daily quota exceeded: {qc.used}/{qc.limit}",
                        request_id=request_id,
                        details={
                            "quota": "daily",
                            "limit": qc.limit,
                            "used": qc.used,
                            "reset_in_sec": qc.reset_in_sec,
                            "key_prefix": qc.key_prefix,
                        },
                    ),
                )

    # 3. Rate limit (skip admin — already auth'd)
    if protected:
        try:
            client_key = (
                api_key
                or (request.client.host if request.client else "unknown")
            )
            # v1.73 — pass path so the limiter can apply per-endpoint overrides
            get_limiter().acquire(client_key, path=path)
        except RateLimitExceeded as e:
            _log(429, error="rate_limited")
            return JSONResponse(
                status_code=429,
                headers={"Retry-After": f"{e.retry_after:.1f}",
                         "X-Request-ID": request_id},
                content=build_error_body(
                    ErrorCode.RATE_LIMITED,
                    f"rate limit exceeded for {e.key!r}",
                    request_id=request_id,
                    details={"retry_after": round(e.retry_after, 3),
                             "key": e.key},
                ),
            )

    # v2.24 — query complexity pre-check. For POSTs on the query
    # endpoints, score the query and reject with 400 if it exceeds
    # configured thresholds. Scores the FULL request body: parsing the
    # 4KB-truncated log copy would fail for any larger body and silently
    # skip the check for exactly the requests it is meant to catch.
    _COMPLEXITY_PATHS = ("/v1/generate", "/v1/chat", "/v1/search")
    if (raw_request_body is not None and request.method == "POST"
            and any(path.startswith(p) for p in _COMPLEXITY_PATHS)
            and get_query_complexity().is_enabled()):
        try:
            import json as _json2
            parsed = _json2.loads(
                raw_request_body.decode("utf-8", errors="replace"))
            q = parsed.get("query", "") if isinstance(parsed, dict) else ""
            if q:
                scored = get_query_complexity().score(q)
                if scored.over_limit:
                    _log(400, error="query_too_complex")
                    return JSONResponse(
                        status_code=400,
                        headers={"X-Request-ID": request_id},
                        content=build_error_body(
                            ErrorCode.VALIDATION_ERROR,
                            scored.reason or "query too complex",
                            request_id=request_id,
                            details={
                                "n_tokens": scored.n_tokens,
                                "n_chars": scored.n_chars,
                                "unique_ratio": scored.unique_ratio,
                                "score": scored.score,
                            },
                        ),
                    )
        except Exception:
            # Unparseable body or scorer failure: let the handler decide.
            pass

    # v2.13 — Idempotency-Key check BEFORE dispatch. Scoped to
    # (api_key_prefix, idempotency_key) so two clients using the same
    # key don't collide. POST only, whitelisted paths only.
    _IDEMPOTENT_PATHS = ("/v1/generate", "/v1/chat", "/v1/search")
    idem_key = request.headers.get("idempotency-key")
    idem_eligible = (
        idem_key
        and request.method == "POST"
        and any(path.startswith(p) for p in _IDEMPOTENT_PATHS)
    )
    if idem_eligible:
        idem_scope = _hash_prefix(api_key) or (
            request.client.host if request.client else "anon")
        cached = get_idempotency_store().get(idem_scope, idem_key)
        if cached is not None:
            _log(cached.status, error="idempotent_hit")
            headers = {
                "X-Request-ID": request_id,
                "X-Idempotency-Hit": "1",
                "X-Idempotency-Key": idem_key,
                **cached.headers_extra,
            }
            return JSONResponse(
                status_code=cached.status,
                content=cached.body,
                headers=headers,
            )

    # 3. Dispatch + log
    # v2.4 — set request_id on the tracer so pipeline spans created
    # during handler execution get tagged with it.
    try:
        from ..observability.tracing import get_tracer
        _t = get_tracer()
        _t.set_request_id(request_id)
    except Exception:
        _t = None

    # v2.14 + v2.20 — wall-clock timeout enforcement with per-endpoint
    # overrides; a resolved timeout of 0 means no enforcement.
    _guard = get_timeout_guard()
    _guard.record_request()
    _to_prefix, _to_ms = _guard.resolve(path)
    try:
        if _to_ms > 0:
            try:
                response = await _asyncio.wait_for(
                    call_next(request),
                    timeout=_to_ms / 1000.0,
                )
            except _asyncio.TimeoutError:
                _guard.record_timeout(path=path)
                _log(504, error="request_timeout")
                return JSONResponse(
                    status_code=504,
                    headers={"X-Request-ID": request_id},
                    content=build_error_body(
                        ErrorCode.INTERNAL_ERROR,
                        f"request exceeded {_to_ms:.0f}ms timeout",
                        request_id=request_id,
                        details={"timeout_ms": _to_ms, "path": path,
                                 "matched_prefix": _to_prefix or "(global)"},
                    ),
                )
        else:
            response = await call_next(request)
    except Exception as e:
        _log(500, error=f"{type(e).__name__}: {e}")
        raise
    finally:
        # Always clear the tracer's request id, on success or failure.
        if _t is not None:
            try:
                _t.set_request_id(None)
            except Exception:
                pass

    response.headers["X-Request-ID"] = request_id

    # v2.48 — W3C traceparent propagation. When enabled, a traceparent
    # header is always emitted: it reuses the inbound trace_id when the
    # inbound header parses, otherwise a fresh one is built.
    try:
        _tp_mgr = get_traceparent_manager()
        if _tp_mgr.is_enabled():
            inbound = request.headers.get("traceparent")
            if inbound:
                parsed = _parse_tp(inbound)
                _tp_mgr.record_parsed(parsed is not None)
                if parsed:
                    # Emit with same trace_id, new span_id
                    response.headers["traceparent"] = _build_tp(
                        trace_id=parsed["trace_id"],
                        parent_id=None,
                        sampled=True,
                    )
                else:
                    response.headers["traceparent"] = _build_tp()
            else:
                response.headers["traceparent"] = _build_tp()
            _tp_mgr.record_emitted()
    except Exception:
        pass

    # v2.52 — W3C Baggage: parse inbound and re-emit on the response for
    # downstream propagation.
    try:
        _bg_mgr = get_baggage_manager()
        if _bg_mgr.is_enabled():
            inbound_bg = request.headers.get("baggage")
            if inbound_bg:
                items = _parse_baggage(inbound_bg)
                if items:
                    _bg_mgr.record_parsed(items)
                    # Re-emit
                    response.headers["baggage"] = _build_baggage(items)
                    _bg_mgr.record_emitted()
    except Exception:
        pass

    # v2.17 — attach deprecation / sunset headers if this path is
    # in the deprecation registry.
    try:
        _dep_entry = get_deprecation_registry().match(path)
        if _dep_entry is not None:
            for k, v in _build_dep_headers(_dep_entry).items():
                response.headers[k] = v
    except Exception:
        pass

    # v2.13 — cache successful responses for idempotency replay.
    # Reading body_iterator consumes the response, so the response is
    # rebuilt from the collected bytes FIRST; caching then runs in its own
    # guard so a store failure can no longer leave the client with an
    # already-drained (empty) response.
    if idem_eligible and 200 <= response.status_code < 300:
        body_bytes: Optional[bytes] = None
        try:
            chunks = [chunk async for chunk in response.body_iterator]  # type: ignore[attr-defined]
            body_bytes = b"".join(chunks)
            from fastapi.responses import Response as _Resp
            response = _Resp(
                content=body_bytes,
                status_code=response.status_code,
                headers=dict(response.headers),
                media_type=response.media_type,
            )
        except Exception:
            body_bytes = None
        if body_bytes is not None:
            try:
                import json as _json
                body_json = _json.loads(body_bytes.decode("utf-8"))
                # Non-JSON bodies raise above; a JSON `null` is not cached.
                if body_json is not None:
                    get_idempotency_store().set(
                        idem_scope, idem_key,
                        response.status_code, body_json,
                    )
            except Exception:
                pass

    _log(response.status_code)
    return response


# ----------------------------------------------------------------- startup
def _pipeline_from_env() -> Pipeline:
    """Pick config via env var TAU_RAG_PRESET so the same container image
    can run different flavors (no_llm, hebrew_dense, mock).

    Two modes:
      1. TAU_RAG_CONFIG_JSON=path/to/config.json — load Config from JSON.
         Wins over TAU_RAG_PRESET. Useful for tau_native + custom verify
         thresholds without touching code.
      2. TAU_RAG_PRESET= — pick a built-in preset from the map below.
         "tau_native" builds a Hebrew-legal config with the local TAU model
         as the generator, equivalent to configs/hebrew_legal_local.json.

    An unset or blank TAU_RAG_PRESET means "no_llm"; an unknown name prints
    a warning and also falls back to "no_llm".
    """
    import os

    json_path = os.environ.get("TAU_RAG_CONFIG_JSON", "").strip()
    if json_path:
        cfg = Config.from_json(json_path)
        return Pipeline.from_config(cfg)

    # Strip stray whitespace and treat an empty value like "unset" so a
    # blank env entry selects the default instead of warning about ''.
    preset = os.environ.get("TAU_RAG_PRESET", "").strip() or "no_llm"

    def _tau_native_cfg() -> Config:
        c = Config.no_llm()
        c.generation.provider = "tau_native"
        c.generation.language = "he"
        c.rerank.enabled = False
        return c

    presets = {
        "mock": Config.mock,
        "default": Config.default,
        "hebrew_legal": Config.hebrew_legal,
        "no_llm": Config.no_llm,
        # no_llm_lite: same as no_llm but WITHOUT BM25 (saves ~5GB RAM on
        # 732k corpus; trades ~31% top-5 recall for memory headroom).
        # Use TAU_RAG_PRESET=no_llm_lite to switch.
        "no_llm_lite": Config.no_llm_lite,
        "hebrew_dense": Config.hebrew_dense,
        "tau_native": _tau_native_cfg,
    }
    # v4.x — fallback to `no_llm` (BM25 + gematria + hilbert + graph +
    # reranker), NOT `mock` (Jaccard-only, no rerank). The original
    # fallback made an invalid preset name silently degrade retrieval
    # to a token-overlap baseline that scored ~5 points worse on
    # diagnose_v10. `no_llm` is dependency-free yet uses the full
    # retrieval stack — same as the default above.
    if preset not in presets:
        print(f"[tau-rag] unknown preset {preset!r}, "
              f"falling back to 'no_llm' "
              f"(valid: {sorted(presets)})")
    cfg = presets.get(preset, Config.no_llm)()
    return Pipeline.from_config(cfg)


# One shared pipeline. Swap the config for production.
_pipeline: Pipeline = _pipeline_from_env()
# v4.x — Register `_pipeline` as the global singleton in pipeline.py so
# `get_pipeline()` (used by /v1/query, /v1/data/load_jsonl, and many
# other endpoints) returns this same instance instead of lazily building
# a SECOND pipeline. Before this line, the two were independent, which
# caused community-doc promotions and per-session uploads added via
# `_pipeline.add_documents(...)` to be invisible to search (silently
# routed to a different instance). All the fixes that switched to
# `get_pipeline()` for promote/replay continue to work; this also fixes
# the symmetric problem for endpoints that still use `_pipeline.*`
# directly (get_document, list_documents, index_stats, etc.).
try:
    from ..pipeline import set_pipeline as _set_global_pipeline
    _set_global_pipeline(_pipeline)
    print("[tau-rag] pipeline singleton unified (fastapi_app + pipeline.py)")
except Exception as _unify_err:
    # Best-effort: a failure here is reported but does not stop startup.
    print(f"[tau-rag] pipeline singleton unify failed: {_unify_err}")

# Auto-restore from snapshot if TAU_RAG_SNAPSHOT_PATH is set and the
# file exists. `replace=False` is passed through to load_snapshot.
import os as _os
_snap_path = _os.environ.get("TAU_RAG_SNAPSHOT_PATH")
if _snap_path and _os.path.exists(_snap_path):
    try:
        _restore_summary = _pipeline.load_snapshot(_snap_path, replace=False)
        print(f"[tau-rag] restored from snapshot: {_restore_summary}")
    except Exception as _e:
        print(f"[tau-rag] snapshot restore failed: {_e}")

# Periodic auto-snapshot — fires every N seconds as a crash-proofing measure.
# Needs both TAU_RAG_SNAPSHOT_PATH and a positive TAU_RAG_SNAPSHOT_INTERVAL.
from ..snapshot import AutoSnapshotter, set_autosnapshotter, get_autosnapshotter  # noqa: E402
_snap_interval = _os.environ.get("TAU_RAG_SNAPSHOT_INTERVAL")
if _snap_path and _snap_interval:
    try:
        _iv = float(_snap_interval)
        if _iv > 0:

            def _audit_periodic_snapshot(s):
                # Each completed save is written to the audit log.
                return get_obs().audit("snapshot.auto_periodic", **s)

            _auto = AutoSnapshotter(
                _pipeline,
                _snap_path,
                interval_sec=_iv,
                on_save=_audit_periodic_snapshot,
            )
            _auto.start()
            set_autosnapshotter(_auto)
            print(f"[tau-rag] periodic auto-snapshot every {_iv}s → {_snap_path}")
    except Exception as _e:
        print(f"[tau-rag] periodic snapshot setup failed: {_e}")

# Periodic metrics history sampler (v1.78) — optional, enabled by env var.
# Metrics history sampler: active only when
# TAU_RAG_METRICS_HISTORY_INTERVAL_SEC parses to a positive number.
try:
    _metrics_iv_raw = _os.environ.get("TAU_RAG_METRICS_HISTORY_INTERVAL_SEC")
    if _metrics_iv_raw:
        _metrics_iv = float(_metrics_iv_raw)
        if _metrics_iv > 0:
            from ..middleware import (
                MetricsHistory,
                MetricsHistorySampler,
                get_metrics_history,
                set_metrics_history,
                set_metrics_sampler,
            )
            _mcap = int(_os.environ.get(
                "TAU_RAG_METRICS_HISTORY_CAPACITY", "720"))
            # Replace the default history with one sized from env
            # (never fewer than 10 samples).
            _h = MetricsHistory(max_samples=max(10, _mcap))
            set_metrics_history(_h)
            _sampler = MetricsHistorySampler(
                _h,
                interval_s=_metrics_iv,
                sample_on_start=True,
            )
            _sampler.start()
            set_metrics_sampler(_sampler)
            print(f"[tau-rag] metrics history sampler every "
                  f"{_metrics_iv}s cap={_mcap}")
except Exception as _e:
    print(f"[tau-rag] metrics history sampler setup failed: {_e}")

# Background analytics retention scheduler (v1.93) — optional, enabled
# by TAU_RAG_ANALYTICS_TTL_DAYS (must parse to a positive number of days).
try:
    _ttl_raw = _os.environ.get("TAU_RAG_ANALYTICS_TTL_DAYS")
    if _ttl_raw:
        _ttl_days = float(_ttl_raw)
        if _ttl_days > 0:
            _prune_iv = float(_os.environ.get(
                "TAU_RAG_ANALYTICS_PRUNE_INTERVAL_SEC", "3600"))
            from ..middleware import (
                AnalyticsRetentionScheduler,
                set_retention_scheduler,
            )

            def _analytics_prune_cb(summary):
                """Audit a prune run: removals first, else a reported error."""
                try:
                    removed = summary.get("total_removed", 0)
                    if removed > 0:
                        get_obs().audit(
                            "analytics.prune.auto",
                            ttl_seconds=summary.get("ttl_seconds"),
                            total_removed=summary.get("total_removed"),
                        )
                    elif summary.get("error"):
                        get_obs().audit(
                            "analytics.prune.auto.error",
                            error=summary["error"],
                        )
                except Exception:
                    # Auditing must never break the scheduler callback.
                    pass

            _retention = AnalyticsRetentionScheduler(
                ttl_seconds=_ttl_days * 86400.0,  # days -> seconds
                interval_s=_prune_iv,
                on_prune=_analytics_prune_cb,
            )
            _retention.start()
            set_retention_scheduler(_retention)
            print(f"[tau-rag] analytics retention scheduler: "
                  f"ttl={_ttl_days}d interval={_prune_iv}s")
except Exception as _e:
    print(f"[tau-rag] retention scheduler setup failed: {_e}")

# Background alert evaluator (v1.81) — optional, enabled by env var.
# Alert scheduler: active only when TAU_RAG_ALERT_EVAL_INTERVAL_SEC
# parses to a positive number.
try:
    _alert_iv_raw = _os.environ.get("TAU_RAG_ALERT_EVAL_INTERVAL_SEC")
    if _alert_iv_raw:
        _alert_iv = float(_alert_iv_raw)
        if _alert_iv > 0:
            from ..middleware import (
                AlertScheduler,
                get_alert_store,
                get_metrics_history,
                set_alert_scheduler,
            )

            def _alert_fire_cb(verdict):
                """Write a fired alert to the audit log (best-effort)."""
                try:
                    get_obs().audit(
                        "alert.fired",
                        rule=verdict["rule"],
                        reason=verdict["reason"],
                        latest_value=verdict["latest_value"],
                        n_samples=verdict["n_samples"],
                    )
                except Exception:
                    pass

            _asched = AlertScheduler(
                get_alert_store(),
                get_metrics_history(),
                interval_s=_alert_iv,
                on_fire=_alert_fire_cb,
                evaluate_on_start=True,
            )
            _asched.start()
            set_alert_scheduler(_asched)
            print(f"[tau-rag] alert scheduler every {_alert_iv}s")
except Exception as _e:
    print(f"[tau-rag] alert scheduler setup failed: {_e}")

# Auto-warmup on startup if env requests it (v1.56).
if _os.environ.get("TAU_RAG_WARMUP") == "1":
    try:
        _fn = getattr(_pipeline, "warmup", None)
        # NOTE(review): indentation was lost in this copy of the file; the
        # flag and the message are assumed to belong to the branch where
        # warmup() actually ran — confirm against the original layout.
        if callable(_fn):
            _fn()
            _pipeline._warmed = True  # type: ignore[attr-defined]
            print("[tau-rag] auto-warmup complete")
    except Exception as _e:
        print(f"[tau-rag] auto-warmup failed: {_e}")

# v4.x — Auto-load main corpus.jsonl on startup. Eliminates the manual
# `curl /v1/data/load_jsonl` step after every restart that was costing
# us ~5 points on diagnose_v10 because the queries hit an empty pipeline
# and silently fell through to live-only retrievers.
#
# Looks at TAU_RAG_AUTOLOAD_CORPUS first; otherwise tries
# tau_rag/runtime/corpus.jsonl as a sensible default. Set
# TAU_RAG_AUTOLOAD_CORPUS=0 to disable.
Used to decide whether to skip the rebuild and use persisted retriever state.""" import hashlib from pathlib import Path as _PP import json as _ej here = _PP(__file__).resolve().parent.parent paths = [here / "runtime" / "corpus.jsonl", here / "runtime" / "parquet_cases.jsonl", here / "runtime" / "community_corpus.jsonl"] ids = [] for p in paths: if not p.exists(): continue try: with p.open("r", encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue try: rec = _ej.loads(line) if rec.get("id"): ids.append(str(rec["id"])) except Exception: pass except Exception: pass if not ids: return "" h = hashlib.md5("|".join(sorted(ids)).encode("utf-8")).hexdigest()[:12] return f"n={len(ids)};h={h}" # Build progress state — shared with public_system_status # Populated during corpus autoload, read by the UI to render a live # progress bar during long rebuilds (300k+ docs takes 15-20 min). _build_progress: Dict[str, Any] = {"phase": "idle"} def _build_progress_set(key: str, value: Any) -> None: """Update a progress field. 
Cheap — just dict assignment.""" _build_progress[key] = value def public_build_progress() -> Dict[str, Any]: """Snapshot of current build state — used by /v1/system/status.""" return dict(_build_progress) def _autoload_corpus_on_startup() -> None: import json as _alj from pathlib import Path as _P from ..core.types import Document as _Doc from ..pipeline import get_pipeline as _get_pipe flag = _os.environ.get("TAU_RAG_AUTOLOAD_CORPUS", "") if flag == "0": return explicit = _os.environ.get("TAU_RAG_AUTOLOAD_CORPUS_PATH") if explicit: path = _P(explicit).expanduser().resolve() else: # Default: tau_rag/runtime/corpus.jsonl (relative to this module) here = _P(__file__).resolve().parent.parent path = here / "runtime" / "corpus.jsonl" if not path.exists() or not path.is_file(): return # ---- Try persistent retriever indexes first ---- # If we saved indexes from a previous run AND the corpus on disk # hasn't changed, skip the entire chunking + indexing pipeline. # Each retriever now exposes state_dict()/load_state_dict() so the # generic pickle-test path that used to hang on the BM25 inverted # index is no longer reached. Set TAU_RAG_RETRIEVER_PERSISTENCE=0 # to disable (e.g. while debugging, or if disk is read-only). if _os.environ.get("TAU_RAG_RETRIEVER_PERSISTENCE", "1") == "1": try: from ..storage import (load_all_retrievers, fingerprint_corpus as _fpc) pipe = _get_pipe() # Compute the EXPECTED fingerprint from the JSONL files only # (not the in-memory state, which is empty). Cheap: count lines # and hash the IDs. 
expected = _expected_fingerprint_from_files() if expected: manifest = load_all_retrievers(pipe, expected_fingerprint=expected) if manifest is not None: n_loaded = manifest.get("n_loaded", 0) n_docs = manifest.get("indexed_docs", {}).get("n_docs", 0) print(f"[tau-rag] retrievers loaded from disk: " f"{n_loaded} retrievers, {n_docs} docs " f"(fingerprint match — skipping rebuild)") return except Exception as e: print(f"[tau-rag] retriever persistence load failed: {e}") try: pipe = _get_pipe() # Skip if pipeline already has docs (e.g. from snapshot restore) existing = len(getattr(pipe, "_indexed_docs", []) or []) if existing > 50: print(f"[tau-rag] corpus auto-load skipped: pipeline already " f"has {existing} docs") return # v2 — domain classifier: stamp each doc with metadata.domain + # metadata.domain_scores so the frontend can filter / browse by # legal domain (חוזים, חברות, משפחה …). Cheap to run at load time. try: from ..domain_classifier import classify as _classify_dom except Exception: _classify_dom = None # graceful degradation — feature off if import breaks # ---- Build progress tracker ---- # Progress is stored in module globals so /v1/system/status can # read it during the long rebuild. Three phases: parsing → indexing # → done. ETA computed from rolling rate. import time as _t_prog _build_progress_set("phase", "parsing") _build_progress_set("started_at", int(_t_prog.time())) # Cheap pre-count to set total (for ETA). Reading lines is fast # even on 350k docs — the heavy work is parsing JSON + classifying. 
try: with path.open("r", encoding="utf-8") as _cnt: n_total_lines = sum(1 for _ in _cnt) except Exception: n_total_lines = 0 _build_progress_set("n_total", n_total_lines) _build_progress_set("source", path.name) docs = [] bad = 0 _t_parse_start = _t_prog.time() with path.open("r", encoding="utf-8") as f: for i, line in enumerate(f): line = line.strip() if not line: continue try: rec = _alj.loads(line) md = dict(rec.get("metadata") or {}) text = str(rec.get("text", "")) if _classify_dom and "domain" not in md: try: res = _classify_dom(text) if res.get("top"): md["domain"] = res["top"] md["domain_scores"] = res.get("scores", {}) except Exception: pass docs.append(_Doc( id=str(rec.get("id", "")), text=text, metadata=md, )) except Exception: bad += 1 # Update progress every 1000 docs (cheap) if (i + 1) % 1000 == 0: elapsed = _t_prog.time() - _t_parse_start rate = (i + 1) / max(elapsed, 0.001) eta = (n_total_lines - i - 1) / max(rate, 1) _build_progress_set("n_done", i + 1) _build_progress_set("rate_per_s", round(rate, 1)) _build_progress_set("eta_s", int(eta)) if docs: _build_progress_set("phase", "indexing") _build_progress_set("n_done", len(docs)) # Default chunker: "legal_hebrew" — section-aware splitting on # סעיף N / פרק N / (א) / (1) etc., so a query like "מה אומר # סעיף 39?" hits a chunk whose entire content IS that section. # Empirically benchmarked on diagnose_v10 with the no_llm # preset (BM25 + gematria + hilbert + graph + reranker): # • legal_hebrew → section 1.00, concept 1.00, applied 0.47, # generalize 0.90 (≈10.2/12, best) # • fixed → section 1.00, concept 1.00, applied 0.40, # generalize 0.90 (≈10.0/12) # Override with TAU_RAG_AUTOLOAD_CHUNKER=fixed (or sentence) # if a different corpus shape is being indexed. 
chunker_name = _os.environ.get( "TAU_RAG_AUTOLOAD_CHUNKER", "legal_hebrew") _t_idx_start = _t_prog.time() n = pipe.add_documents(docs, chunker=chunker_name) idx_dt = _t_prog.time() - _t_idx_start _build_progress_set("phase", "done") _build_progress_set("n_chunks", n) _build_progress_set("indexing_s", round(idx_dt, 1)) _build_progress_set("eta_s", 0) print(f"[tau-rag] corpus auto-load: {len(docs)} docs, " f"{n} chunks via {chunker_name} (bad: {bad}) " f"from {path.name} ({idx_dt:.1f}s)") except Exception as _e: _build_progress_set("phase", "failed") _build_progress_set("error", str(_e)) print(f"[tau-rag] corpus auto-load failed: {_e}") _autoload_corpus_on_startup() # v2 — Parquet judgments auto-load (Hebrew supreme-court rulings sampled # from LawDBHeb/cases_clean.parquet via tau_rag/scripts/ingest_parquet_cases.py). # Disabled with TAU_RAG_AUTOLOAD_PARQUET=0. def _autoload_parquet_cases_on_startup() -> None: if _os.environ.get("TAU_RAG_AUTOLOAD_PARQUET", "1") == "0": return import json as _pj from pathlib import Path as _PP from ..core.types import Document as _DocP from ..pipeline import get_pipeline as _get_pipe_p here = _PP(__file__).resolve().parent.parent path = here / "runtime" / "parquet_cases.jsonl" if not path.exists() or path.stat().st_size == 0: return try: # Reuse the domain classifier so docs without a stamped domain # still get one, exactly like _autoload_corpus_on_startup does. 
try: from ..domain_classifier import classify as _cls_p except Exception: _cls_p = None pipe = _get_pipe_p() docs: list = [] bad = 0 with path.open("r", encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue try: rec = _pj.loads(line) md = dict(rec.get("metadata") or {}) text = str(rec.get("text", "")) if _cls_p and "domain" not in md: try: res = _cls_p(text) if res.get("top"): md["domain"] = res["top"] md["domain_scores"] = res.get("scores", {}) except Exception: pass docs.append(_DocP( id=str(rec.get("id", "")), text=text, metadata=md, )) except Exception: bad += 1 if docs: chunker = _os.environ.get("TAU_RAG_AUTOLOAD_CHUNKER", "legal_hebrew") n = pipe.add_documents(docs, chunker=chunker) print(f"[tau-rag] parquet cases auto-load: {len(docs)} docs, " f"{n} chunks via {chunker} (bad: {bad}) " f"from {path.name}") # Invalidate citation network cache — it'll be rebuilt on # next /v1/judgments/.../network or /v1/citations/popular call try: if hasattr(pipe, "_citation_network_cache"): pipe._citation_network_cache = None if hasattr(pipe, "_outcome_stats_cache"): pipe._outcome_stats_cache = None except Exception: pass except Exception as _e: print(f"[tau-rag] parquet cases auto-load failed: {_e}") _autoload_parquet_cases_on_startup() # v5 — Scraped content auto-load (court verdicts + legislation + WhatsApp # threads + dover press releases). Written by tau_rag.scrapers — same # {id, text, metadata} shape as parquet_cases, so we reuse the loader. 
def _autoload_scraped_on_startup() -> None:
    """Index ``runtime/scraped/scraped_corpus.jsonl`` into the shared pipeline.

    Returns immediately when ``TAU_RAG_AUTOLOAD_SCRAPED`` is ``"0"`` or the
    file is missing or empty. Each non-blank line is parsed as a
    ``{id, text, metadata}`` record; lines that fail are counted as bad.
    Records lacking ``metadata.domain`` are passed through the domain
    classifier when it can be imported. Any other failure is printed
    rather than raised.
    """
    if _os.environ.get("TAU_RAG_AUTOLOAD_SCRAPED", "1") == "0":
        return
    import json as _sj
    from pathlib import Path as _PS
    from ..core.types import Document as _DocS
    from ..pipeline import get_pipeline as _get_pipe_s

    package_root = _PS(__file__).resolve().parent.parent
    path = package_root / "runtime" / "scraped" / "scraped_corpus.jsonl"
    if not path.exists() or path.stat().st_size == 0:
        return

    try:
        try:
            from ..domain_classifier import classify as _cls_s
        except Exception:
            _cls_s = None  # classifier unavailable: load without stamping

        def _to_document(raw_json):
            """Turn one JSONL line into a document; raises on bad input."""
            record = _sj.loads(raw_json)
            meta = dict(record.get("metadata") or {})
            body = str(record.get("text", ""))
            if _cls_s and "domain" not in meta:
                try:
                    verdict = _cls_s(body)
                    if verdict.get("top"):
                        meta["domain"] = verdict["top"]
                        meta["domain_scores"] = verdict.get("scores", {})
                except Exception:
                    # Classification is best-effort; keep the doc as-is.
                    pass
            return _DocS(
                id=str(record.get("id", "")),
                text=body,
                metadata=meta,
            )

        pipe = _get_pipe_s()
        loaded = []
        n_bad = 0
        with path.open("r", encoding="utf-8") as handle:
            for raw in handle:
                stripped = raw.strip()
                if not stripped:
                    continue
                try:
                    loaded.append(_to_document(stripped))
                except Exception:
                    n_bad += 1

        if not loaded:
            return
        chunker = _os.environ.get("TAU_RAG_AUTOLOAD_CHUNKER", "legal_hebrew")
        n_chunks = pipe.add_documents(loaded, chunker=chunker)
        print(f"[tau-rag] scraped corpus auto-load: {len(loaded)} docs, "
              f"{n_chunks} chunks via {chunker} (bad: {n_bad}) "
              f"from {path.name}")
    except Exception as _e:
        print(f"[tau-rag] scraped corpus auto-load failed: {_e}")


_autoload_scraped_on_startup()


# v4.x — Community corpus auto-load on startup.
# Approved submissions are kept in two places:
#   1. SQLite (uploads_store.db) — the source of truth.
#   2. runtime/community_corpus.jsonl — append-only, replayable.
# Every startup replays the JSONL into the live pipeline so previously
# approved community docs are searchable, then runs a bulk promote pass to
# backfill anything approved while the server was down (or to populate the
# JSONL on first run).
# Disabled with TAU_RAG_AUTOLOAD_COMMUNITY=0.
#
# IMPORTANT: must use pipeline.get_pipeline() — NOT the local _pipeline.
# fastapi_app's _pipeline is a DIFFERENT instance from the one /v1/query
# (and the rest of the app) uses via get_pipeline(). They were never
# registered with each other. Adding docs to the wrong one means search
# silently can't find them.
if _os.environ.get("TAU_RAG_AUTOLOAD_COMMUNITY", "1") != "0":
    try:
        from ..upload import promote as _up_promote
        from ..pipeline import get_pipeline as _get_pipe

        _shared_pipe = _get_pipe()

        # Step 1: replay what was already promoted to the JSONL.
        _replay = _up_promote.replay_community_corpus(_shared_pipe)
        if _replay.get("loaded"):
            print(
                f"[tau-rag] community corpus replay: "
                f"{_replay['loaded']} docs, "
                f"{_replay.get('n_chunks', 0)} chunks"
            )

        # Step 2: promote approved-but-unpromoted submissions.
        _bulk = _up_promote.bulk_promote_unpromoted(_shared_pipe)
        if _bulk.get("n_promoted"):
            print(
                f"[tau-rag] community corpus backfill: "
                f"promoted {_bulk['n_promoted']} of "
                f"{_bulk['n_pending']} pending "
                f"({_bulk['n_chunks_added']} chunks)"
            )
        if _bulk.get("n_failed"):
            print(
                f"[tau-rag] community corpus backfill: "
                f"{_bulk['n_failed']} failures"
            )
    except Exception as _e:
        print(f"[tau-rag] community auto-load failed: {_e}")

# Persist retriever indexes after the full corpus is loaded.
# Runs in a background thread so it never blocks startup. Each retriever
# now has state_dict()/load_state_dict() — the slow generic pickle.dumps
# probe is no longer needed. Set TAU_RAG_RETRIEVER_PERSISTENCE=0 to
# disable (debugging, ephemeral filesystems, etc).
def _install_chained_handler(signum, callback):
    """Install a handler for *signum* that runs *callback*, then forwards.

    A bare handler that only returns would swallow the signal and displace
    whatever handler was installed before it. This wrapper runs
    ``callback()`` and then:

    * calls the previously installed Python handler, if there was one;
    * otherwise, if the previous disposition was ``SIG_DFL``, restores it
      and re-raises the signal so the default action still happens;
    * does nothing more when the signal was ignored or had a non-Python
      handler.

    Raises ``ValueError``/``OSError`` from ``signal.signal`` (e.g. when not
    called from the main thread); callers decide whether that matters.
    """
    import signal as _sig

    previous = _sig.getsignal(signum)

    def _handler(received, frame):
        try:
            callback()
        finally:
            if callable(previous):
                previous(received, frame)
            elif previous == _sig.SIG_DFL:
                _sig.signal(received, _sig.SIG_DFL)
                _sig.raise_signal(received)

    _sig.signal(signum, _handler)


if _os.environ.get("TAU_RAG_RETRIEVER_PERSISTENCE", "1") == "1":
    try:
        from ..pipeline import get_pipeline as _gp_persist
        from ..storage import save_all_retrievers as _save_retr
        from ..storage.retriever_persistence import schedule_save as _schedule_save
        import threading as _th_persist

        def _do_persist():
            """Save retriever state once if more than 100 docs are indexed."""
            try:
                pipe = _gp_persist()
                n_indexed = len(getattr(pipe, "_indexed_docs", []) or [])
                if n_indexed > 100:
                    import time as _ttp
                    _t0 = _ttp.time()
                    _manifest = _save_retr(pipe)
                    _dt = _ttp.time() - _t0
                    n_saved = sum(
                        1 for r in _manifest.get("retrievers", {}).values()
                        if r.get("saved"))
                    print(f"[tau-rag] retriever indexes persisted: "
                          f"{n_saved} saved in {_dt:.1f}s")
            except Exception as _e:
                print(f"[tau-rag] retriever persistence save failed: {_e}")

        # Run in background — never block startup
        _th_persist.Thread(target=_do_persist, daemon=True,
                           name="retriever-persist").start()

        # ---- Auto-save trigger + OOM safety net on add_documents() ----
        # Wrap pipe.add_documents to:
        #   1. Refuse new work if system memory is critically high
        #      (default >90% — set TAU_RAG_OOM_THRESHOLD_PCT to override).
        #      Returns 0 docs added without crashing the process. The
        #      caller sees an empty result and can retry after some
        #      memory pressure has cleared.
        #   2. Schedule a debounced background save after each
        #      successful call. Multiple rapid adds within 60s coalesce
        #      into a single save — a 5k-doc batch triggers ONE save.
        # Configurable via TAU_RAG_PERSIST_DEBOUNCE_S (default 60s)
        # and TAU_RAG_OOM_THRESHOLD_PCT (default 90).
        try:
            _shared_pipe = _gp_persist()
            _orig_add = _shared_pipe.add_documents
            _debounce_s = float(_os.environ.get(
                "TAU_RAG_PERSIST_DEBOUNCE_S", "60"))
            _oom_threshold = float(_os.environ.get(
                "TAU_RAG_OOM_THRESHOLD_PCT", "90"))

            def _check_memory_pressure() -> Optional[float]:
                """Return current memory % if usage is critical, else None.

                Silent-fail to None if psutil isn't installed.
                """
                try:
                    import psutil as _psm
                    pct = _psm.virtual_memory().percent
                    return pct if pct >= _oom_threshold else None
                except Exception:
                    return None

            def _add_documents_with_save(*args, **kwargs):
                """Guarded ``add_documents``: memory check, add, schedule save."""
                pct = _check_memory_pressure()
                if pct is not None:
                    print(f"[tau-rag] add_documents REFUSED — system memory "
                          f"at {pct:.1f}% (>= {_oom_threshold}% threshold). "
                          f"Free RAM or restart before continuing.")
                    return 0  # signal "no docs added"
                result = _orig_add(*args, **kwargs)
                try:
                    _schedule_save(_shared_pipe, delay_seconds=_debounce_s)
                except Exception:
                    pass  # never break add_documents on save scheduling
                return result

            _shared_pipe.add_documents = _add_documents_with_save
            print(f"[tau-rag] auto-save trigger installed "
                  f"(debounce={_debounce_s}s, oom_threshold={_oom_threshold}%)")
        except Exception as _e:
            print(f"[tau-rag] auto-save trigger install failed: {_e}")

        # ---- Graceful shutdown: flush pending save on SIGTERM/SIGINT ----
        # Without this, killing the server within `_debounce_s` of a recent
        # add_documents() loses any state that was queued for save.
        # The flush runs first and the signal is then handed on to the
        # handler that was installed before ours (or to the default
        # action), so the process still shuts down as it would have.
        try:
            from ..storage.retriever_persistence import flush_pending_save
            import signal as _signal, atexit as _atexit
            _shutdown_done = {"value": False}

            def _graceful_shutdown(*args):
                """Flush pending persistence once; later calls are no-ops."""
                if _shutdown_done["value"]:
                    return
                _shutdown_done["value"] = True
                try:
                    pipe = _gp_persist()
                    n_docs = len(getattr(pipe, "_indexed_docs", []) or [])
                    if n_docs > 100:
                        print(f"[tau-rag] graceful shutdown — flushing "
                              f"persistence ({n_docs:,} docs)...")
                        flush_pending_save(pipe, timeout_s=120.0)
                except Exception as _e:
                    print(f"[tau-rag] shutdown flush failed: {_e}")

            # SIGTERM = systemd / Docker stop. SIGINT = Ctrl+C.
            # We DON'T install for SIGKILL — it's uncatchable by design.
            try:
                _install_chained_handler(_signal.SIGTERM, _graceful_shutdown)
            except (ValueError, OSError):
                pass  # not main thread (uvicorn workers)
            try:
                _install_chained_handler(_signal.SIGINT, _graceful_shutdown)
            except (ValueError, OSError):
                pass
            # atexit as a final safety net (e.g. sys.exit() in code path)
            _atexit.register(_graceful_shutdown)
            print(f"[tau-rag] graceful shutdown handlers installed "
                  f"(SIGTERM, SIGINT, atexit)")
        except Exception as _e:
            print(f"[tau-rag] shutdown handler install failed: {_e}")
    except Exception as _e:
        print(f"[tau-rag] retriever persistence start failed: {_e}")


# Seed admin API key from env var on startup (dev convenience).
# If TAU_RAG_SEED_ADMIN_KEY is set, register it as an admin key so
# curl/browser can use it immediately. In production, use a secret
# manager — don't commit the env value to source control.
def _seed_admin_key_from_env() -> None:
    """Register ``TAU_RAG_SEED_ADMIN_KEY`` as an admin API key, if set.

    The value is stripped; an unset or blank variable is a no-op. The key
    is stored under its hash with scopes ``admin``, ``read`` and ``write``,
    overwriting any existing entry for the same hash. Persisting the store
    is best-effort, and any failure is printed rather than raised.
    """
    import os
    seed = os.environ.get("TAU_RAG_SEED_ADMIN_KEY", "").strip()
    if not seed:
        return
    try:
        from ..middleware.auth import (
            get_auth, _hash_key, APIKey)
        store = get_auth()
        h = _hash_key(seed)
        # Register as admin scope — idempotent (overwrite if exists)
        store._keys[h] = APIKey(
            key_hash=h,
            label="seed-admin-from-env",
            scopes={"admin", "read", "write"},
        )
        try:
            store._save()
        except Exception:
            pass  # the in-memory registration above still stands
        print(
            "[tau-rag] Seeded admin key from "
            "TAU_RAG_SEED_ADMIN_KEY "
            f"(hash={h[:12]}...) — use it as X-API-Key")
    except Exception as e:
        print(f"[tau-rag] Failed to seed admin key: {e}")


# Auto-snapshot on shutdown — pairs with auto-restore on startup above.
def _save_snapshot_on_shutdown() -> None: # Stop the periodic thread first so we don't race with the final save auto = get_autosnapshotter() if auto: auto.stop() set_autosnapshotter(None) # v1.78 — also stop the metrics sampler cleanly try: from ..middleware import get_metrics_sampler, set_metrics_sampler msamp = get_metrics_sampler() if msamp: msamp.stop() set_metrics_sampler(None) except Exception: pass # v1.81 — stop the alert scheduler cleanly try: from ..middleware import get_alert_scheduler, set_alert_scheduler asched = get_alert_scheduler() if asched: asched.stop() set_alert_scheduler(None) except Exception: pass # v1.93 — stop the analytics retention scheduler cleanly try: from ..middleware import ( get_retention_scheduler, set_retention_scheduler, ) rs = get_retention_scheduler() if rs: rs.stop() set_retention_scheduler(None) except Exception: pass path = _os.environ.get("TAU_RAG_SNAPSHOT_PATH") if not path: return try: summary = _pipeline.save_snapshot(path) print(f"[tau-rag] shutdown snapshot saved: {summary}") get_obs().audit("snapshot.auto_save_on_shutdown", **summary) except Exception as _e: print(f"[tau-rag] shutdown snapshot failed: {_e}") # v2.99.84 — lightweight ping endpoint. Used by: # - keep-warm loop (faster than /readyz) # - frontend on page load (TLS handshake + container wake-up) # Returns instantly without touching the pipeline. @app.get("/v1/ping") def ping(): # type: ignore return {"ok": True, "ts": _wa_time.time()} # v2.99.84 — in-memory dashboard cache (5-sec TTL) — avoids re-reading # the entire JSONL file on every refresh (every modal open triggers this). _WA_DASH_CACHE: Dict[tuple, tuple] = {} # (days, top_k) → (response, expires_ts) _WA_DASH_TTL = 5.0 async def _keep_warm_loop(): """Day 48 — background task that keeps the pipeline warm. HF Spaces (CPU-basic tier) doesn't stop the container, but after a few minutes of idle the heavy retrieval pipeline drops out of OS page cache + Python LRU caches. 
First request after idle then takes ~20s vs ~1s warm. This task fires every TAU_RAG_KEEPWARM_SEC seconds (default 240 = 4 min, so well under the typical 5-10 min cooling window) and runs a trivial retrieve to keep the hot path resident. Errors are swallowed — keep-warm should never crash the app. Disable by setting TAU_RAG_KEEPWARM_SEC=0. """ import asyncio # v2.99.84 — was 240, lowered to 180 for safer margin under HF idle. interval = int(_os.environ.get("TAU_RAG_KEEPWARM_SEC", "180")) if interval <= 0: return # Wait 60s after boot before starting — let real startup finish first. await asyncio.sleep(60) while True: try: from ..pipeline import get_pipeline from ..core.types import Query pipe = get_pipeline() # Trivial query — touches retrievers + tokenizer caches. await asyncio.to_thread(pipe.retrievers.search, Query(text="חוזה"), k=2) except Exception: pass # swallow — never crash the app from keep-warm await asyncio.sleep(interval) async def _eager_prewarm(): """Day 48 — eager-load the pipeline BEFORE the Space accepts HTTP traffic, so no first-user ever pays the ~19s cold-start penalty. Without this, the very first HTTP request after every container restart (deploy, OOM-kill, etc.) pays ~19s for retriever + encoder + shard loading. That's exactly the moment a friend-lawyer clicks the link for the first time → they conclude the system is broken and bounce. v2 (Day 48 follow-up): runs MULTIPLE representative queries covering distinct domains (contracts/labor/torts/procedure/criminal). Single-query prewarm only touched one shard set — the second query in a different domain still paid ~2-3s for shard load. With 6 queries spanning the main domains, every subsequent request hits warm caches across the board. Trade-off: startup is now ~15-25s slower. HF reports stage `RUNNING_APP_STARTING` while this runs. Worth it: every user from the very first request gets <2s warm latency on ANY domain. Disable by setting TAU_RAG_EAGER_PREWARM=0 (e.g. in dev). 
""" import asyncio, time as _t if _os.environ.get("TAU_RAG_EAGER_PREWARM", "1") == "0": return t0 = _t.time() try: from ..pipeline import get_pipeline from ..core.types import Query pipe = await asyncio.to_thread(get_pipeline) # Multi-domain warmup — each query likely hits a different shard # and exercises distinct retrieval paths. Order roughly by # expected user-traffic share. WARMUP_QUERIES = [ "חוזה", # contracts (most common) "אפרופים פרשנות", # supreme-court anchor cluster "פיטורים שימוע", # labor shard "רשלנות נזיקין", # torts shard "חזקת חפות", # criminal shard "סבירות מנהלית", # administrative shard ] for q in WARMUP_QUERIES: try: await asyncio.to_thread( pipe.retrievers.search, Query(text=q), k=2) except Exception: pass # one bad query shouldn't block the others print(f"[eager-prewarm] {len(WARMUP_QUERIES)} domains warm " f"after {_t.time()-t0:.1f}s") except Exception as e: # Don't block startup if prewarm fails — degraded mode is still # better than no service. print(f"[eager-prewarm] FAILED ({type(e).__name__}: {e}) — " f"first request will pay cold-start cost") @asynccontextmanager async def _app_lifespan(_app: FastAPI): _seed_admin_key_from_env() # Day 48 — EAGER prewarm BEFORE yielding control. The Space is in # RUNNING_APP_STARTING during this; HF won't route traffic yet. So # no user ever hits a cold pipeline. await _eager_prewarm() # Day 48 — also start the keep-warm background task to prevent # re-cooling. asyncio.create_task schedules it on the running loop; # the lifespan keeps a reference so it isn't GC'd. Task is cancelled # on shutdown. 
import asyncio keep_warm_task = asyncio.create_task(_keep_warm_loop()) try: yield finally: keep_warm_task.cancel() try: await keep_warm_task except (asyncio.CancelledError, Exception): pass _save_snapshot_on_shutdown() app.router.lifespan_context = _app_lifespan app.include_router(system_router) app.include_router(documents_router) app.include_router(public_router) app.include_router(chat_router) app.include_router(admin_content_router) app.include_router(admin_controls_router) app.include_router(admin_ops_router) app.include_router(admin_runtime_router) # ------------------------------------------------------------------ schemas # Request models for the public HTTP surface live in api.models. # ------------------------------------------------------------------ routes _PLAYGROUND_HTML = """ TAU-RAG

🔎 TAU-RAG — Hebrew legal RAG

Pipeline alive at /v1/*. Swagger UI: /docs · ReDoc: /redoc

📄 Documents
❓ Single query
💬 Chat

Add documents


Ask a question


Chat with session memory

session
""" def _static_file(name: str) -> Optional[str]: """Read a file from tau_rag/static/ if it exists.""" import os here = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) p = os.path.join(here, "static", name) if not os.path.isfile(p): return None try: with open(p, "r", encoding="utf-8") as f: return f.read() except Exception: return None def root(): """Serve the end-user chat UI (tau_rag/static/index.html). Falls back to the inline playground if the static file is missing.""" from fastapi.responses import HTMLResponse html = _static_file("index.html") if html is None: html = _PLAYGROUND_HTML return HTMLResponse(html) def admin_console(): """Serve the admin dashboard (tau_rag/static/admin.html).""" from fastapi.responses import HTMLResponse html = _static_file("admin.html") if html is None: return HTMLResponse( "

Admin console not available

" "

tau_rag/static/admin.html is missing.

", status_code=404) return HTMLResponse(html) def playground(): """The original built-in playground, kept for backward-compat.""" from fastapi.responses import HTMLResponse return HTMLResponse(_PLAYGROUND_HTML) def favicon(): # Silent 204 to stop the noisy browser 404 from fastapi.responses import Response return Response(status_code=204) def health(): return {"ok": True, "version": "2.0.0"} def version_manifest(): """Build + runtime version info. Unauthenticated so anyone (including deploy scripts, monitoring, and teammates debugging) can check what's actually running. Does not expose secrets, just structural metadata.""" import platform as _plat import sys as _sys import subprocess as _sp # Pipeline structure retr_multi = getattr(_pipeline, "retrievers", None) retriever_members = ( sorted(getattr(retr_multi, "retrievers", {}).keys()) if retr_multi is not None else [] ) cfg = _pipeline.config preset = _os.environ.get("TAU_RAG_PRESET", "unknown") # Build info — keep it safe to serialize try: import fastapi as _fastapi fastapi_v = getattr(_fastapi, "__version__", "unknown") except Exception: fastapi_v = "unknown" # Git metadata — optional; silent fallback if not in a git checkout git_info: Dict[str, Any] = {} try: commit = _sp.check_output( ["git", "rev-parse", "HEAD"], stderr=_sp.DEVNULL, timeout=1).decode().strip() branch = _sp.check_output( ["git", "rev-parse", "--abbrev-ref", "HEAD"], stderr=_sp.DEVNULL, timeout=1).decode().strip() dirty = _sp.check_output( ["git", "status", "--porcelain"], stderr=_sp.DEVNULL, timeout=1).decode().strip() git_info = { "commit": commit, "commit_short": commit[:8], "branch": branch, "dirty": bool(dirty), } except Exception: git_info = {"available": False} # Enabled feature flags (from env) — helps debug "is it off/on in prod?" 
features = { "auth_required": _os.environ.get("TAU_RAG_REQUIRE_AUTH") == "1", "auto_warmup": _os.environ.get("TAU_RAG_WARMUP") == "1", "snapshot_path": bool(_os.environ.get("TAU_RAG_SNAPSHOT_PATH")), "snapshot_interval": float(_os.environ.get("TAU_RAG_SNAPSHOT_INTERVAL") or 0) or None, "synonyms_path": bool(_os.environ.get("TAU_RAG_SYNONYMS_PATH")), "hsts": _os.environ.get("TAU_RAG_HSTS") == "1", "csp": bool(_os.environ.get("TAU_RAG_CSP")), "cors_origins": int(bool(_os.environ.get("TAU_RAG_CORS_ORIGINS"))), "log_stdout": _os.environ.get("TAU_RAG_LOG_STDOUT") == "1", "log_file": bool(_os.environ.get("TAU_RAG_LOG_PATH")), "audit_webhook": bool(_os.environ.get("TAU_RAG_AUDIT_WEBHOOK_URL")), "endpoint_rate_limits": bool( _os.environ.get("TAU_RAG_ENDPOINT_RATE_LIMITS")), "audit_export": True, "log_stream": True, "key_rotation": True, "snapshot_diff": True, "metrics_history": bool( _os.environ.get("TAU_RAG_METRICS_HISTORY_INTERVAL_SEC")), "webhook_circuit_breaker": True, "alert_rules": True, "alert_scheduler": bool( _os.environ.get("TAU_RAG_ALERT_EVAL_INTERVAL_SEC")), "doc_stats": True, "retriever_attribution": True, "cocitation_graph": True, "content_health": True, "content_health_ui": True, "eval_latency_gate": True, "content_health_history": True, "query_fingerprints": True, "preset_promote_candidates": True, "preset_auto_promote": True, "analytics_retention": True, "analytics_retention_scheduler": bool( _os.environ.get("TAU_RAG_ANALYTICS_TTL_DAYS")), "doc_freshness": True, "doc_update_priorities": True, "query_doc_affinity": True, "analytics_dump_restore": True, "query_analytics_ui": True, "query_replay": True, "replay_body_capture": ( _os.environ.get("TAU_RAG_OBS_CAPTURE_BODY") == "1"), "v2_stable_api": True, "about_endpoint": True, "semantic_cache": ( _os.environ.get("TAU_RAG_SEMANTIC_CACHE") == "1"), "graph_cocitation_boost": ( float(_os.environ.get("TAU_RAG_GRAPH_COCITATION_BOOST") or 0.0) if _os.environ.get("TAU_RAG_GRAPH_COCITATION_BOOST") else False), 
"query_doc_boost": ( float(_os.environ.get("TAU_RAG_QUERY_DOC_BOOST") or 0.0) if _os.environ.get("TAU_RAG_QUERY_DOC_BOOST") else False), "request_spans": True, "span_timeline_ui": True, "limiter_backend_protocol": True, "maintenance_mode": True, "pii_redaction": True, "pii_redaction_enabled": ( _os.environ.get("TAU_RAG_PII_REDACT") == "1"), "slow_query_detection": True, "readiness_registry": True, "daily_quota": True, "idempotency_key": True, "request_timeout": True, "log_rotation": True, "body_limit": True, "deprecation_headers": True, "feature_flags_registry": True, "sigterm_autodrain": True, "per_endpoint_timeouts": True, "cost_tracking": True, "response_compression": True, "ip_allowlist": True, "query_complexity": True, "key_labels": True, "i18n_errors": True, "request_bundle": True, "label_aggregation": True, "ops_dashboard_ui": True, "slo_tracking": True, "async_jobs": True, "cost_alerts": True, "prometheus_histograms": True, "etag_conditional_get": True, "hmac_request_signing": True, "batch_queries": True, "webhook_retry_dlq": True, "hebrew_normalization": True, "audit_search": True, "soft_delete": True, "query_coalescing": True, "tenant_flag_overrides": True, "response_redaction": True, "config_snapshot": True, "shadow_pipeline": True, "scheduled_tasks": True, "pipeline_stage_breakers": True, "w3c_traceparent": True, "jwt_auth": True, "bulk_import": True, "doc_acl": True, "w3c_baggage": True, "canary_routing": True, "autocomplete": True, "eval_gate": True, "near_dup_detection": True, "query_intent": True, "doc_versioning": True, "concurrency_limit": True, "language_detection": True, "xss_sanitizer": True, "resource_pool": True, "cache_warmup": True, "lazy_init": True, "pipeline_failover": True, "intent_rerank": True, "stage_budgets": True, "graceful_degradation": True, "query_rewrite": True, "score_calibration": True, "answer_postprocess": True, "retriever_health": True, "stream_throttle": True, "context_sizer": True, "answer_confidence": True, 
"legal_entities": True, "conversation_summarizer": True, "diversity_ranker": True, "synonym_expansion": True, "feedback_learning": True, "embedding_compression": True, "phrase_detection": True, "meaning_preservation": True, "retrieval_explain": True, "smart_chunking": True, "query_decomposition": True, "cost_estimator": True, "adaptive_k": True, "pipeline_trace": True, "ab_experiments": True, "tenant_quotas": True, "multihop_retrieval": True, "snippet_extraction": True, "answer_grounding": True, "corpus_drift": True, "prometheus_metrics": True, "request_replay": True, "composite_health": True, "platform_manifest": True, "self_diagnose": True, "changelog_generator": True, "html_dashboard": True, "compliance_bundle": True, "middleware_sdk": True, "eval_harness": True, "session_persistence": True, "event_bus": True, "graph_retriever": True, "federated_retrieval": True, "audit_export": True, "graph_builder": True, "answer_templates": True, "clarification_planner": True, "citation_parser": True, "spell_correct": True, "retrieval_budget": True, "doc_summarizer": True, "privilege_filter": True, "time_travel": True, "fact_extractor": True, "answer_consensus": True, "authority_ranker": True, "doc_comparator": True, "similar_docs": True, "query_lifecycle": True, "timeline_builder": True, "corpus_contradictions": True, "anonymizer": True, "doc_lineage": True, "result_explainer": True, "audit_anomaly": True, "llm_router": True, "citation_expander": True, "answer_quality_gate": True, "coverage_monitor": True, "followup_rewriter": True, "reasoning_chain": True, "injection_detector": True, "cost_forecaster": True, "crosslingual_bridge": True, "diversity_enforcer": True, "session_exporter": True, "fact_consistency": True, "issue_spotter": True, "doc_classifier": True, "cache_invalidator": True, "query_analytics": True, "corpus_router": True, "stream_chunker": True, "kg_extractor": True, "citation_network": True, "template_extractor": True, "slow_query_analyzer": True, 
def readyz(require_warmed: bool = False):
    """Readiness probe: return 200 when ready, raise a 503 with details otherwise.

    Two sources are consulted, in this order:

    1. ``check_readiness(_pipeline, ...)`` — the pipeline's own readiness.
       With ``?require_warmed=1`` the flag is forwarded so the probe can
       also depend on warm-up having happened (deployment gating).
    2. The pluggable readiness registry (v2.11), whose ``evaluate()``
       result carries ``ready``, ``checks``, ``n_passed`` and ``n_failed``.

    Returns:
        ``{"ok": True, "detail": ..., "checks": ...}`` when both sources
        report ready.

    Raises:
        HTTPException: status 503 when either source is not ready. The
        ``detail`` body holds the pipeline detail (``None`` if the pipeline
        itself was fine) plus the registry's checks and pass/fail counts.
    """
    pipeline_ok, pipeline_detail = check_readiness(
        _pipeline, require_warmed=bool(require_warmed)
    )

    # Imported lazily, after the pipeline check, exactly as before.
    from ..middleware.readiness import get_readiness_registry

    registry = get_readiness_registry().evaluate()

    if pipeline_ok and registry["ready"]:
        return {
            "ok": True,
            "detail": pipeline_detail,
            "checks": registry["checks"],
        }

    # At least one side failed: merge both views into one 503 payload.
    failure = {
        "detail": None if pipeline_ok else pipeline_detail,
        "checks": registry["checks"],
        "n_passed": registry["n_passed"],
        "n_failed": registry["n_failed"],
    }
    raise HTTPException(status_code=503, detail=failure)
def metrics():
    """Render the Prometheus text exposition (format version 0.0.4).

    Gathers stats from the observability, cache and limiter singletons and
    the active / revoked API-key counts from the auth store, then hands them
    to ``render_prometheus``.
    """
    from fastapi.responses import PlainTextResponse

    # One pass over the key list: record each key's revoked flag, then
    # derive both counters from it.
    revoked_flags = [bool(entry.get("revoked")) for entry in get_auth().list_keys()]
    n_revoked = sum(revoked_flags)
    n_active = len(revoked_flags) - n_revoked

    exposition = render_prometheus(
        obs_stats=get_obs().stats(),
        cache_stats=get_cache().stats(),
        limiter_stats=get_limiter().stats(),
        auth_keys=n_active,
        auth_keys_revoked=n_revoked,
        version="2.0.0",
    )
    return PlainTextResponse(exposition, media_type="text/plain; version=0.0.4")
def export_documents(request: Request):
    """Export the indexed corpus as JSONL, one ``{id, text, metadata}`` per line.

    Recognised query parameters:

    * ``?q=<text>`` — case-insensitive substring match against the
      document text or id.
    * ``?metadata.<key>=<value>`` — flat metadata filter (repeatable; for a
      repeated key the last value wins). Values are compared as strings.

    Unlike the paginated listing there is no limit: every document that
    passes the filters is written out.

    Returns:
        A ``PlainTextResponse`` with media type ``application/x-ndjson``, a
        ``Content-Disposition`` attachment filename of
        ``tau-rag-documents.jsonl`` and an ``X-Document-Count`` header.
    """
    from fastapi.responses import PlainTextResponse
    import json as _json

    prefix = "metadata."
    params = request.query_params
    needle = (params.get("q") or "").strip().lower()
    wanted_meta: Dict[str, str] = {
        key[len(prefix):]: value
        for key, value in params.multi_items()
        if key.startswith(prefix)
    }

    def _keep(doc) -> bool:
        # Substring filter: text OR id must contain the needle.
        if needle:
            in_text = needle in (doc.text or "").lower()
            if not in_text and needle not in (doc.id or "").lower():
                return False
        # Metadata filter: every requested key must match after str().
        meta = doc.metadata or {}
        return all(
            str(meta.get(meta_key)) == str(meta_value)
            for meta_key, meta_value in wanted_meta.items()
        )

    _pipeline._ensure_doc_log()
    rows: List[str] = [
        _json.dumps(
            {"id": doc.id, "text": doc.text, "metadata": doc.metadata or {}},
            ensure_ascii=False,
        )
        for doc in _pipeline._indexed_docs
        if _keep(doc)
    ]

    # Newline-terminate every row; an empty export is an empty body.
    payload = "".join(f"{row}\n" for row in rows)
    return PlainTextResponse(
        payload,
        media_type="application/x-ndjson",
        headers={
            "Content-Disposition":
                'attachment; filename="tau-rag-documents.jsonl"',
            "X-Document-Count": str(len(rows)),
        },
    )
def admin_docs_stats_reset(request: Request):
    """Clear every per-document counter and record an audit event (v1.82).

    The store summary is captured before clearing so that both the audit
    record (``prev_n_docs``, ``prev_total_cited``) and the response can
    report what was wiped.

    Returns:
        ``{"reset": True, "before": <summary taken before the clear>}``.
    """
    from ..middleware import get_doc_stats

    store = get_doc_stats()
    snapshot = store.summary()
    store.clear()

    audit_fields = {
        "actor_key": request.headers.get("x-api-key"),
        "request_id": getattr(request.state, "request_id", None),
        "prev_n_docs": snapshot["n_docs"],
        "prev_total_cited": snapshot["total_cited"],
    }
    get_obs().audit("doc.stats.reset", **audit_fields)
    return {"reset": True, "before": snapshot}
def admin_content_health(
    top_n: int = 5,
    unused_min_retrieved: int = 3,
):
    """Consolidated corpus health report (v1.85).

    Merges the per-document stats store, the retriever-attribution store
    and the co-citation store into one answer, and derives two
    cross-cutting lists:

    * ``dead_docs`` — ids that are indexed but absent from the doc-stats
      store.
    * ``isolated_docs`` — ids tracked by the doc-stats store that never
      appear in any co-citation pair.

    Query params:

    * ``top_n`` — number of rows in the top-cited, top-pairs and retriever
      ranking sections (default 5).
    * ``unused_min_retrieved`` — forwarded to ``doc_stats.unused()`` as
      ``min_retrieved`` (with ``max_cite_rate=0.0``) for ``top_noisy``.

    Returns:
        A dict with ``score``, ``coverage``, ``cite_rate``,
        ``connectivity`` (each rounded to 4 places), a ``corpus`` count
        block, ``top_cited``, ``top_noisy``, ``retrievers``,
        ``cocitation``, ``dead_docs`` and ``isolated_docs``.
    """
    from ..middleware import (
        get_doc_stats, get_retriever_attribution, get_cocitation,
    )
    doc_store = get_doc_stats()
    ra_store = get_retriever_attribution()
    cc_store = get_cocitation()

    doc_summary = doc_store.summary()
    ra_summary = ra_store.summary()
    cc_summary = cc_store.summary()

    # Every currently indexed doc id vs. every id the stats store tracks.
    all_docs = {d["id"] for d in _pipeline.list_documents()}
    touched_docs = {d["doc_id"] for d in doc_store.top_cited(n=10 ** 9)}

    # Dead docs: indexed, but the stats store has no row for them.
    dead = sorted(all_docs - touched_docs)

    # Isolated docs: tracked, but never part of a co-citation pair.
    partnered = set()
    for pair in cc_store.top_pairs(n=10 ** 9):
        partnered.update((pair["a"], pair["b"]))
    isolated = sorted(touched_docs - partnered)

    # Derived corpus-level health score.
    #   * coverage     = fraction of indexed docs that the stats store tracks
    #   * cite_rate    = global doc-level cite rate
    #   * connectivity = fraction of tracked docs that are non-isolated
    n_all = max(1, len(all_docs))
    n_touched = len(touched_docs)
    # Only count tracked ids that are still indexed. The stats store is
    # independent of the index, so it may hold ids that are no longer in the
    # corpus; dividing the raw tracked count by n_all could push coverage
    # (and therefore the score) above 1.0 — most visibly on an empty corpus,
    # where n_all is clamped to 1.
    coverage = len(touched_docs & all_docs) / n_all
    cite_rate = doc_summary.get("global_cite_rate", 0.0)
    connectivity = (
        (n_touched - len(isolated)) / max(1, n_touched)
        if n_touched else 0.0
    )
    # Equal-weight geometric mean — any dimension collapsing to 0 drags the
    # whole score to 0.
    if coverage > 0 and cite_rate > 0 and connectivity > 0:
        score = (coverage * cite_rate * connectivity) ** (1 / 3)
    else:
        score = 0.0

    return {
        "score": round(score, 4),
        "coverage": round(coverage, 4),
        "cite_rate": round(cite_rate, 4),
        "connectivity": round(connectivity, 4),
        "corpus": {
            "n_indexed": len(all_docs),
            "n_touched": n_touched,
            "n_dead": len(dead),
            "n_isolated": len(isolated),
        },
        "top_cited": doc_store.top_cited(n=int(top_n)),
        "top_noisy": doc_store.unused(
            min_retrieved=int(unused_min_retrieved),
            max_cite_rate=0.0,
        ),
        "retrievers": {
            "summary": ra_summary,
            "ranking": ra_store.ranking()[:int(top_n)],
        },
        "cocitation": {
            "summary": cc_summary,
            "top_pairs": cc_store.top_pairs(n=int(top_n)),
        },
        "dead_docs": dead,
        "isolated_docs": isolated,
    }
class PresetPromoteRequest(BaseModel):
    # Request body for POST /v1/admin/queries/promote.
    #
    # Explicit mode: list of query fingerprints to promote. When this is
    # not None the handler uses it and ignores the auto-mode filters below
    # (fingerprints take precedence).
    fingerprints: Optional[List[str]] = None
    # Auto mode: pick candidates via filters (same as the v1.90
    # promote-candidates endpoint). `limit` caps how many rows are selected.
    min_count: int = 3
    min_sources: float = 0.0
    max_avg_latency_ms: Optional[float] = None
    limit: int = 20
    # Common preset knobs — applied to every created preset.
    k: int = 10
    rerank_k: int = 5
    strategy: str = "hybrid"
    lang: str = "he"
    # Naming: prepended verbatim to the suggested preset name.
    name_prefix: str = ""  # optional prefix (e.g. "promoted-")
    # Safety: when True the handler plans the promotions but does not write
    # to the preset store or emit audit events.
    dry_run: bool = False  # preview without creating
existing_canonical: set = set() existing_names: set = set() for p in preset_store.list_all(): existing_canonical.add(_canonicalize(p.get("query", ""))) existing_names.add(p.get("name", "")) # ---- pick candidates if req.fingerprints is not None: # Explicit mode rows = [] for fp in req.fingerprints: s = query_store.get(fp) if s is None: rows.append({"fingerprint": fp, "_missing": True}) else: rows.append(s.to_dict()) else: # Filter mode — mirror the v1.90 logic rows = [] for row in query_store.top(n=10 ** 9): if row["count"] < int(req.min_count): continue if row["avg_sources"] < float(req.min_sources): continue if (req.max_avg_latency_ms is not None and row["avg_latency_ms"] > float(req.max_avg_latency_ms)): continue canonical = _canonicalize(row["sample"]) if canonical in existing_canonical: continue rows.append(row) if len(rows) >= int(req.limit): break # ---- plan created: List[Dict[str, Any]] = [] skipped: List[Dict[str, Any]] = [] used_names = set(existing_names) for row in rows: fp = row.get("fingerprint", "") if row.get("_missing"): skipped.append({"fingerprint": fp, "reason": "fingerprint not found in query_stats"}) continue sample = row.get("sample", "") canonical = _canonicalize(sample) if not canonical: skipped.append({"fingerprint": fp, "reason": "empty query after canonicalization"}) continue if canonical in existing_canonical: skipped.append({"fingerprint": fp, "reason": "already a preset (same canonical)"}) continue base_name = _suggest_preset_name(sample) if req.name_prefix: base_name = f"{req.name_prefix}{base_name}" # Dedupe against already-used names name = base_name suffix = 2 while name in used_names: name = f"{base_name}-{suffix}" suffix += 1 if not req.dry_run: try: preset_store.put(QueryPreset( name=name, query=sample, k=int(req.k), rerank_k=int(req.rerank_k), strategy=req.strategy, lang=req.lang, notes=f"auto-promoted from traffic (fp={fp}, " f"count={row.get('count')})", )) except Exception as e: skipped.append({"fingerprint": fp, "reason": 
f"put failed: " f"{type(e).__name__}: {e}"}) continue get_obs().audit( "preset.auto_promoted", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), name=name, fingerprint=fp, count=row.get("count"), avg_sources=row.get("avg_sources"), ) used_names.add(name) existing_canonical.add(canonical) created.append({ "name": name, "query": sample, "fingerprint": fp, "count": row.get("count"), }) return { "created": created, "skipped": skipped, "n_created": len(created), "n_skipped": len(skipped), "dry_run": bool(req.dry_run), } def _suggest_preset_name(sample: str) -> str: """Turn a raw user query into a safe preset id: lowercase, ASCII where possible, hyphens for whitespace, strip punctuation, cap length. Keeps non-ASCII runs (Hebrew letters) as-is when they can't be transliterated, so the result is still recognizable.""" import re s = (sample or "").strip().lower() # Drop typical punctuation s = re.sub(r"[\?\!\.\,\:\;\(\)\[\]\{\}\"'`]", "", s) s = re.sub(r"\s+", "-", s) # Clip to 48 chars — leaves room for a namespace prefix if len(s) > 48: s = s[:48].rstrip("-") return s or "preset" @app.post("/v1/admin/queries/stats/reset") def admin_query_stats_reset(request: Request): """Wipe the query fingerprint store + audit (v1.89).""" from ..middleware import get_query_stats store = get_query_stats() before = store.summary() store.clear() get_obs().audit( "query.stats.reset", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), prev_n_unique=before["n_unique_queries"], prev_n_events=before["n_events"], ) return {"reset": True, "before": before} class AnalyticsPruneRequest(BaseModel): older_than_days: Optional[float] = None older_than_seconds: Optional[float] = None # Which stores to prune. Omit a flag to skip that store. 
@app.post("/v1/admin/analytics/prune")
def admin_analytics_prune(
    req: AnalyticsPruneRequest,
    request: Request,
):
    """Prune stale entries across the analytics stores (v1.92).

    Each selected store's ``prune(ttl_seconds)`` is called, and the store's
    summary is read afterwards to report how many rows remain.

    The TTL comes from ``older_than_seconds`` or ``older_than_days``; at
    least one is required, and ``older_than_seconds`` wins when both are
    given. The per-store booleans on the request (``doc_stats``,
    ``retriever_attribution``, ``cocitation``, ``query_stats``) select which
    stores are touched.

    Returns:
        ``{"ttl_seconds", "total_removed", "per_store"}`` where
        ``per_store`` maps each pruned store to
        ``{"n_removed", "n_remaining_after"}``.

    Raises:
        HTTPException: status 400 when no TTL was supplied, or when the TTL
        is not a positive number (zero, negative or NaN).
    """
    from ..middleware import (
        get_doc_stats, get_retriever_attribution,
        get_cocitation, get_query_stats,
    )

    # Resolve TTL seconds
    if req.older_than_seconds is not None:
        ttl_s = float(req.older_than_seconds)
    elif req.older_than_days is not None:
        ttl_s = float(req.older_than_days) * 86400.0
    else:
        raise HTTPException(
            status_code=400,
            detail={"error": "either older_than_days or "
                             "older_than_seconds is required"},
        )
    # `not ttl_s > 0` instead of `ttl_s <= 0`: NaN compares False against
    # everything, so the old check let a NaN TTL through to the stores.
    if not ttl_s > 0:
        raise HTTPException(
            status_code=400,
            detail={"error": "TTL must be positive"},
        )

    # (result key, enabled flag, store getter, summary key with row count)
    targets = (
        ("doc_stats", req.doc_stats, get_doc_stats, "n_docs"),
        ("retriever_attribution", req.retriever_attribution,
         get_retriever_attribution, "n_retrievers"),
        ("cocitation", req.cocitation, get_cocitation, "n_pairs"),
        ("query_stats", req.query_stats, get_query_stats,
         "n_unique_queries"),
    )

    results: Dict[str, Dict[str, int]] = {}
    total_removed = 0
    for store_name, enabled, get_store, size_key in targets:
        if not enabled:
            continue
        store = get_store()
        n_removed = store.prune(ttl_s)
        results[store_name] = {
            "n_removed": n_removed,
            "n_remaining_after": store.summary()[size_key],
        }
        total_removed += n_removed

    get_obs().audit(
        "analytics.prune",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        older_than_seconds=ttl_s,
        total_removed=total_removed,
    )
    return {
        "ttl_seconds": ttl_s,
        "total_removed": total_removed,
        "per_store": results,
    }
@app.get("/v1/admin/requests/{request_id}/spans/ui",
         response_class=Response, include_in_schema=False)
def admin_request_spans_ui(request_id: str, refresh: int = 0):
    """HTML timeline of one request's spans (v2.5).

    Delegates data gathering to the JSON handler ``admin_request_spans``
    (so its 404 for unknown ids propagates unchanged) and feeds the result
    to ``render_span_timeline``. ``refresh`` is passed through as
    ``refresh_sec``; a falsy value becomes 0.
    """
    from fastapi.responses import HTMLResponse
    from .span_timeline_ui import render_span_timeline

    span_data = admin_request_spans(request_id)
    # Forward exactly the four fields the renderer takes from the JSON view.
    render_kwargs = {
        field: span_data[field]
        for field in ("request_id", "n_spans", "total_ms", "spans")
    }
    page = render_span_timeline(refresh_sec=int(refresh or 0), **render_kwargs)
    return HTMLResponse(page)
@app.post("/v1/admin/replay/{request_id}")
def admin_replay(request_id: str, request: Request):
    """Re-execute a previously logged request against the current pipeline
    (v1.99).

    The original request is looked up by ``request_id`` in the obs log
    (newest entry first) and its captured body is parsed to rebuild a
    ``Query``. Replay needs the body to have been captured when the original
    request ran (``TAU_RAG_OBS_CAPTURE_BODY=1``).

    Returns::

        {
          "request_id":        original id,
          "replay_request_id": freshly generated id,
          "path":              path recorded on the original row,
          "original_body":     captured body text, exactly as stored
                               (this handler applies no truncation),
          "query":             parsed query text,
          "replay": {
            "sources":   list,
            "answer":    value of the response's ``answer``,
            "omega":     float | None,
            "passed":    bool | None,
            "timing_ms": dict,
          },
          "note": human-readable comparison hint,
        }

    Raises:
        HTTPException: 404 when the request id is not in the obs log; 400
        when no body was captured, the body is not valid JSON, the JSON is
        not an object, it has no ``query`` field, or ``k`` / ``rerank_k``
        are not integers.
    """
    import json as _json

    # Find the original row in the obs log, newest first.
    row = next(
        (entry
         for entry in reversed(get_obs().tail(n=10 ** 9, event_type="request"))
         if entry.get("request_id") == request_id),
        None,
    )
    if row is None:
        raise HTTPException(
            status_code=404,
            detail={"error": "request_id not found in obs log",
                    "hint": "ensure the request was recorded — "
                            "obs log is a ring buffer; older rows "
                            "may have been evicted"},
        )

    body_txt = (row.get("extra") or {}).get("body")
    if not body_txt:
        raise HTTPException(
            status_code=400,
            detail={"error": "no captured body on this request",
                    "hint": "set TAU_RAG_OBS_CAPTURE_BODY=1 and re-run "
                            "the original request to enable replay"},
        )
    try:
        payload = _json.loads(body_txt)
    except Exception as e:
        raise HTTPException(
            status_code=400,
            detail={"error": "captured body is not valid JSON",
                    "detail": f"{type(e).__name__}: {e}"}) from e
    # Valid JSON is not necessarily an object: a captured list / string /
    # number has no .get() and used to surface as an unhandled 500.
    if not isinstance(payload, dict):
        raise HTTPException(
            status_code=400,
            detail={"error": "captured body is not a JSON object"})

    path = row.get("path") or ""

    from ..core.types import Query, Strategy
    q_text = payload.get("query")
    if not q_text:
        raise HTTPException(status_code=400,
                            detail={"error": "captured body has no 'query' field"})

    # str() guards against a non-string strategy value; anything the enum
    # does not recognise falls back to HYBRID, as before.
    strategy_name = str(payload.get("strategy") or "hybrid").lower()
    try:
        strategy = Strategy(strategy_name)
    except Exception:
        strategy = Strategy.HYBRID

    # The captured body may belong to a request that never passed
    # validation, so k / rerank_k can be arbitrary JSON values.
    try:
        k = int(payload.get("k") or 10)
        rerank_k = int(payload.get("rerank_k") or 5)
    except (TypeError, ValueError) as e:
        raise HTTPException(
            status_code=400,
            detail={"error": "captured body has a non-integer "
                             "'k' or 'rerank_k'",
                    "detail": f"{type(e).__name__}: {e}"}) from e

    q = Query(
        text=q_text,
        lang=payload.get("lang") or "he",
        filters=payload.get("filters") or {},
        strategy=strategy,
        k=k,
        rerank_k=rerank_k,
    )

    # Fresh id for this replay; recorded in the audit event below so the
    # replay can be tied back to the original request.
    replay_id = generate_request_id()
    resp = _pipeline.run(q)

    # omega is best-effort: absent or non-numeric signals yield None.
    omega = None
    try:
        omega = float(resp.signals.omega) if resp.signals else None
    except (AttributeError, TypeError, ValueError):
        omega = None
    verif = getattr(resp, "verification", None)

    get_obs().audit(
        "replay.executed",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        original_request_id=request_id,
        replay_request_id=replay_id,
        path=path,
    )
    return {
        "request_id": request_id,
        "replay_request_id": replay_id,
        "path": path,
        "original_body": body_txt,
        "query": q_text,
        "replay": {
            "sources": list(resp.sources or []),
            "answer": resp.answer,
            "omega": omega,
            "passed": (bool(getattr(verif, "passed", False))
                       if verif else None),
            "timing_ms": dict(resp.timing_ms or {}),
        },
        "note": ("compare 'replay.sources' to whatever the original "
                 "response had — differences reveal pipeline drift "
                 "since the original run"),
    }
""" from fastapi.responses import HTMLResponse from ..middleware import ( get_query_stats, get_query_doc_affinity, ) from ..middleware.query_stats import _canonicalize from ..presets import get_preset_store from .query_analytics_ui import render_query_analytics_ui q_store = get_query_stats() qda = get_query_doc_affinity() preset_store = get_preset_store() summary = q_store.summary() top_qs = q_store.top(n=int(top_n)) # Promote candidates — reuse same logic as v1.90 endpoint existing_canonical = set() for p in preset_store.list_all(): existing_canonical.add(_canonicalize(p.get("query", ""))) promote = [] for row in q_store.top(n=10 ** 9): if row["count"] < int(min_count): continue if row.get("avg_sources", 0.0) < 1.0: continue canon = _canonicalize(row["sample"]) if canon in existing_canonical: continue promote.append({ **row, "suggested_preset_name": _suggest_preset_name(row["sample"]), }) if len(promote) >= 10: break # Build affinity matrix grid matrix_q_rows = q_store.top(n=int(matrix_queries)) # Pick the top docs across the shown queries doc_votes: Dict[str, int] = {} for q in matrix_q_rows: for r in qda.top_docs_for_query(q["fingerprint"], n=10 ** 9): doc_votes[r["doc_id"]] = doc_votes.get(r["doc_id"], 0) + r["count"] top_doc_ids = [d for d, _ in sorted(doc_votes.items(), key=lambda kv: -kv[1])] top_doc_ids = top_doc_ids[:int(matrix_docs)] # Pre-compute the (fp, doc_id) → count map for the rendered subset matrix_pairs: Dict[tuple, int] = {} for q in matrix_q_rows: for r in qda.top_docs_for_query(q["fingerprint"], n=10 ** 9): if r["doc_id"] in top_doc_ids: matrix_pairs[(q["fingerprint"], r["doc_id"])] = r["count"] html = render_query_analytics_ui( summary=summary, top_queries=top_qs, promote_candidates=promote, matrix_queries=matrix_q_rows, matrix_docs=top_doc_ids, matrix_pairs=matrix_pairs, refresh_sec=int(refresh or 0), ) return HTMLResponse(html) def admin_content_health_ui( top_n: int = 5, unused_min_retrieved: int = 3, refresh: int = 0, ): """HTML dashboard 
def admin_doc_freshness_stale(older_than_days: float = 90.0):
    """List docs the freshness store reports as stale (v1.94).

    ``older_than_days`` is coerced to ``float`` and handed to the freshness
    store's ``stale()``; the same coerced value is echoed back next to the
    result so callers can see which threshold was applied.
    """
    from ..middleware import get_doc_freshness

    threshold_days = float(older_than_days)
    stale_rows = get_doc_freshness().stale(older_than_days=threshold_days)
    return {"older_than_days": threshold_days, "stale": stale_rows}
Returns JSON with one key per store — versioned payload for migration and offline analysis. Stores included: * v1.82 doc_stats * v1.83 retriever_attribution * v1.84 cocitation * v1.89 query_stats * v1.94 doc_freshness * v1.96 query_doc_affinity The ``version`` field signals the dump format — kept as a single integer so restore can refuse incompatible schemas. """ from ..middleware import ( get_doc_stats, get_retriever_attribution, get_cocitation, get_query_stats, get_doc_freshness, get_query_doc_affinity, ) # doc_stats: every tracked doc + raw counters doc_store = get_doc_stats() doc_rows = [] # Walk via top_cited(n=10**9) — it returns all rows for row in doc_store.top_cited(n=10 ** 9): doc_rows.append({ "doc_id": row["doc_id"], "n_retrieved": row.get("n_retrieved", 0), "n_cited": row.get("n_cited", 0), "first_seen_at": row.get("first_seen_at"), "last_retrieved_at": row.get("last_retrieved_at"), "last_cited_at": row.get("last_cited_at"), }) # retriever_attribution ra = get_retriever_attribution() ra_rows = [ {k: v for k, v in row.items() if k != "cite_rate"} for row in ra.all_stats() ] # cocitation cc = get_cocitation() cc_pairs = cc.top_pairs(n=10 ** 9) # query_stats qs = get_query_stats() qs_rows = [] for row in qs.top(n=10 ** 9): qs_rows.append({ "fingerprint": row["fingerprint"], "sample": row["sample"], "count": row["count"], "first_seen_at": row["first_seen_at"], "last_seen_at": row["last_seen_at"], "sum_sources": row["sum_sources"], "sum_latency_ms": row["sum_latency_ms"], }) # doc_freshness fs = get_doc_freshness() fs_rows = [] # No top() here, walk via data dict — use all stale(older_than_days=0) # which gives every row sorted oldest-first; take as-is. 
@app.post("/v1/admin/analytics/restore")
def admin_analytics_restore(req: AnalyticsRestoreRequest, request: Request):
    """Rebuild the 6 analytics stores from a v1.97 dump.

    ``replace=True`` (default) swaps in a fresh store before loading each
    section, so the state after restore mirrors the dump.
    ``replace=False`` merges on top of existing data: when a doc_id,
    retriever name, fingerprint or pair already exists its counters are
    SUMMED, the earliest ``first_seen_at`` / ``added_at`` is kept and the
    latest ``last_*`` timestamp is kept. Useful for aggregating traffic
    across prod nodes.

    Sections or rows that are missing or not JSON objects are skipped.
    ``totals`` reports, per store, the number of rows/pairs actually
    written (skipped rows are not counted).

    Raises:
        HTTPException: 400 when the dump's ``version`` differs from
            ``_ANALYTICS_DUMP_VERSION`` (schema compatibility gate).
    """
    from ..middleware import (
        get_doc_stats, set_doc_stats, DocumentStatsStore,
        get_retriever_attribution, set_retriever_attribution,
        RetrieverAttributionStore,
        get_cocitation, set_cocitation, CoCitationStore,
        get_query_stats, set_query_stats, QueryStatsStore,
        get_doc_freshness, set_doc_freshness, DocFreshnessStore,
        get_query_doc_affinity, set_query_doc_affinity,
        QueryDocAffinityStore,
    )
    # Row model imports are hoisted here so they run once per request
    # instead of once per restored row.
    from ..middleware.cocitation import _pair_key as _pk
    from ..middleware.doc_freshness import DocFreshness as _DF
    from ..middleware.doc_stats import DocumentStats as _DS
    from ..middleware.query_stats import QueryStats as _QS
    from ..middleware.retriever_attribution import RetrieverStats as _RS

    dump = req.dump or {}
    ver = dump.get("version")
    if ver != _ANALYTICS_DUMP_VERSION:
        raise HTTPException(
            status_code=400,
            detail={"error": "version mismatch",
                    "expected": _ANALYTICS_DUMP_VERSION,
                    "got": ver},
        )

    def _entries(section: str, key: str) -> List[Dict[str, Any]]:
        """Return the dict items under ``dump[section][key]`` (else [])."""
        sec = dump.get(section)
        if not isinstance(sec, dict):
            return []
        items = sec.get(key)
        if not isinstance(items, list):
            return []
        return [item for item in items if isinstance(item, dict)]

    def _n_events(section: str) -> int:
        """Return ``dump[section]["n_events"]`` as an int (0 if absent)."""
        sec = dump.get(section)
        return int(sec.get("n_events") or 0) if isinstance(sec, dict) else 0

    def _earliest(a, b):
        """Smaller of two optional timestamps, ignoring ``None``."""
        if a is None:
            return b
        if b is None:
            return a
        return min(a, b)

    def _latest(a, b):
        """Larger of two optional timestamps, ignoring ``None``."""
        if a is None:
            return b
        if b is None:
            return a
        return max(a, b)

    merging = not req.replace
    totals: Dict[str, int] = {}

    # doc_stats — seed inner state directly (avoids driving up counters
    # via record() N times when N can be huge on real dumps).
    if req.replace:
        set_doc_stats(DocumentStatsStore())
    doc_store = get_doc_stats()
    n_doc = 0
    for row in _entries("doc_stats", "rows"):
        did = row.get("doc_id")
        if not did:
            continue
        n_retrieved = int(row.get("n_retrieved") or 0)
        n_cited = int(row.get("n_cited") or 0)
        first_seen_at = row.get("first_seen_at")
        last_retrieved_at = row.get("last_retrieved_at")
        last_cited_at = row.get("last_cited_at")
        prev = doc_store._data.get(did) if merging else None  # noqa: SLF001
        if prev is not None:
            n_retrieved += prev.n_retrieved
            n_cited += prev.n_cited
            first_seen_at = _earliest(prev.first_seen_at, first_seen_at)
            last_retrieved_at = _latest(prev.last_retrieved_at,
                                        last_retrieved_at)
            last_cited_at = _latest(prev.last_cited_at, last_cited_at)
        doc_store._data[did] = _DS(  # noqa: SLF001
            doc_id=did,
            n_retrieved=n_retrieved,
            n_cited=n_cited,
            first_seen_at=first_seen_at,
            last_retrieved_at=last_retrieved_at,
            last_cited_at=last_cited_at,
        )
        n_doc += 1
    totals["doc_stats"] = n_doc

    # retriever_attribution
    if req.replace:
        set_retriever_attribution(RetrieverAttributionStore())
    ra = get_retriever_attribution()
    n_ra = 0
    for row in _entries("retriever_attribution", "rows"):
        nm = row.get("name")
        if not nm:
            continue
        n_contributed = int(row.get("n_contributed") or 0)
        n_doc_contrib = int(row.get("n_doc_contributions") or 0)
        n_cited_contrib = int(row.get("n_cited_contributions") or 0)
        first_seen_at = row.get("first_seen_at")
        last_seen_at = row.get("last_seen_at")
        prev = ra._data.get(nm) if merging else None  # noqa: SLF001
        if prev is not None:
            n_contributed += prev.n_contributed
            n_doc_contrib += prev.n_doc_contributions
            n_cited_contrib += prev.n_cited_contributions
            first_seen_at = _earliest(prev.first_seen_at, first_seen_at)
            last_seen_at = _latest(prev.last_seen_at, last_seen_at)
        ra._data[nm] = _RS(  # noqa: SLF001
            name=nm,
            n_contributed=n_contributed,
            n_doc_contributions=n_doc_contrib,
            n_cited_contributions=n_cited_contrib,
            first_seen_at=first_seen_at,
            last_seen_at=last_seen_at,
        )
        n_ra += 1
    totals["retriever_attribution"] = n_ra

    # cocitation — bump pair counts via direct state access; replaying
    # through record() would add one n_events per iteration and skew the
    # event counter.
    if req.replace:
        set_cocitation(CoCitationStore())
    cc = get_cocitation()
    n_cc = 0
    for pair in _entries("cocitation", "pairs"):
        count = int(pair.get("count") or 0)
        a = pair.get("a")
        b = pair.get("b")
        if not a or not b or count <= 0:
            continue
        k = _pk(a, b)
        cc._pairs[k] = int(cc._pairs.get(k, 0)) + count  # noqa: SLF001
        cc._partners[a].add(b)  # noqa: SLF001
        cc._partners[b].add(a)  # noqa: SLF001
        ls = pair.get("last_seen")
        if ls is not None:
            cc._last_seen[k] = float(ls)  # noqa: SLF001
        n_cc += 1
    n_events_cc = _n_events("cocitation")
    if n_events_cc:
        cc._n_events = cc._n_events + n_events_cc  # noqa: SLF001
    totals["cocitation_pairs"] = n_cc

    # query_stats
    if req.replace:
        set_query_stats(QueryStatsStore())
    qs = get_query_stats()
    n_qs = 0
    for row in _entries("query_stats", "rows"):
        fp = row.get("fingerprint")
        if not fp:
            continue
        existing = qs._data.get(fp)  # noqa: SLF001
        if existing and merging:
            # Merge: add counts; keep earliest first_seen_at; latest
            # last_seen_at; sum sources/latency.
            existing.count += int(row.get("count") or 0)
            existing.sum_sources += int(row.get("sum_sources") or 0)
            existing.sum_latency_ms += float(
                row.get("sum_latency_ms") or 0.0)
            existing.first_seen_at = _earliest(
                existing.first_seen_at, row.get("first_seen_at"))
            existing.last_seen_at = _latest(
                existing.last_seen_at, row.get("last_seen_at"))
        else:
            qs._data[fp] = _QS(  # noqa: SLF001
                fingerprint=fp,
                sample=row.get("sample", ""),
                count=int(row.get("count") or 0),
                first_seen_at=row.get("first_seen_at"),
                last_seen_at=row.get("last_seen_at"),
                sum_sources=int(row.get("sum_sources") or 0),
                sum_latency_ms=float(row.get("sum_latency_ms") or 0.0),
            )
        n_qs += 1
    totals["query_stats"] = n_qs

    # doc_freshness
    if req.replace:
        set_doc_freshness(DocFreshnessStore())
    fs = get_doc_freshness()
    n_fs = 0
    for row in _entries("doc_freshness", "rows"):
        did = row.get("doc_id")
        if not did:
            continue
        added_at = float(row.get("added_at") or 0.0)
        last_modified_at = row.get("last_modified_at")
        n_modifications = int(row.get("n_modifications") or 0)
        prev = fs._data.get(did) if merging else None  # noqa: SLF001
        if prev is not None:
            # A missing added_at in the dump (read as 0.0 above) must not
            # clobber the add time already known for this doc.
            if row.get("added_at"):
                added_at = min(prev.added_at, added_at)
            else:
                added_at = prev.added_at
            last_modified_at = _latest(prev.last_modified_at,
                                       last_modified_at)
            n_modifications += prev.n_modifications
        fs._data[did] = _DF(  # noqa: SLF001
            doc_id=did,
            added_at=added_at,
            last_modified_at=last_modified_at,
            n_modifications=n_modifications,
        )
        n_fs += 1
    totals["doc_freshness"] = n_fs

    # query_doc_affinity
    if req.replace:
        set_query_doc_affinity(QueryDocAffinityStore())
    qda = get_query_doc_affinity()
    n_qda = 0
    for pair in _entries("query_doc_affinity", "pairs"):
        fp = pair.get("fingerprint")
        did = pair.get("doc_id")
        count = int(pair.get("count") or 0)
        if not fp or not did or count <= 0:
            continue
        k = (fp, did)
        qda._pairs[k] = int(qda._pairs.get(k, 0)) + count  # noqa: SLF001
        qda._by_query[fp].add(did)  # noqa: SLF001
        qda._by_doc[did].add(fp)  # noqa: SLF001
        ls = pair.get("last_seen")
        if ls is not None:
            qda._last_seen[k] = float(ls)  # noqa: SLF001
        n_qda += 1
    n_events_qda = _n_events("query_doc_affinity")
    if n_events_qda:
        qda._n_events = qda._n_events + n_events_qda  # noqa: SLF001
    totals["query_doc_affinity_pairs"] = n_qda

    get_obs().audit(
        "analytics.restore",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        version=ver, replace=bool(req.replace),
        totals=totals,
    )
    return {
        "restored": True,
        "replace": bool(req.replace),
        "version": ver,
        "totals": totals,
    }
def admin_doc_update_priorities(
    n: int = 20,
    min_cited: int = 1,
    older_than_days: float = 0.0,
    alpha: float = 1.0,
):
    """Rank docs by "needs update" priority (v1.95) — cross-cut of v1.82
    doc_stats × v1.94 doc_freshness.

    Priority score = ``n_cited * (age_days ** alpha)``. High traffic plus
    stale content → high score. ``alpha`` tunes how heavily age dominates
    (α>1 = age matters more; α<1 = traffic matters more). When the power
    cannot be computed or is not finite (overflow, ``0 ** negative``,
    NaN/inf ``alpha``) the score falls back to the linear
    ``n_cited * age_days``.

    Only docs known to BOTH stores are considered. Age is measured from
    ``last_modified_at`` when set, otherwise from ``added_at``.

    Filters:
      * ``min_cited`` — minimum citation count (default 1 — skip cold
        docs entirely, they're content-audit's problem, not update's).
      * ``older_than_days`` — minimum age in days (default 0 — let the
        caller decide what ''stale'' means).
      * ``n`` — cap on rows returned.
      * ``alpha`` — exponent on age_days in the score. 1.0 is linear
        (balanced); raise to prioritize stale content harder.

    Returns:
        Dict echoing the filter values plus ``candidates`` (rows sorted by
        ``priority_score`` descending, at most ``n``) and ``n_candidates``
        (the number of rows returned).
    """
    from ..middleware import get_doc_stats, get_doc_freshness
    import math as _math
    import time as _t

    docs = get_doc_stats()
    fresh = get_doc_freshness()
    now = _t.time()

    # Coerce the query parameters once instead of per row.
    min_cited_i = int(min_cited)
    min_age_days = float(older_than_days)
    exponent = float(alpha)

    # Walk doc_stats and join against freshness: stale docs with zero
    # traffic are noise here — we want things BOTH sides know about.
    rows: List[Dict[str, Any]] = []
    for doc_row in docs.top_cited(n=10 ** 9):
        did = doc_row["doc_id"]
        if doc_row["n_cited"] < min_cited_i:
            continue
        f = fresh.get(did)
        if f is None:
            continue
        ref_ts = f.last_modified_at or f.added_at
        age_days = max(0.0, (now - ref_ts) / 86400.0)
        if age_days < min_age_days:
            continue
        try:
            aged = age_days ** exponent
        except (OverflowError, ValueError, ZeroDivisionError):
            # ZeroDivisionError: a brand-new doc (age 0.0) with alpha < 0.
            aged = age_days
        if not _math.isfinite(aged):
            # inf/NaN would break the sort and the JSON response.
            aged = age_days
        score = float(doc_row["n_cited"]) * aged
        rows.append({
            "doc_id": did,
            "n_cited": doc_row["n_cited"],
            "n_retrieved": doc_row["n_retrieved"],
            "cite_rate": doc_row.get("cite_rate", 0.0),
            "added_at": f.added_at,
            "last_modified_at": f.last_modified_at,
            "age_days": round(age_days, 2),
            "n_modifications": f.n_modifications,
            "priority_score": round(score, 2),
        })
    rows.sort(key=lambda r: r["priority_score"], reverse=True)
    rows = rows[:max(0, int(n))]
    return {
        "n_candidates": len(rows),
        "n": int(n),
        "min_cited": min_cited_i,
        "older_than_days": min_age_days,
        "alpha": exponent,
        "candidates": rows,
    }
def get_document_chunks(doc_id: str, chunker: Optional[str] = None):
    """List the chunks the pipeline produces for one document.

    Responds 404 when the pipeline has no document with ``doc_id``.
    Otherwise asks the pipeline for the doc's chunks, forwarding the
    optional ``chunker`` override (``?chunker=sentence`` previews an
    alternative chunking), and reports which chunker label applies: the
    override if given, else the pipeline's ``_chunker_last`` attribute,
    else ``"fixed"``.
    """
    if _pipeline.get_document(doc_id) is None:
        raise HTTPException(status_code=404, detail="document not found")

    pieces = _pipeline.get_chunks(doc_id, chunker=chunker)
    if chunker:
        chunker_label = chunker
    else:
        chunker_label = getattr(_pipeline, "_chunker_last", "fixed")
    return {
        "doc_id": doc_id,
        "n_chunks": len(pieces),
        "chunker": chunker_label,
        "chunks": pieces,
    }
text = d.text or "" if not text and (d.metadata or {}).get("_lazy_text"): try: from ..pipeline import get_pipeline as _gp text = _gp().get_text(d.id) or "" except Exception: pass return {"id": d.id, "text": text, "metadata": d.metadata or {}} def replace_document(doc_id: str, body: DocumentBody): if body.id != doc_id: raise HTTPException(status_code=422, detail={"path_id_mismatch": {"url": doc_id, "body": body.id}}) try: # Reuse the doc-size validator validate_doc_list([body]) except OverflowError as e: raise HTTPException(status_code=413, detail=str(e)) doc = Document(id=body.id, text=body.text, metadata=body.metadata) ok = _pipeline.replace_document(doc) if not ok: raise HTTPException(status_code=404, detail="document not found") return {"replaced": True, "id": doc_id} def delete_document(doc_id: str): ok = _pipeline.delete_document(doc_id) if not ok: raise HTTPException(status_code=404, detail="document not found") return {"deleted": True, "id": doc_id} def clear_documents(request: Request): n = _pipeline.clear_documents() get_obs().audit( "documents.clear", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), removed=n, ) return {"cleared": True, "removed": n} # ---- bulk ingest (JSONL + CSV streaming) ------------------------------ import csv as _csv # noqa: E402 import io as _io # noqa: E402 import json # noqa: E402 def _parse_jsonl(text: str): """Yield (row_num, doc_dict_or_error) tuples for a JSONL payload. Blank lines and `#` comment lines are skipped silently.""" for i, line in enumerate(text.splitlines(), start=1): line = line.strip() if not line or line.startswith("#"): continue try: obj = json.loads(line) if not isinstance(obj, dict): raise ValueError("row is not a JSON object") yield i, obj except Exception as e: yield i, {"__error__": str(e)} def _parse_csv(text: str): """Yield (row_num, doc_dict_or_error) tuples for a CSV payload. 
async def bulk_ingest_documents(request: Request):
    """Bulk ingest — JSONL (one ``{"id","text","metadata"}`` per line) or
    CSV (columns: id, text, [any other] → metadata).

    Partial success semantics: each row is parsed and validated
    independently, successes are indexed into the pipeline in one
    ``add_documents`` call, failures are reported with row numbers.

    Content-Type:
      * ``text/csv`` → CSV
      * anything else (e.g. ``application/x-ndjson``,
        ``application/jsonl``) → JSONL

    The body is decoded as UTF-8 with undecodable bytes replaced; a
    leading byte-order mark is dropped.

    Per-row rejections: parse error, missing/empty/non-string ``text``,
    ``text`` longer than ``Limits.max_doc_text_len``, ``metadata`` that is
    not an object, and any row past ``Limits.max_docs_per_batch`` accepted
    docs. A row without ``id`` gets ``row-<row_num>``.

    Returns:
        ``{"accepted": [ids], "errors": [{"row", "error"}],
        "added_chunks": ..., "rows_total": N}``.
    """
    ct = (request.headers.get("content-type") or "").split(";", 1)[0].strip().lower()
    # ``utf-8-sig`` is UTF-8 that also strips a leading BOM. Spreadsheet
    # CSV exports commonly start with one, which would otherwise turn the
    # ``id`` header into ``\ufeffid`` (or break the first JSONL row).
    raw = (await request.body()).decode("utf-8-sig", errors="replace")
    if ct == "text/csv":
        iterator = _parse_csv(raw)
    else:
        iterator = _parse_jsonl(raw)

    # Enforce per-batch size limit from v1.35
    from .errors import Limits
    accepted: List[Document] = []
    errors: List[Dict[str, Any]] = []
    row_n = 0
    for row_num, obj in iterator:
        row_n += 1
        if "__error__" in obj:
            errors.append({"row": row_num, "error": obj["__error__"]})
            continue
        text = obj.get("text")
        if not isinstance(text, str) or not text:
            errors.append({"row": row_num,
                           "error": "missing or empty 'text'"})
            continue
        if len(text) > Limits.max_doc_text_len:
            errors.append({"row": row_num,
                           "error": f"text exceeds {Limits.max_doc_text_len} chars"})
            continue
        metadata = obj.get("metadata") or {}
        if not isinstance(metadata, dict):
            # Report the bad row instead of handing a non-mapping to
            # Document, where it could fail the whole batch later.
            errors.append({"row": row_num,
                           "error": "'metadata' must be an object"})
            continue
        if len(accepted) >= Limits.max_docs_per_batch:
            errors.append({"row": row_num,
                           "error": f"batch cap reached (max {Limits.max_docs_per_batch})"})
            continue
        accepted.append(Document(
            id=obj.get("id") or f"row-{row_num}",
            text=text,
            metadata=metadata,
        ))

    chunks = _pipeline.add_documents(accepted) if accepted else 0
    get_obs().audit(
        "documents.bulk_ingest",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        content_type=ct, rows_total=row_n,
        accepted=len(accepted), errors=len(errors),
    )
    return {
        "accepted": [d.id for d in accepted],
        "errors": errors,
        "added_chunks": chunks,
        "rows_total": row_n,
    }
""" from .errors import Limits if not req.queries: return {"n": 0, "results": [], "errors": [], "total_ms": 0} if len(req.queries) > Limits.max_docs_per_batch: raise HTTPException( status_code=413, detail=f"too many queries — max {Limits.max_docs_per_batch} per batch", ) import time as _t t0 = _t.time() results: List[Dict[str, Any]] = [] errors: List[Dict[str, Any]] = [] for i, item in enumerate(req.queries, start=1): try: validate_query_text(item.query) validate_k(item.k) strategy = Strategy(item.strategy) q = Query(text=item.query, lang=item.lang, filters=item.filters, strategy=strategy, k=item.k, rerank_k=item.rerank_k) resp = _pipeline.run(q) try: omega = float(resp.signals.omega) if resp.signals else None except Exception: omega = None verif = getattr(resp, "verification", None) results.append({ "index": i, "query": item.query, "answer": resp.answer or "", "sources": list(resp.sources or []), "omega": omega, "passed": bool(getattr(verif, "passed", False)) if verif else None, }) except HTTPException as e: errors.append({"index": i, "error": str(e.detail), "status": e.status_code}) except Exception as e: errors.append({"index": i, "error": f"{type(e).__name__}: {e}"[:240], "status": 500}) total_ms = (_t.time() - t0) * 1000.0 get_obs().audit( "batch.query", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), n=len(req.queries), errors=len(errors), total_ms=round(total_ms, 2), ) return { "n": len(req.queries), "results": results, "errors": errors, "total_ms": round(total_ms, 2), "avg_ms": round(total_ms / max(1, len(req.queries)), 2), } def generate_stream(req: SearchRequest, request: Request): """Server-Sent Events version of /v1/generate. 
Emits events in order: event: retrieved data: {"doc_ids": [...], "count": N} event: answer data: {"chunk": "word "} (repeated) event: done data: {"answer","sources","signals","verification", "passed","omega"} event: error data: {"code","message"} (on failure) Flow: runs the full pipeline synchronously (it's ~ms for extractive) and streams the staged output. Each SSE event is . Clients: * browser: EventSource('/v1/generate/stream' ... POST) * curl --no-buffer -N -H 'Content-Type: application/json' \ -d '{"query":"..."}' http://localhost:8000/v1/generate/stream * SDK: for ev in client.stream_query("..."): ... """ try: validate_query_text(req.query) validate_k(req.k) except OverflowError as e: raise HTTPException(status_code=413, detail=str(e)) except ValueError as e: raise HTTPException(status_code=422, detail=str(e)) try: strategy = Strategy(req.strategy) except ValueError: raise HTTPException(status_code=400, detail={"bad_strategy": req.strategy}) from fastapi.responses import StreamingResponse import json as _json def _sse(event: str, data: Any) -> str: return f"event: {event}\ndata: {_json.dumps(data, ensure_ascii=False)}\n\n" def _event_gen(): try: q = Query(text=req.query, lang=req.lang, filters=req.filters, strategy=strategy, k=req.k, rerank_k=req.rerank_k) resp = _pipeline.run(q) # Stage 1: retrieval results retrieved = [] seen = set() for c in getattr(resp, "retrieved", []) or []: did = getattr(getattr(c, "chunk", None), "doc_id", None) if did and did not in seen: retrieved.append(did) seen.add(did) yield _sse("retrieved", {"doc_ids": retrieved, "count": len(retrieved)}) # Stage 2: answer streamed word-by-word answer = resp.answer or "" words = answer.split(" ") for w in words: if not w: continue yield _sse("answer", {"chunk": w + " "}) # Stage 3: final envelope try: omega = float(resp.signals.omega) if resp.signals else None except Exception: omega = None verif = getattr(resp, "verification", None) yield _sse("done", { "answer": answer, "sources": 
def generate(req: SearchRequest):
    """Run one query through the pipeline, with a response cache in front.

    Validation failures map to HTTP errors: 413 when a validator raises
    ``OverflowError``, 422 when it raises ``ValueError``, 400 for an
    unknown ``strategy``. The cache key is built from the query text,
    strategy, ``k``, ``rerank_k``, language and filters. The returned dict
    is a copy of the pipeline response's ``to_dict()`` with an extra
    ``_cache`` key set to ``"hit"`` or ``"miss"``; the cached entry itself
    is stored without that marker.
    """
    try:
        validate_query_text(req.query)
        validate_k(req.k)
    except OverflowError as exc:
        raise HTTPException(status_code=413, detail=str(exc))
    except ValueError as exc:
        raise HTTPException(status_code=422, detail=str(exc))

    try:
        strategy = Strategy(req.strategy)
    except ValueError:
        raise HTTPException(status_code=400,
                            detail={"bad_strategy": req.strategy})

    # Cache hit short-circuit
    store = get_cache()
    fingerprint = f"{req.query}|{req.strategy}|{req.k}|{req.rerank_k}"
    key = store.make_key(fingerprint, req.lang, req.filters)
    hit = store.get(key)
    if hit is not None:
        served = dict(hit)
        served["_cache"] = "hit"
        return served

    query = Query(
        text=req.query,
        lang=req.lang,
        filters=req.filters,
        strategy=strategy,
        k=req.k,
        rerank_k=req.rerank_k,
    )
    fresh = _pipeline.run(query).to_dict()
    store.put(key, fresh)

    # Tag a copy so the cached entry stays free of the marker.
    response = dict(fresh)
    response["_cache"] = "miss"
    return response
def save_query_preset(name: str, body: QueryPresetBody, request: Request):
    """Create or overwrite the saved query preset called ``name``.

    The query text and ``k`` are validated with the same helpers the query
    endpoints use; the preset is then written to the preset store and a
    ``query_preset.put`` audit event is emitted.

    Returns:
        The stored preset as a dict (``preset.to_dict()``).

    Raises:
        HTTPException: 413 on ``OverflowError`` from validation, 422 on
            ``ValueError``.
    """
    from ..presets import QueryPreset, get_preset_store

    try:
        validate_query_text(body.query)
        validate_k(body.k)
    except OverflowError as too_large:
        raise HTTPException(status_code=413, detail=str(too_large))
    except ValueError as invalid:
        raise HTTPException(status_code=422, detail=str(invalid))

    # Copy the body fields one-to-one; the path parameter supplies the name.
    copied = {
        field: getattr(body, field)
        for field in ("query", "k", "rerank_k", "strategy", "lang", "notes")
    }
    preset = QueryPreset(name=name, **copied)
    get_preset_store().put(preset)

    obs = get_obs()
    obs.audit(
        "query_preset.put",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        name=name,
    )
    return preset.to_dict()
def generate_timings(req: SearchRequest):
    """Run the query but return only the per-stage latency breakdown + Ω.

    No answer text and no candidate list are returned. For ops/profiling
    workflows that need to know WHERE time is spent, not WHAT was returned.
    The pipeline is invoked directly: this function neither reads nor
    writes the response cache, so the payload carries no cache marker.

    Response shape:
      {
        "query": "...",
        "timings_ms": {...},  # copy of ``resp.timing_ms``; typically
                              # understand, retrieve, fuse, rerank,
                              # generate, verify, signals, total
        "omega": 0.67,        # None if signals are missing or unreadable
        "n_sources": 2,
        "passed": true        # None when the response has no verification
      }

    Raises:
        HTTPException: 413 when a validator raises ``OverflowError``,
            422 when it raises ``ValueError``, 400 when ``req.strategy``
            is not a valid ``Strategy``.
    """
    try:
        validate_query_text(req.query)
        validate_k(req.k)
    except OverflowError as e:
        raise HTTPException(status_code=413, detail=str(e))
    except ValueError as e:
        raise HTTPException(status_code=422, detail=str(e))
    try:
        strategy = Strategy(req.strategy)
    except ValueError:
        raise HTTPException(status_code=400,
                            detail={"bad_strategy": req.strategy})

    q = Query(
        text=req.query, lang=req.lang, filters=req.filters,
        strategy=strategy, k=req.k, rerank_k=req.rerank_k,
    )
    resp = _pipeline.run(q)

    # Ω is best-effort: any failure reading or converting it yields None
    # rather than failing the whole profiling call.
    try:
        omega = float(resp.signals.omega) if resp.signals else None
    except Exception:
        omega = None

    return {
        "query": req.query,
        "timings_ms": dict(resp.timing_ms or {}),
        "omega": omega,
        "n_sources": len(resp.sources or []),
        "passed": (bool(resp.verification.passed)
                   if resp.verification else None),
    }
@app.put("/v1/admin/alerts/{name}")
def admin_alerts_put(name: str, req: AlertRuleRequest, request: Request):
    """Upsert the alert rule identified by the path parameter ``name``.

    The rule is always stored under the path ``name``; the ``name`` field in
    the request body is not consulted. A ``ValueError`` raised while
    building or storing the rule is reported as HTTP 400. On success an
    ``alert.rule.put`` audit event is emitted and the stored rule is
    returned as a dict.
    """
    from ..middleware import AlertRule, get_alert_store

    try:
        candidate = AlertRule(
            name=name,
            metric=req.metric,
            op=req.op,
            threshold=float(req.threshold),
            window_s=float(req.window_s),
            cooldown_s=float(req.cooldown_s),
            enabled=bool(req.enabled),
            description=req.description,
        )
        stored = get_alert_store().put(candidate)
    except ValueError as invalid:
        raise HTTPException(status_code=400, detail=str(invalid))

    obs = get_obs()
    obs.audit(
        "alert.rule.put",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        name=name,
        metric=req.metric,
        op=req.op,
        threshold=float(req.threshold),
    )
    return stored.to_dict()
def admin_metrics_history(
    since: Optional[float] = None,
    until: Optional[float] = None,
    metric: Optional[str] = None,
    limit: int = 1000,
):
    """Return sampled time-series metrics (v1.78).

    Query params:
      * ``since`` / ``until`` — Unix timestamps bounding the window.
      * ``metric`` — dotted path (e.g. ``obs.p95_ms``, ``cache.hit_rate``,
        ``limiter.denied``) to project each sample to ``{ts, value}``.
        Omit for full samples.
      * ``limit`` — hard cap on rows returned (default 1000); the newest
        rows are kept. ``0`` or a negative value disables the cap.

    The sampler is off by default; enable via
    ``TAU_RAG_METRICS_HISTORY_INTERVAL_SEC=10`` at server start, or call
    ``MetricsHistorySampler(h, interval_s=10.0).start()`` directly.

    Returns:
        Dict with ``samples``, ``count``, ``capacity``, the echoed
        ``metric`` and the sampler status (a stub with
        ``is_running: False`` when no sampler is configured).
    """
    from ..middleware import get_metrics_history, get_metrics_sampler

    h = get_metrics_history()
    rows = h.history(since=since, until=until, metric=metric)

    # Enforce upper bound — take the newest ``cap`` rows. Only a positive
    # cap may be used as a slice offset: ``rows[-(-5):]`` is ``rows[5:]``,
    # which would drop the oldest five rows instead of capping.
    cap = int(limit) if limit else 0
    if cap > 0 and len(rows) > cap:
        rows = rows[-cap:]

    sampler = get_metrics_sampler()
    return {
        "samples": rows,
        "count": len(rows),
        "capacity": h.capacity(),
        "metric": metric,
        "sampler": (sampler.status() if sampler
                    else {"is_running": False, "interval_s": None}),
    }
def admin_audit_export(
    since: Optional[float] = None,
    until: Optional[float] = None,
    event_type: Optional[str] = None,
    format: str = "jsonl",
    limit: int = 10_000,
):
    """Download the observability ring buffer as a file attachment.

    Query params:
      * ``since``      — Unix timestamp, inclusive lower bound on entry ``ts``
      * ``until``      — Unix timestamp, exclusive upper bound
      * ``event_type`` — ``audit`` | ``request`` (omit = both)
      * ``format``     — ``json`` for one indented JSON array; any other
        value (default ``jsonl``) yields newline-delimited JSON
      * ``limit``      — number of newest entries fetched from the log
        (at least 1, default 10,000); the time bounds are applied to that
        fetched tail, so the result may contain fewer rows

    The response carries ``Content-Disposition`` (attachment) and an
    ``X-Entry-Count`` header with the number of exported rows. Entries
    without a ``ts`` are treated as timestamp 0 when filtering.
    """
    from fastapi.responses import PlainTextResponse
    import json as _json

    def _stamp(entry) -> float:
        # Missing or falsy timestamps compare as 0.
        return float(entry.get("ts") or 0)

    entries = get_obs().tail(n=max(1, int(limit)), event_type=event_type)
    if since is not None:
        lower = float(since)
        entries = [e for e in entries if _stamp(e) >= lower]
    if until is not None:
        upper = float(until)
        entries = [e for e in entries if _stamp(e) < upper]

    if format == "json":
        media, suffix = "application/json", "json"
        payload = _json.dumps(entries, ensure_ascii=False, indent=2)
    else:
        # One object per line, each line newline-terminated; an empty
        # export is an empty body.
        media, suffix = "application/x-ndjson", "jsonl"
        payload = "".join(
            _json.dumps(e, ensure_ascii=False) + "\n" for e in entries
        )

    response_headers = {
        "Content-Disposition":
            f'attachment; filename="tau-rag-audit.{suffix}"',
        "X-Entry-Count": str(len(entries)),
    }
    return PlainTextResponse(payload, media_type=media,
                             headers=response_headers)
Query params: * ``event_type`` — ``audit`` | ``request`` (omit = both) * ``heartbeat_s`` — emit a ``:heartbeat`` comment every N seconds so proxies/load balancers don't idle-kill the connection (default 15s). * ``replay_last`` — on connect, emit the last N buffered entries before live-tailing (0 = live only). * ``max_events`` — stop after this many ``log`` events (0 = unbounded). Useful for bounded tails and deterministic testing. * ``max_heartbeats`` — stop after this many heartbeat ticks (0 = unbounded). Useful for "give me whatever's there within N seconds, then close". Event names: * ``log`` — new entry (data is the same dict as ``/v1/admin/logs``) * (heartbeat is a ``:`` SSE comment line, per the SSE spec) Client disconnect frees the subscriber queue; drop-oldest on a slow reader so the request path is never back-pressured. """ from fastapi.responses import StreamingResponse import json as _json import queue as _q obs = get_obs() sub = obs.subscribe(maxsize=256) def _sse(event: str, data: Any) -> str: return f"event: {event}\ndata: {_json.dumps(data, ensure_ascii=False)}\n\n" def _gen(): emitted_logs = 0 heartbeats = 0 try: # Replay tail first (filtered by event_type if set) if replay_last and replay_last > 0: for row in obs.tail(n=int(replay_last), event_type=event_type): yield _sse("log", row) emitted_logs += 1 if max_events and emitted_logs >= max_events: return # Live loop — block up to heartbeat_s for next entry; if # nothing arrived emit a comment keep-alive. 
def _default_snapshot_path() -> str:
    """Return the snapshot file path to use when a request names none.

    Reads ``TAU_RAG_SNAPSHOT_PATH`` from the environment; when the variable
    is unset or empty, falls back to ``runtime/snapshot.jsonl``.
    """
    configured = _os.environ.get("TAU_RAG_SNAPSHOT_PATH")
    if configured:
        return configured
    return "runtime/snapshot.jsonl"
def admin_snapshot_save(req: SnapshotSaveRequest, request: Request):
    """Write a pipeline snapshot to disk and audit-log the result.

    The target is ``req.path`` or, when that is empty, the default snapshot
    path. ``req.rotate`` is forwarded as an integer (0 when unset). The
    summary dict returned by the pipeline is both attached to the
    ``snapshot.save`` audit event (as keyword fields) and returned.
    """
    target = req.path if req.path else _default_snapshot_path()
    generations = int(req.rotate) if req.rotate else 0

    summary = _pipeline.save_snapshot(target, rotate=generations)

    obs = get_obs()
    obs.audit(
        "snapshot.save",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        **summary,
    )
    return summary
def _apply_config_update(cfg, key_path: str, value: Any) -> None:
    """Set ``cfg.a.b.c = value`` for a dotted ``key_path``.

    Every segment, including the last one, must already exist on the
    object it is looked up on. Without that check a mistyped (or empty)
    final segment would silently create a brand-new attribute and the
    update would look applied while changing nothing that is read.

    Args:
        cfg: Root configuration object.
        key_path: Dotted attribute path, e.g. ``"verify.min_omega"``.
        value: New value for the addressed attribute.

    Raises:
        AttributeError: If any segment of ``key_path`` does not exist.
    """
    *parents, leaf = key_path.split(".")

    target = cfg
    for name in parents:
        # getattr raises AttributeError for an unknown intermediate segment.
        target = getattr(target, name)

    if not hasattr(target, leaf):
        raise AttributeError(
            f"{type(target).__name__} has no attribute {leaf!r} "
            f"(config key {key_path!r})"
        )
    setattr(target, leaf, value)
def admin_list_synonyms(q: Optional[str] = None):
    """Return the synonym dictionary, optionally narrowed by ``?q=``.

    When ``q`` is non-empty it is stripped of surrounding whitespace and an
    entry is kept if the needle is a substring of its canonical form or of
    any of its variants. The response holds the surviving entries plus
    their count.
    """
    from ..core.hebrew_synonyms import list_synonyms

    entries = list_synonyms()
    if q:
        needle = q.strip()
        matched = {}
        for canonical, variants in entries.items():
            if needle in canonical:
                matched[canonical] = variants
                continue
            for variant in variants:
                if needle in variant:
                    matched[canonical] = variants
                    break
        entries = matched

    return {"count": len(entries), "synonyms": entries}
def admin_bulk_synonyms(req: SynonymBulkRequest, request: Request):
    """Add many synonym rows in one call, optionally replacing the dictionary.

    Each row is ``{canonical, variants: [...]}``. With ``req.replace`` set,
    the existing synonyms are cleared before any row is processed. Rows are
    applied independently: a row that raises is recorded in ``errors`` with
    its 1-based position and the exception text, and processing continues.

    Afterwards both query caches are cleared, the synonyms are autosaved if
    configured, and a ``synonyms.bulk`` audit event is emitted.
    """
    from ..core.hebrew_synonyms import add_synonym, clear_synonyms

    if req.replace:
        clear_synonyms()

    failures: List[Dict[str, Any]] = []
    succeeded = 0
    row_no = 0
    for entry in req.entries:
        row_no += 1
        try:
            variants = list(entry.get("variants") or []) if False else None
            add_synonym(entry["canonical"],
                        list(entry.get("variants") or []))
        except Exception as problem:
            failures.append({"row": row_no, "error": str(problem)})
        else:
            succeeded += 1

    # Cached answers may have been produced with the old expansions.
    _pipeline.cache.clear()
    get_cache().clear()
    _maybe_autosave_synonyms()

    obs = get_obs()
    obs.audit(
        "synonyms.bulk",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        added=succeeded,
        errors=len(failures),
        replaced=req.replace,
    )
    return {"added": succeeded, "errors": failures, "replaced": req.replace}
def admin_snapshot_load(req: SnapshotLoadRequest, request: Request):
    """Load a snapshot (current or a rotated generation) into the pipeline.

    ``req.path`` (or the default snapshot path) is combined with
    ``req.generation`` to resolve the file to load; per the request model,
    generation 0 is the current snapshot and N>0 a rotated backup. A
    missing file yields HTTP 404.

    The pipeline's load summary is extended with ``generation`` and
    ``path_loaded`` and returned. The ``snapshot.load`` audit event receives
    the resolved path plus every summary field except ``warnings``,
    ``generation`` and ``path_loaded``.
    """
    from pathlib import Path as _P
    from ..snapshot import _gen_path

    generation = int(req.generation or 0)
    base = req.path or _default_snapshot_path()
    resolved = str(_gen_path(_P(base), generation))

    if not _os.path.exists(resolved):
        raise HTTPException(
            status_code=404,
            detail={"snapshot_not_found": resolved, "generation": generation},
        )

    summary = _pipeline.load_snapshot(resolved, replace=req.replace)
    summary["generation"] = generation
    summary["path_loaded"] = resolved

    # Keep the audit record compact and avoid duplicating ``path``.
    omitted = ("warnings", "generation", "path_loaded")
    audit_fields = {}
    for field, val in summary.items():
        if field not in omitted:
            audit_fields[field] = val

    obs = get_obs()
    obs.audit(
        "snapshot.load",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        path=resolved,
        **audit_fields,
    )
    return summary
"key.create", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), label=req.label, scopes=req.scopes, ) return { "api_key": raw, "label": req.label, "scopes": req.scopes, "warning": "save this key now — it cannot be retrieved later", } def admin_list_keys(): return {"keys": get_auth().list_keys()} def admin_revoke_key(hash_prefix: str, request: Request): ok = get_auth().revoke(hash_prefix) if not ok: raise HTTPException(status_code=404, detail="key not found") get_obs().audit( "key.revoke", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), target_prefix=hash_prefix, ) return {"revoked": True, "hash_prefix": hash_prefix} class APIKeyRotateRequest(BaseModel): grace_seconds: float = 300.0 def admin_rotate_key( hash_prefix: str, req: APIKeyRotateRequest, request: Request, ): """Rotate an API key with a grace period (v1.76). Generates a new key with the same label+scopes. The old key remains valid until ``grace_seconds`` elapses, after which it stops working. Clients should rotate their config during the window. """ result = get_auth().rotate(hash_prefix, grace_seconds=req.grace_seconds) if result is None: raise HTTPException( status_code=404, detail={"error": "key not found or already revoked", "hash_prefix": hash_prefix}, ) get_obs().audit( "key.rotate", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), old_prefix=result["old_prefix"], new_prefix=result["new_prefix"], grace_seconds=result["grace_seconds"], ) return { **result, "warning": "save the new key now — it cannot be retrieved later", } # ---- v2.7 Maintenance / drain mode ------------------------------------- class MaintenanceOnRequest(BaseModel): reason: str = "" retry_after: int = 30 def admin_maintenance_on(req: MaintenanceOnRequest, request: Request): """Turn on maintenance / drain mode (v2.7). Non-admin requests get 503 + ``Retry-After`` until turned off. 
def admin_maintenance_off(request: Request):
    """Leave maintenance / drain mode (v2.7) and audit-log the action.

    A snapshot of the maintenance state is taken before disabling so the
    ``maintenance.off`` audit event can record ``duration_sec`` (rounded to
    two decimals). The response is ``{"ok": True}`` merged with the
    post-disable state snapshot.
    """
    from ..middleware.maintenance import get_maintenance

    maintenance = get_maintenance()
    previous = maintenance.snapshot()
    maintenance.disable()

    obs = get_obs()
    obs.audit(
        "maintenance.off",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        duration_sec=round(previous["duration_sec"], 2),
    )

    payload = {"ok": True}
    payload.update(maintenance.snapshot())
    return payload
""" from ..middleware.pii_redaction import get_pii_redactor r = get_pii_redactor() r.set_enabled(req.enabled) get_obs().audit( "pii_redaction.toggle", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), enabled=req.enabled, ) return {"ok": True, **r.stats()} def admin_pii_stats(): """PII redactor counters: how many IDs/phones/emails/CCs have been scrubbed since startup (or last reset).""" from ..middleware.pii_redaction import get_pii_redactor return get_pii_redactor().stats() def admin_pii_reset(request: Request): """Zero the per-kind counters. Does not change enabled state.""" from ..middleware.pii_redaction import get_pii_redactor r = get_pii_redactor() r.reset() get_obs().audit( "pii_redaction.reset", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), ) return {"ok": True, **r.stats()} # ---- v2.9 Slow-query detection ----------------------------------------- class SlowThresholdRequest(BaseModel): ms: float def admin_slow_queries(n: int = 20): """Top-N slowest requests + per-path aggregates + summary stats. ``n`` bounds the ``top`` list length (default 20, max 100). """ from ..middleware.slow_queries import get_slow_tracker n = min(100, max(1, int(n))) t = get_slow_tracker() return { "stats": t.stats(), "top": t.top_n(n), "by_path": t.by_path(), } def admin_slow_threshold(req: SlowThresholdRequest, request: Request): """Set the slow-query threshold in ms at runtime. 
def admin_slow_reset(request: Request):
    """Reset the slow-query tracker and audit-log the action.

    Calls ``reset()`` on the tracker, emits a ``slow_queries.reset`` audit
    event, and returns ``{"ok": True}`` merged with the tracker's stats
    after the reset.
    """
    from ..middleware.slow_queries import get_slow_tracker

    tracker = get_slow_tracker()
    tracker.reset()

    obs = get_obs()
    obs.audit(
        "slow_queries.reset",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
    )

    payload = {"ok": True}
    payload.update(tracker.stats())
    return payload
""" from ..middleware.quota import get_quota_tracker t = get_quota_tracker() t.set_quota(req.key_prefix, req.limit) get_obs().audit( "quota.set", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), key_prefix=req.key_prefix, limit=req.limit, ) return {"ok": True, "key_prefix": req.key_prefix, "limit": req.limit} def admin_quota_clear(key_prefix: str, request: Request): """Remove quota enforcement for a key entirely (back to unlimited and zero-out its usage counter).""" from ..middleware.quota import get_quota_tracker t = get_quota_tracker() removed = t.clear_quota(key_prefix) if not removed: raise HTTPException( status_code=404, detail={"error": "no quota set for this key_prefix", "key_prefix": key_prefix}, ) get_obs().audit( "quota.clear", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), key_prefix=key_prefix, ) return {"ok": True, "key_prefix": key_prefix, "removed": True} def admin_quota_reset_all(request: Request): """Wipe all quota state — limits AND usage. Testing / incident recovery. 
def admin_idempotency_ttl(req: IdemTTLRequest, request: Request):
    """Change the idempotency store's TTL at runtime and audit-log it.

    ``req.ttl_sec`` is passed to the store's ``set_ttl``; an
    ``idempotency.ttl`` audit event records the requested value. Returns
    ``{"ok": True}`` merged with the store's stats after the change.
    """
    from ..middleware.idempotency import get_idempotency_store

    store = get_idempotency_store()
    store.set_ttl(req.ttl_sec)

    obs = get_obs()
    obs.audit(
        "idempotency.ttl",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        ttl_sec=req.ttl_sec,
    )

    payload = {"ok": True}
    payload.update(store.stats())
    return payload
0 disables.""" from ..middleware.request_timeout import get_timeout_guard g = get_timeout_guard() old = g.timeout_ms g.set_timeout_ms(req.timeout_ms) get_obs().audit( "request_timeout.set", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), old_ms=old, new_ms=g.timeout_ms, ) return {"ok": True, **g.stats()} def admin_request_timeout_reset(request: Request): from ..middleware.request_timeout import get_timeout_guard g = get_timeout_guard() g.reset() return {"ok": True, **g.stats()} # ---- v2.16 Body limit -------------------------------------------------- class BodyLimitRequest(BaseModel): max_bytes: int def admin_body_limit_get(): from ..middleware.body_limit import get_body_limit_guard return get_body_limit_guard().stats() def admin_body_limit_set(req: BodyLimitRequest, request: Request): """Set max body bytes at runtime. 0 disables.""" from ..middleware.body_limit import get_body_limit_guard g = get_body_limit_guard() old = g.max_bytes g.set_max_bytes(req.max_bytes) get_obs().audit( "body_limit.set", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), old_bytes=old, new_bytes=g.max_bytes, ) return {"ok": True, **g.stats()} # ---- v2.17 Deprecation registry ---------------------------------------- class DeprecationRegisterRequest(BaseModel): path_prefix: str sunset_at: Optional[float] = None # unix ts successor_url: Optional[str] = None docs_url: Optional[str] = None note: Optional[str] = None def admin_deprecations_get(): from ..middleware.deprecation import get_deprecation_registry return {"entries": get_deprecation_registry().all_entries()} def admin_deprecations_register(req: DeprecationRegisterRequest, request: Request): from ..middleware.deprecation import get_deprecation_registry r = get_deprecation_registry() try: entry = r.register( path_prefix=req.path_prefix, sunset_at=req.sunset_at, successor_url=req.successor_url, docs_url=req.docs_url, note=req.note, ) 
except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) get_obs().audit( "deprecation.register", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), path_prefix=req.path_prefix, sunset_at=req.sunset_at, ) from dataclasses import asdict as _asdict return {"ok": True, **_asdict(entry)} def admin_deprecations_clear(path_prefix: str, request: Request): from ..middleware.deprecation import get_deprecation_registry # Path params strip the leading /; add it back if not path_prefix.startswith("/"): path_prefix = "/" + path_prefix removed = get_deprecation_registry().unregister(path_prefix) if not removed: raise HTTPException(status_code=404, detail={"error": "not registered", "path_prefix": path_prefix}) get_obs().audit( "deprecation.unregister", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), path_prefix=path_prefix, ) return {"ok": True, "removed": True, "path_prefix": path_prefix} # ---- v2.18 Feature flags ----------------------------------------------- class FlagSetRequest(BaseModel): name: str value: bool def admin_flags_get(): from ..middleware.feature_flags import get_feature_flags return {"flags": get_feature_flags().all_flags()} def admin_flags_set(req: FlagSetRequest, request: Request): from ..middleware.feature_flags import get_feature_flags fl = get_feature_flags() entry = fl.set(req.name, req.value) if entry is None: raise HTTPException( status_code=404, detail={"error": "flag not registered", "name": req.name}) get_obs().audit( "flag.set", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), name=req.name, value=req.value, ) from dataclasses import asdict as _asdict return {"ok": True, **_asdict(entry)} def admin_flags_reset(name: str, request: Request): from ..middleware.feature_flags import get_feature_flags fl = get_feature_flags() entry = fl.reset_to_default(name) if entry is None: 
raise HTTPException(status_code=404, detail={"error": "flag not registered", "name": name}) get_obs().audit( "flag.reset", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), name=name, ) from dataclasses import asdict as _asdict return {"ok": True, **_asdict(entry)} # ---- v2.19 SIGTERM shutdown manager ------------------------------------ class ShutdownConfigRequest(BaseModel): grace_sec: Optional[float] = None install: Optional[bool] = None def admin_shutdown_stats(): from ..middleware.shutdown import get_shutdown_manager return get_shutdown_manager().stats() def admin_shutdown_config(req: ShutdownConfigRequest, request: Request): """Configure graceful-shutdown behavior. ``install=True`` attaches SIGTERM/SIGINT handlers; grace_sec sets the drain-before-exit window.""" from ..middleware.shutdown import get_shutdown_manager m = get_shutdown_manager() if req.grace_sec is not None: m.set_grace_sec(req.grace_sec) if req.install is True: m.install() elif req.install is False: m.uninstall() get_obs().audit( "shutdown.config", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), grace_sec=req.grace_sec, install=req.install, ) return {"ok": True, **m.stats()} # ---- v2.20 Per-endpoint timeouts (extends v2.14 admin) ---------------- class EndpointTimeoutRequest(BaseModel): path_prefix: str timeout_ms: float def admin_endpoint_timeout_set(req: EndpointTimeoutRequest, request: Request): """Set a per-endpoint timeout override (v2.20). 
``timeout_ms=0`` removes the override; endpoint falls back to global.""" from ..middleware.request_timeout import get_timeout_guard g = get_timeout_guard() g.set_endpoint_timeout(req.path_prefix, req.timeout_ms) get_obs().audit( "request_timeout.endpoint", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), path_prefix=req.path_prefix, timeout_ms=req.timeout_ms, ) return {"ok": True, **g.stats()} def admin_endpoint_timeout_clear(path_prefix: str, request: Request): from ..middleware.request_timeout import get_timeout_guard g = get_timeout_guard() if not path_prefix.startswith("/"): path_prefix = "/" + path_prefix removed = g.clear_endpoint_timeout(path_prefix) if not removed: raise HTTPException(status_code=404, detail={"error": "not set", "path_prefix": path_prefix}) return {"ok": True, "removed": True, **g.stats()} # ---- v2.21 Cost tracking --------------------------------------------- class CostRateRequest(BaseModel): endpoint: str # path prefix, or "*" for default per_call_usd: float = 0.0 per_1k_tokens_usd: float = 0.0 def admin_costs_get(n: int = 20): from ..middleware.cost import get_cost_tracker n = min(100, max(1, int(n))) t = get_cost_tracker() return { "stats": t.stats(), "top_spenders": t.top_spenders(n), } def admin_costs_set_rate(req: CostRateRequest, request: Request): from ..middleware.cost import get_cost_tracker t = get_cost_tracker() t.set_rate(req.endpoint, per_call_usd=req.per_call_usd, per_1k_tokens_usd=req.per_1k_tokens_usd) get_obs().audit( "cost.rate", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), endpoint=req.endpoint, per_call_usd=req.per_call_usd, per_1k_tokens_usd=req.per_1k_tokens_usd, ) return {"ok": True, **t.stats()} def admin_costs_key_usage(key_prefix: str): from ..middleware.cost import get_cost_tracker return get_cost_tracker().usage_for_key(key_prefix) def admin_costs_reset(request: Request): from ..middleware.cost import 
get_cost_tracker t = get_cost_tracker() t.reset() return {"ok": True, **t.stats()} # ---- v2.22 Response compression ---------------------------------------- class CompressionConfigRequest(BaseModel): enabled: Optional[bool] = None min_size_bytes: Optional[int] = None def admin_compression_get(): from ..middleware.compression import get_compression_tracker return get_compression_tracker().stats() def admin_compression_set(req: CompressionConfigRequest, request: Request): from ..middleware.compression import get_compression_tracker t = get_compression_tracker() if req.enabled is not None: t.set_enabled(req.enabled) if req.min_size_bytes is not None: t.set_min_size(req.min_size_bytes) get_obs().audit( "compression.config", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), enabled=req.enabled, min_size_bytes=req.min_size_bytes, ) return {"ok": True, **t.stats()} # ---- v2.23 IP allowlist ------------------------------------------------ class IPAllowlistAddRequest(BaseModel): key_prefix: str ip_or_cidr: str def admin_ip_allowlist_get(): from ..middleware.ip_allowlist import get_ip_allowlist s = get_ip_allowlist() return {"entries": s.all_entries(), **s.stats()} def admin_ip_allowlist_add(req: IPAllowlistAddRequest, request: Request): from ..middleware.ip_allowlist import get_ip_allowlist s = get_ip_allowlist() try: s.add(req.key_prefix, req.ip_or_cidr) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) get_obs().audit( "ip_allowlist.add", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), key_prefix=req.key_prefix, ip_or_cidr=req.ip_or_cidr, ) return {"ok": True, "key_prefix": req.key_prefix, "entries": s.list_for_key(req.key_prefix)} def admin_ip_allowlist_clear_key(key_prefix: str, request: Request): from ..middleware.ip_allowlist import get_ip_allowlist s = get_ip_allowlist() removed = s.clear_key(key_prefix) if not removed: raise 
HTTPException(status_code=404, detail={"error": "no entries", "key_prefix": key_prefix}) return {"ok": True, "key_prefix": key_prefix, "removed": True} # ---- v2.24 Query complexity -------------------------------------------- class QueryComplexityConfigRequest(BaseModel): max_tokens: Optional[int] = None max_chars: Optional[int] = None min_unique_ratio: Optional[float] = None enabled: Optional[bool] = None def admin_query_complexity_get(): from ..middleware.query_complexity import get_query_complexity return get_query_complexity().stats() def admin_query_complexity_set(req: QueryComplexityConfigRequest, request: Request): from ..middleware.query_complexity import get_query_complexity s = get_query_complexity() s.configure(max_tokens=req.max_tokens, max_chars=req.max_chars, min_unique_ratio=req.min_unique_ratio, enabled=req.enabled) get_obs().audit( "query_complexity.config", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), **req.model_dump(exclude_unset=True), ) return {"ok": True, **s.stats()} def admin_query_complexity_reset(request: Request): from ..middleware.query_complexity import get_query_complexity s = get_query_complexity() s.reset() return {"ok": True, **s.stats()} # ---- v2.25 Per-key labels ---------------------------------------------- class KeyLabelRequest(BaseModel): key_prefix: str name: str value: str def admin_key_labels_get(): from ..middleware.key_labels import get_key_labels s = get_key_labels() return {"entries": s.all_entries(), **s.stats()} def admin_key_labels_for_key(key_prefix: str): from ..middleware.key_labels import get_key_labels return {"key_prefix": key_prefix, "labels": get_key_labels().get_labels(key_prefix)} def admin_key_labels_set(req: KeyLabelRequest, request: Request): from ..middleware.key_labels import get_key_labels s = get_key_labels() s.set_label(req.key_prefix, req.name, req.value) get_obs().audit( "key_labels.set", actor_key=request.headers.get("x-api-key"), 
request_id=getattr(request.state, "request_id", None), key_prefix=req.key_prefix, name=req.name, value=req.value, ) return {"ok": True, "key_prefix": req.key_prefix, "labels": s.get_labels(req.key_prefix)} def admin_key_labels_remove(key_prefix: str, name: str, request: Request): from ..middleware.key_labels import get_key_labels s = get_key_labels() removed = s.remove_label(key_prefix, name) if not removed: raise HTTPException(status_code=404, detail={"error": "label not set", "key_prefix": key_prefix, "name": name}) return {"ok": True, "key_prefix": key_prefix, "removed": name} def admin_key_labels_clear(key_prefix: str, request: Request): from ..middleware.key_labels import get_key_labels s = get_key_labels() removed = s.clear_key(key_prefix) if not removed: raise HTTPException(status_code=404, detail={"error": "no labels", "key_prefix": key_prefix}) return {"ok": True, "key_prefix": key_prefix, "removed": True} # ---- v2.26 i18n -------------------------------------------------------- class I18nRegisterRequest(BaseModel): code: str lang: str message: str def admin_i18n_get(): from ..middleware.i18n import get_i18n r = get_i18n() return {"entries": r.all_entries(), **r.stats()} def admin_i18n_register(req: I18nRegisterRequest, request: Request): from ..middleware.i18n import get_i18n r = get_i18n() r.register(req.code, req.lang, req.message) get_obs().audit( "i18n.register", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), code=req.code, lang=req.lang, ) return {"ok": True, **r.stats()} def admin_i18n_unregister(code: str, lang: str, request: Request): from ..middleware.i18n import get_i18n r = get_i18n() if not r.unregister(code, lang): raise HTTPException(status_code=404, detail={"error": "not registered", "code": code, "lang": lang}) return {"ok": True, "code": code, "lang": lang, "removed": True} # ---- v2.27 Request bundle ---------------------------------------------- def admin_request_bundle(request_id: 
str): """Post-incident forensic dump: request_log + captured_body + spans + audit_events + timing, all for one request_id. One call instead of N.""" from ..middleware.request_bundle import build_bundle return build_bundle(request_id) # ---- v2.28 Per-label aggregation --------------------------------------- def admin_aggregate_by_label(label_name: str, n: int = 2000): """Aggregate recent traffic by a label value (tenant, tier, region). Returns per-bucket n_requests, latency p50/p95/p99, cost, status counts. Stateless query over obs log + cost tracker.""" from ..middleware.label_aggregation import aggregate_by_label return aggregate_by_label(label_name, n_rows=min(10000, max(100, int(n)))) # ---- v2.29 Unified admin dashboard HTML -------------------------------- def admin_dashboard(refresh: int = 0): """One-page HTML dashboard consolidating every admin stat. Inline CSS, zero JS, escape-safe, optional meta-refresh.""" from fastapi.responses import HTMLResponse from .ops_dashboard_ui import render_dashboard from ..middleware import ( get_obs, get_limiter, get_quota_tracker, get_body_limit_guard, get_compression_tracker, get_slow_tracker, get_cost_tracker, get_maintenance, get_ip_allowlist, get_pii_redactor, get_readiness_registry, ) from ..observability.tracing import get_tracer obs = get_obs() html_out = render_dashboard( version=app.version, obs_stats=obs.stats(), limiter_stats=get_limiter().stats(), quota_stats=get_quota_tracker().stats(), body_stats=get_body_limit_guard().stats(), compression_stats=get_compression_tracker().stats(), slow_stats=get_slow_tracker().stats(), spans_count=len(get_tracer().spans), cost_stats=get_cost_tracker().stats(), top_spenders=get_cost_tracker().top_spenders(5), maint_stats=get_maintenance().snapshot(), ip_allow_stats=get_ip_allowlist().stats(), pii_stats=get_pii_redactor().stats(), readiness=get_readiness_registry().evaluate(), refresh_sec=max(0, int(refresh)), ) return HTMLResponse(content=html_out, status_code=200) # ---- v2.30 
# SLO tracking ------------------------------------------------

class SLORegisterRequest(BaseModel):
    """Body for ``admin_slo_register``."""

    name: str
    kind: str    # "latency" | "availability"
    target_pct: float
    window: int = 1000
    latency_threshold_ms: float = 0.0


def admin_slo_get():
    """Return a snapshot of all registered SLOs."""
    from ..middleware.slo import get_slo_tracker
    return {"slos": get_slo_tracker().snapshot()}


def admin_slo_register(req: SLORegisterRequest, request: Request):
    """Register an SLO and return its name, kind and target.

    Raises ``HTTPException`` (400) when the tracker rejects the input with
    ``ValueError``; the original error is chained as the cause.
    """
    from ..middleware.slo import get_slo_tracker
    t = get_slo_tracker()
    try:
        slo = t.register(
            name=req.name, kind=req.kind,
            target_pct=req.target_pct, window=req.window,
            latency_threshold_ms=req.latency_threshold_ms,
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)}) from e
    # NOTE(review): the dumped fields include a ``kind`` key. If the first
    # positional parameter of ``audit`` is itself named ``kind`` this call
    # raises TypeError after the SLO is already registered — confirm against
    # the audit() signature.
    get_obs().audit(
        "slo.register",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        **req.model_dump(exclude_unset=True),
    )
    return {"ok": True, "name": slo.name, "kind": slo.kind,
            "target_pct": slo.target_pct}


def admin_slo_unregister(name: str, request: Request):
    """Unregister an SLO by name; 404 when it was not registered.

    Audited like ``admin_slo_register``.
    """
    from ..middleware.slo import get_slo_tracker
    t = get_slo_tracker()
    if not t.unregister(name):
        raise HTTPException(status_code=404,
                            detail={"error": "not registered", "name": name})
    get_obs().audit(
        "slo.unregister",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        name=name,
    )
    return {"ok": True, "name": name, "removed": True}


def admin_slo_reset(request: Request):
    """Reset the SLO tracker and audit the action."""
    from ..middleware.slo import get_slo_tracker
    get_slo_tracker().reset()
    get_obs().audit(
        "slo.reset",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
    )
    return {"ok": True}


# ---- v2.31 Async jobs --------------------------------------------------

class AsyncGenerateRequest(BaseModel):
    """Body for ``POST /v1/generate/async``."""

    query: str
    k: int = 5


@app.post("/v1/generate/async", status_code=202)
def generate_async(req: AsyncGenerateRequest, request: Request):
    """Schedule a generation as a background job. Returns 202 Accepted
    + job_id. Poll GET /v1/jobs/{id} for status."""
    from ..middleware.async_jobs import get_job_queue
    from ..core.types import Query

    def _work():
        # Runs later on the job queue; builds the query from the captured
        # request body and returns the pipeline result as a dict.
        q = Query(text=req.query)
        return _pipeline.run(q, k=req.k).to_dict()

    job_id = get_job_queue().enqueue(_work, kind="generate",
                                     meta={"k": req.k})
    return {"job_id": job_id, "status": "queued"}


@app.get("/v1/jobs/{job_id}")
def get_job(job_id: str):
    """Return one job as a dict; 404 when the queue does not know the id."""
    from ..middleware.async_jobs import get_job_queue
    job = get_job_queue().get(job_id)
    if job is None:
        raise HTTPException(status_code=404,
                            detail={"error": "job not found",
                                    "job_id": job_id})
    return job.to_dict()


@app.post("/v1/jobs/{job_id}/cancel")
def cancel_job(job_id: str):
    """Cancel a job; 404 when the queue reports it is not cancellable."""
    from ..middleware.async_jobs import get_job_queue
    if get_job_queue().cancel(job_id):
        return {"ok": True, "job_id": job_id, "cancelled": True}
    raise HTTPException(status_code=404,
                        detail={"error": "job not cancellable",
                                "job_id": job_id})


def admin_jobs_list(status: Optional[str] = None, limit: int = 50):
    """Return queue stats plus a job listing filtered by ``status``."""
    from ..middleware.async_jobs import get_job_queue
    q = get_job_queue()
    return {"stats": q.stats(),
            "jobs": q.list_jobs(status_filter=status, limit=limit)}


# ---- v2.32 Cost alerts -------------------------------------------------

class CostAlertRequest(BaseModel):
    """Body for ``admin_cost_alerts_set_key``."""

    key_prefix: str
    thresholds_usd: List[float]


class CostAlertGlobalRequest(BaseModel):
    """Body for ``admin_cost_alerts_set_global``."""

    thresholds_usd: List[float]


def admin_cost_alerts_get():
    """Return the cost-alert store's ``stats()`` payload."""
    from ..middleware.cost_alerts import get_cost_alerts
    return get_cost_alerts().stats()


def admin_cost_alerts_set_key(req: CostAlertRequest, request: Request):
    """Set per-key alert thresholds and audit the change."""
    from ..middleware.cost_alerts import get_cost_alerts
    get_cost_alerts().set_thresholds(req.key_prefix, req.thresholds_usd)
    get_obs().audit(
        "cost_alerts.set_key",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        key_prefix=req.key_prefix, thresholds_usd=req.thresholds_usd,
    )
    return {"ok": True, **get_cost_alerts().stats()}


def admin_cost_alerts_set_global(req: CostAlertGlobalRequest,
                                 request: Request):
    """Set the global alert thresholds and audit the change."""
    from ..middleware.cost_alerts import get_cost_alerts
    get_cost_alerts().set_global_thresholds(req.thresholds_usd)
    get_obs().audit(
        "cost_alerts.set_global",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        thresholds_usd=req.thresholds_usd,
    )
    return {"ok": True, **get_cost_alerts().stats()}


def admin_cost_alerts_reset_fired(request: Request):
    """Call ``reset_fired`` on the alert store, audit it, return the stats."""
    from ..middleware.cost_alerts import get_cost_alerts
    get_cost_alerts().reset_fired()
    get_obs().audit(
        "cost_alerts.reset_fired",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
    )
    return {"ok": True, **get_cost_alerts().stats()}


# ---- v2.33 Prometheus histograms ---------------------------------------

@app.get("/metrics/histograms", response_class=Response,
         include_in_schema=False)
def prometheus_histograms():
    """Prometheus-format request-duration histograms per path."""
    from fastapi.responses import PlainTextResponse
    from ..middleware.prom_histograms import get_prom_histograms
    text = get_prom_histograms().render()
    return PlainTextResponse(content=text, status_code=200,
                             media_type="text/plain; version=0.0.4")


def admin_histograms_stats():
    """Return the histogram store's ``stats()`` payload."""
    from ..middleware.prom_histograms import get_prom_histograms
    return get_prom_histograms().stats()


# ---- v2.34 ETag / conditional GET --------------------------------------

@app.get("/v1/etag_stats")
def etag_stats_cachable(request: Request):
    """Cacheable version-manifest-ish endpoint demonstrating ETag.
    Returns 304 when the client's ``If-None-Match`` matches current
    payload hash.

    Both the 200 and the 304 carry the same ``ETag`` and ``Cache-Control``
    headers; the 304 additionally echoes ``X-Request-ID``.
    """
    from fastapi.responses import JSONResponse as _JR
    from fastapi.responses import Response as _Resp
    from ..middleware.etag import (
        compute_etag as _cet, matches_if_none_match as _mch,
        get_etag_tracker,
    )
    # Best-effort document count: any failure degrades to 0 instead of
    # failing the request.
    try:
        n_docs = len(getattr(_pipeline, "_docs", []))
    except Exception:
        n_docs = 0
    payload = {"version": app.version, "n_documents": n_docs}
    etag = _cet(payload)
    # RFC 9110 §15.4.5: a 304 must carry the caching headers the 200 would
    # have sent, so the two responses share one header set.
    cache_headers = {"ETag": etag,
                     "Cache-Control": "private, must-revalidate"}
    inm = request.headers.get("if-none-match")
    if _mch(etag, inm):
        get_etag_tracker().record("/v1/etag_stats", was_304=True)
        # request_id may be missing *or* set to None; header values must
        # be strings, so fall back to "" in both cases.
        rid = str(getattr(request.state, "request_id", None) or "")
        return _Resp(status_code=304,
                     headers={**cache_headers, "X-Request-ID": rid})
    get_etag_tracker().record("/v1/etag_stats", was_304=False)
    return _JR(content=payload, status_code=200, headers=cache_headers)


# v3.x — Generator health diagnostic. Lets the user check if a local
# LLM (Ollama / TAU Native) is wired up and reachable before running
# a real query. Public, read-only. Returns provider-specific status
# blocks — e.g. the TAU native generator reports checkpoint path,
# epoch, loss, and parameter count if it can load.
@app.get("/v1/generator/health")
def generator_health():
    """Describe the configured generator and, if it exposes one, its health.

    Never raises: any failure is returned as ``{"ok": False, "error": ...}``.
    """
    try:
        from ..pipeline import get_pipeline
        pipe = get_pipeline()
        gen = pipe.generator
        result = {
            "provider": pipe.config.generation.provider,
            "model": pipe.config.generation.model,
            "class": type(gen).__name__,
            "name": getattr(gen, "name", "unknown"),
        }
        # ``health`` is optional on generators; a failing probe is reported
        # inside the payload rather than failing the whole endpoint.
        if hasattr(gen, "health") and callable(gen.health):
            try:
                result["health"] = gen.health()
            except Exception as e:
                result["health"] = {"ok": False,
                                    "error": f"{type(e).__name__}: {e}"}
        return result
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}


# v3.x — Training data summary. Returns counts of (query, context, answer)
# triples collected via TAU_RAG_COLLECT_TRAINING=1, plus feedback rollup.
# Safe public endpoint — no raw queries leaked.
@app.get("/v1/training_data/summary") def training_data_summary(): try: from ..middleware.training_collector import get_training_collector return get_training_collector().summary() except Exception as e: return {"ok": False, "error": f"{type(e).__name__}: {e}"} # v2.x — PUBLIC suggestions endpoint for the UI. # Intentionally NOT under /v1/admin/*, so the global admin-scope # middleware lets it through. Returns a redacted view of the top # queries — just the text and count — with no sensitive fields # (no latencies, no user hashes, no timestamps). Safe for the static # welcome screen to populate its "suggested questions" chips. @app.get("/v1/suggestions") def public_suggestions(limit: int = 8): """Top N recent/popular queries — safe read-only slice for the UI. Not admin-gated. Returns: ``{"top_queries": [{"text": str, "count": int}, ...]}``. Falls back to ``{"top_queries": []}`` if the query-stats store is empty, unavailable, or too young to be meaningful (<2 uses per item). """ try: from ..middleware import get_query_stats store = get_query_stats() raw = store.top(n=max(1, min(int(limit), 20))) except Exception: raw = [] out = [] for row in (raw or []): # top() returns dicts keyed by fingerprint with at least 'sample' # and 'count' (see middleware/query_stats.py). Defensive-access # so schema drift doesn't 500 the UI. 
text = (row.get("sample") or row.get("text") or row.get("query") or "").strip() count = int(row.get("count") or row.get("n") or 0) if not text or count < 1: continue if len(text) < 4 or len(text) > 120: continue # filter junk-length queries out.append({"text": text, "count": count}) return {"top_queries": out} def admin_etag_stats(): from ..middleware.etag import get_etag_tracker return get_etag_tracker().stats() # ---- v2.35 HMAC signing store ----------------------------------------- class HMACRegisterRequest(BaseModel): key_prefix: str secret: str def admin_hmac_get(): from ..middleware.hmac_signing import get_hmac_signing s = get_hmac_signing() return {"keys": s.all_entries(), **s.stats()} def admin_hmac_register(req: HMACRegisterRequest, request: Request): """Register an HMAC secret for a key (by hash prefix). Once registered, the key MUST sign future requests.""" from ..middleware.hmac_signing import get_hmac_signing s = get_hmac_signing() s.set_secret(req.key_prefix, req.secret) get_obs().audit( "hmac.register", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), key_prefix=req.key_prefix, ) return {"ok": True, "key_prefix": req.key_prefix} def admin_hmac_remove(key_prefix: str, request: Request): from ..middleware.hmac_signing import get_hmac_signing s = get_hmac_signing() if not s.clear_secret(key_prefix): raise HTTPException(status_code=404, detail={"error": "no secret", "key_prefix": key_prefix}) return {"ok": True, "key_prefix": key_prefix, "removed": True} # ---- v2.36 Batch query endpoint ---------------------------------------- class BatchQueryItem(BaseModel): query: str k: int = 5 class BatchSearchRequest(BaseModel): queries: List[BatchQueryItem] @app.post("/v1/search/batch") def search_batch(req: BatchSearchRequest, request: Request): """Execute multiple searches in one request. Shares auth/rate/quota checks. 
Each query independently returns its own result or error; one failure doesn't fail the whole batch.""" from ..core.types import Query if not req.queries: raise HTTPException( status_code=400, detail={"error": "empty batch"}, ) # Reasonable cap — same spirit as body-limit if len(req.queries) > 100: raise HTTPException( status_code=413, detail={"error": "too many queries in batch", "limit": 100, "got": len(req.queries)}, ) results: List[Dict[str, Any]] = [] for i, item in enumerate(req.queries): try: q = Query(text=item.query) result = _pipeline.run(q, k=item.k) results.append({ "index": i, "query": item.query, "ok": True, "result": result.to_dict(), }) except Exception as e: results.append({ "index": i, "query": item.query, "ok": False, "error": f"{type(e).__name__}: {e}"[:200], }) return { "n_queries": len(req.queries), "n_succeeded": sum(1 for r in results if r["ok"]), "n_failed": sum(1 for r in results if not r["ok"]), "results": results, } # ---- v2.37 Webhook retry + DLQ ----------------------------------------- def admin_webhook_retry_get(): from ..middleware.webhook_retry import get_webhook_retry q = get_webhook_retry() return { "stats": q.stats(), "pending": q.pending(), "dlq": q.dlq(), } def admin_webhook_replay(event_id: str, request: Request): from ..middleware.webhook_retry import get_webhook_retry if not get_webhook_retry().replay_dlq(event_id): raise HTTPException(status_code=404, detail={"error": "not in dlq", "event_id": event_id}) get_obs().audit( "webhook_retry.replay", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), event_id=event_id, ) return {"ok": True, "event_id": event_id, "replayed": True} def admin_webhook_purge_dlq(request: Request): from ..middleware.webhook_retry import get_webhook_retry n = get_webhook_retry().purge_dlq() get_obs().audit( "webhook_retry.purge", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), purged=n, ) return {"ok": 
True, "purged": n} # ---- v2.38 Hebrew normalization --------------------------------------- class HebrewNormConfigRequest(BaseModel): enabled: bool def admin_hebrew_norm_get(): from ..middleware.hebrew_normalize import get_hebrew_normalizer return get_hebrew_normalizer().stats() def admin_hebrew_norm_set(req: HebrewNormConfigRequest, request: Request): from ..middleware.hebrew_normalize import get_hebrew_normalizer n = get_hebrew_normalizer() n.set_enabled(req.enabled) get_obs().audit( "hebrew_normalize.toggle", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), enabled=req.enabled, ) return {"ok": True, **n.stats()} class HebrewNormPreviewRequest(BaseModel): text: str def admin_hebrew_norm_preview(req: HebrewNormPreviewRequest): """Preview normalization result — useful for clients to see what their query canonicalizes to.""" from ..middleware.hebrew_normalize import normalize, would_canonicalize_to return { "input": req.text, "normalized": normalize(req.text), "changed": normalize(req.text) != req.text, } # ---- v2.39 Audit log search -------------------------------------------- class AuditSearchRequest(BaseModel): kind: Optional[str] = None kind_prefix: Optional[str] = None actor_prefix: Optional[str] = None since: Optional[float] = None until: Optional[float] = None substring: Optional[str] = None limit: int = 100 offset: int = 0 def admin_audit_search(req: AuditSearchRequest): from ..middleware.audit_search import search_audit return search_audit( kind=req.kind, kind_prefix=req.kind_prefix, actor_prefix=req.actor_prefix, since=req.since, until=req.until, substring=req.substring, limit=req.limit, offset=req.offset, ) def admin_audit_summary(since: Optional[float] = None): from ..middleware.audit_search import audit_summary return audit_summary(since=since) # ---- v2.40 Tombstones / soft-delete ------------------------------------ class SoftDeleteRequest(BaseModel): doc_id: str reason: Optional[str] = None def 
admin_tombstones_get(): from ..middleware.tombstones import get_tombstones t = get_tombstones() return { "stats": t.stats(), "tombstones": t.all_tombstones(), } def admin_tombstones_soft_delete(req: SoftDeleteRequest, request: Request): from ..middleware.tombstones import get_tombstones t = get_tombstones() actor_hash = _hash_prefix(request.headers.get("x-api-key")) tomb = t.soft_delete(req.doc_id, by_prefix=actor_hash, reason=req.reason) get_obs().audit( "tombstone.soft_delete", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), doc_id=req.doc_id, reason=req.reason, ) return {"ok": True, **tomb.to_dict()} def admin_tombstones_restore(doc_id: str, request: Request): from ..middleware.tombstones import get_tombstones t = get_tombstones() if not t.restore(doc_id): raise HTTPException(status_code=404, detail={"error": "not tombstoned", "doc_id": doc_id}) get_obs().audit( "tombstone.restore", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), doc_id=doc_id, ) return {"ok": True, "doc_id": doc_id, "restored": True} def admin_tombstones_purge(doc_id: str, request: Request): """Force-remove tombstone before retention expires.""" from ..middleware.tombstones import get_tombstones t = get_tombstones() if not t.purge(doc_id): raise HTTPException(status_code=404, detail={"error": "not tombstoned", "doc_id": doc_id}) get_obs().audit( "tombstone.purge", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), doc_id=doc_id, ) return {"ok": True, "doc_id": doc_id, "purged": True} def admin_tombstones_purge_expired(request: Request): from ..middleware.tombstones import get_tombstones t = get_tombstones() expired = t.purge_expired() get_obs().audit( "tombstone.purge_expired", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), n_purged=len(expired), ) return {"ok": True, "n_purged": len(expired), 
"doc_ids": expired} # ---- v2.41 Query coalescing ------------------------------------------- class CoalescingConfigRequest(BaseModel): enabled: bool def admin_coalescing_get(n: int = 10): from ..middleware.query_coalescing import get_coalescing_tracker t = get_coalescing_tracker() return { "stats": t.stats(), "top_classes": t.top_classes(n=n), } def admin_coalescing_set(req: CoalescingConfigRequest, request: Request): from ..middleware.query_coalescing import get_coalescing_tracker t = get_coalescing_tracker() t.set_enabled(req.enabled) get_obs().audit( "coalescing.toggle", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), enabled=req.enabled, ) return {"ok": True, **t.stats()} def admin_coalescing_reset(request: Request): from ..middleware.query_coalescing import get_coalescing_tracker get_coalescing_tracker().reset() return {"ok": True} # ---- v2.42 Tenant flag overrides -------------------------------------- class TenantFlagSetRequest(BaseModel): flag_name: str tenant_value: str value: bool def admin_tenant_flags_get(): from ..middleware.tenant_flags import get_tenant_flags s = get_tenant_flags() return { "stats": s.stats(), "entries": s.all_entries(), } def admin_tenant_flags_set(req: TenantFlagSetRequest, request: Request): from ..middleware.tenant_flags import get_tenant_flags s = get_tenant_flags() s.set_override(req.flag_name, req.tenant_value, req.value) get_obs().audit( "tenant_flag.set", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), flag_name=req.flag_name, tenant_value=req.tenant_value, value=req.value, ) return {"ok": True, **s.stats()} def admin_tenant_flags_remove(flag_name: str, tenant_value: str, request: Request): from ..middleware.tenant_flags import get_tenant_flags s = get_tenant_flags() if not s.remove_override(flag_name, tenant_value): raise HTTPException(status_code=404, detail={"error": "no override", "flag_name": flag_name, 
"tenant_value": tenant_value}) return {"ok": True, "removed": True, "flag_name": flag_name, "tenant_value": tenant_value} # ---- v2.43 Response redaction ------------------------------------------ def admin_response_redact_get(): from ..middleware.response_redact import get_response_redactor r = get_response_redactor() return { "stats": r.stats(), "rules": r.describe(), } def admin_response_redact_clear(request: Request): from ..middleware.response_redact import get_response_redactor get_response_redactor().clear() get_obs().audit( "response_redact.clear", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), ) return {"ok": True} # ---- v2.44 Config snapshot -------------------------------------------- def admin_config_snapshot(request: Request): """Export current middleware state as JSON. Secrets redacted.""" from ..middleware.config_snapshot import build_snapshot snap = build_snapshot() get_obs().audit( "config_snapshot.export", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), ) return snap class ConfigRestoreRequest(BaseModel): snapshot: Dict[str, Any] def admin_config_restore(req: ConfigRestoreRequest, request: Request): """Restore middleware state from a snapshot. Returns a report of what was applied vs skipped. 
Secrets are never restored.""" from ..middleware.config_snapshot import restore_snapshot report = restore_snapshot(req.snapshot) get_obs().audit( "config_snapshot.restore", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), applied=report.get("applied", []), errors=report.get("errors", []), ) return report # ---- v2.45 Shadow pipeline -------------------------------------------- class ShadowConfigRequest(BaseModel): enabled: Optional[bool] = None sample_rate: Optional[float] = None def admin_shadow_get(n: int = 20): from ..middleware.shadow_pipeline import get_shadow_tracker t = get_shadow_tracker() return { "stats": t.stats(), "recent_comparisons": t.recent_comparisons(n=n), } def admin_shadow_config(req: ShadowConfigRequest, request: Request): from ..middleware.shadow_pipeline import get_shadow_tracker t = get_shadow_tracker() if req.enabled is not None: t.set_enabled(req.enabled) if req.sample_rate is not None: t.set_sample_rate(req.sample_rate) get_obs().audit( "shadow.config", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), enabled=req.enabled, sample_rate=req.sample_rate, ) return {"ok": True, **t.stats()} def admin_shadow_reset(request: Request): from ..middleware.shadow_pipeline import get_shadow_tracker get_shadow_tracker().reset() return {"ok": True} # ---- v2.46 Scheduled tasks --------------------------------------------- def admin_scheduler_get(): from ..middleware.scheduled_tasks import get_scheduler s = get_scheduler() return {"stats": s.stats(), "tasks": s.list_tasks()} class SchedulerControlRequest(BaseModel): action: str # "start" | "stop" def admin_scheduler_control(req: SchedulerControlRequest, request: Request): from ..middleware.scheduled_tasks import get_scheduler s = get_scheduler() if req.action == "start": started = s.start() return {"ok": True, "started": started} elif req.action == "stop": stopped = s.stop() return {"ok": True, "stopped": 
stopped} raise HTTPException(status_code=400, detail={"error": "action must be start|stop"}) def admin_scheduler_run_now(name: str, request: Request): from ..middleware.scheduled_tasks import get_scheduler s = get_scheduler() if not s.run_now(name): raise HTTPException(status_code=404, detail={"error": "task not found", "name": name}) return {"ok": True, "ran": name} def admin_scheduler_toggle(name: str, enabled: bool = True, request: Request = None): from ..middleware.scheduled_tasks import get_scheduler s = get_scheduler() if not s.set_enabled(name, enabled): raise HTTPException(status_code=404, detail={"error": "task not found", "name": name}) return {"ok": True, "name": name, "enabled": enabled} # ---- v2.47 Pipeline stage breakers -------------------------------------- def admin_stage_breakers_get(): from ..middleware.stage_breakers import get_stage_breakers s = get_stage_breakers() return {"stats": s.stats(), "breakers": s.all_breakers()} def admin_stage_breaker_reset(name: str, request: Request): from ..middleware.stage_breakers import get_stage_breakers s = get_stage_breakers() if not s.reset(name): raise HTTPException(status_code=404, detail={"error": "breaker not registered", "name": name}) get_obs().audit( "stage_breaker.reset", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), name=name, ) return {"ok": True, "name": name, "state": "closed"} # ---- v2.48 Traceparent --------------------------------------------------- class TraceparentConfigRequest(BaseModel): enabled: bool def admin_traceparent_get(): from ..middleware.traceparent import get_traceparent_manager return get_traceparent_manager().stats() def admin_traceparent_set(req: TraceparentConfigRequest, request: Request): from ..middleware.traceparent import get_traceparent_manager m = get_traceparent_manager() m.set_enabled(req.enabled) get_obs().audit( "traceparent.toggle", actor_key=request.headers.get("x-api-key"), 
request_id=getattr(request.state, "request_id", None), enabled=req.enabled, ) return {"ok": True, **m.stats()} # ---- v2.49 JWT -------------------------------------------------------- class JWTConfigRequest(BaseModel): secret: Optional[str] = None expected_issuer: Optional[str] = None leeway_sec: Optional[float] = None def admin_jwt_get(): from ..middleware.jwt_auth import get_jwt_store return get_jwt_store().stats() def admin_jwt_config(req: JWTConfigRequest, request: Request): from ..middleware.jwt_auth import get_jwt_store s = get_jwt_store() if req.secret is not None: s.set_secret(req.secret or None) if req.expected_issuer is not None: s.set_expected_issuer(req.expected_issuer or None) if req.leeway_sec is not None: s.set_leeway(req.leeway_sec) get_obs().audit( "jwt.config", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), expected_issuer=req.expected_issuer, leeway_sec=req.leeway_sec, secret_set=(req.secret is not None), ) return {"ok": True, **s.stats()} # ---- v2.50 Bulk document import ---------------------------------------- class BulkImportItem(BaseModel): id: str text: str class BulkImportRequest(BaseModel): documents: List[BulkImportItem] @app.post("/v1/documents/bulk_async", status_code=202) def bulk_import_docs(req: BulkImportRequest, request: Request): """Start a bulk import job. Returns job_id immediately. 
(v2.50)""" from ..middleware.bulk_import import get_bulk_importer from ..middleware.async_jobs import get_job_queue from ..core.types import Document if not req.documents: raise HTTPException(status_code=400, detail={"error": "empty document list"}) if len(req.documents) > 5000: raise HTTPException(status_code=413, detail={"error": "batch too large", "limit": 5000, "got": len(req.documents)}) importer = get_bulk_importer() def _import_worker(docs_snapshot, job_id): importer.start_job(job_id, total=len(docs_snapshot)) # Add in chunks so the pipeline stays responsive chunk_size = 50 for i in range(0, len(docs_snapshot), chunk_size): chunk = docs_snapshot[i:i + chunk_size] try: _pipeline.add_documents([ Document(id=d["id"], text=d["text"]) for d in chunk ]) for d in chunk: importer.record_item(job_id, d["id"], ok=True) except Exception as e: error_msg = f"{type(e).__name__}: {e}"[:200] for d in chunk: importer.record_item(job_id, d["id"], ok=False, error=error_msg) importer.mark_finished(job_id) docs_data = [{"id": d.id, "text": d.text} for d in req.documents] queue = get_job_queue() # Pre-allocate a job_id so the progress entry exists before worker runs # Put a placeholder import uuid job_id = "bulk_" + uuid.uuid4().hex[:16] # Start via queue but give it our pre-chosen id via meta queue_id = queue.enqueue( lambda: _import_worker(docs_data, job_id), kind="bulk_import", meta={"bulk_job_id": job_id, "n_docs": len(docs_data)}, ) # Seed initial progress record so client polling works immediately importer.start_job(job_id, total=len(docs_data)) return {"job_id": job_id, "queue_id": queue_id, "status": "queued", "total": len(docs_data)} @app.get("/v1/documents/bulk_async/{job_id}") def bulk_import_status(job_id: str): from ..middleware.bulk_import import get_bulk_importer p = get_bulk_importer().get(job_id) if p is None: raise HTTPException(status_code=404, detail={"error": "bulk job not found", "job_id": job_id}) return p.to_dict() 
# NOTE: handlers in this section are documented with `#` comments rather than
# new docstrings, because FastAPI publishes a handler's docstring as its
# OpenAPI description and adding one would change the generated schema.

# Cancel a bulk job; 404 when the importer refuses to mark it cancelled.
@app.post("/v1/documents/bulk_async/{job_id}/cancel")
def bulk_import_cancel(job_id: str, request: Request):
    from ..middleware.bulk_import import get_bulk_importer
    was_cancelled = get_bulk_importer().mark_cancelled(job_id)
    if not was_cancelled:
        raise HTTPException(
            status_code=404,
            detail={"error": "job not cancellable", "job_id": job_id},
        )
    return {"ok": True, "job_id": job_id, "cancelled": True}


# Importer stats plus every known job.
@app.get("/v1/admin/bulk_import")
def admin_bulk_import_list():
    from ..middleware.bulk_import import get_bulk_importer
    importer = get_bulk_importer()
    return {"stats": importer.stats(), "jobs": importer.all_jobs()}


# ---- v2.51 Document ACL ------------------------------------------------

# Body for setting a document's labels.
class DocACLSetRequest(BaseModel):
    doc_id: str
    labels: List[str]


# Body for adding a single label to a document.
class DocACLLabelRequest(BaseModel):
    doc_id: str
    label: str


# ACL store stats plus every ACL entry.
@app.get("/v1/admin/doc_acl")
def admin_doc_acl_get():
    from ..middleware.doc_acl import get_doc_acl
    acl_store = get_doc_acl()
    return {"stats": acl_store.stats(), "entries": acl_store.all_acls()}


# Set the document's labels, audit, and return the sorted required labels.
@app.post("/v1/admin/doc_acl/set")
def admin_doc_acl_set(req: DocACLSetRequest, request: Request):
    from ..middleware.doc_acl import get_doc_acl
    acl_store = get_doc_acl()
    acl_store.set_labels(req.doc_id, req.labels)
    get_obs().audit(
        "doc_acl.set",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        doc_id=req.doc_id,
        labels=req.labels,
    )
    current_labels = sorted(acl_store.required_labels(req.doc_id))
    return {"ok": True, "doc_id": req.doc_id, "labels": current_labels}


# Add one label and return the sorted required labels (no audit event here).
@app.post("/v1/admin/doc_acl/add")
def admin_doc_acl_add(req: DocACLLabelRequest, request: Request):
    from ..middleware.doc_acl import get_doc_acl
    acl_store = get_doc_acl()
    acl_store.add_label(req.doc_id, req.label)
    current_labels = sorted(acl_store.required_labels(req.doc_id))
    return {"ok": True, "doc_id": req.doc_id, "labels": current_labels}


# Clear a document's ACL; 404 when the store reports nothing cleared.
@app.delete("/v1/admin/doc_acl/{doc_id}")
def admin_doc_acl_clear(doc_id: str, request: Request):
    from ..middleware.doc_acl import get_doc_acl
    acl_store = get_doc_acl()
    if acl_store.clear(doc_id):
        return {"ok": True, "doc_id": doc_id, "cleared": True}
    raise HTTPException(
        status_code=404,
        detail={"error": "no ACL", "doc_id": doc_id},
    )


# ---- v2.52 Baggage ------------------------------------------------------

# Body for toggling baggage handling.
class BaggageConfigRequest(BaseModel):
    enabled: bool


# Baggage manager stats.
@app.get("/v1/admin/baggage")
def admin_baggage_get():
    from ..middleware.baggage import get_baggage_manager
    manager = get_baggage_manager()
    return manager.stats()


# Toggle the manager, audit the change, and return fresh stats.
@app.post("/v1/admin/baggage")
def admin_baggage_set(req: BaggageConfigRequest, request: Request):
    from ..middleware.baggage import get_baggage_manager
    manager = get_baggage_manager()
    manager.set_enabled(req.enabled)
    get_obs().audit(
        "baggage.toggle",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        enabled=req.enabled,
    )
    payload = {"ok": True}
    payload.update(manager.stats())
    return payload


# ---- v2.53 Canary -------------------------------------------------------

# One weighted variant of a canary route.
class CanaryVariantRequest(BaseModel):
    name: str
    weight: float


# Body for replacing the variants of one route.
class CanarySetRoutesRequest(BaseModel):
    route_key: str
    variants: List[CanaryVariantRequest]


# Router stats plus the split for every known route.
@app.get("/v1/admin/canary")
def admin_canary_get():
    from ..middleware.canary import get_canary_router
    router = get_canary_router()
    splits = {}
    for route_key in router.all_routes():
        splits[route_key] = router.get_split(route_key)
    return {"stats": router.stats(), "routes": splits}


# Replace a route's variants. A ValueError from the router becomes a 400.
@app.post("/v1/admin/canary/routes")
def admin_canary_set_route(req: CanarySetRoutesRequest, request: Request):
    from ..middleware.canary import get_canary_router

    def _variant_dicts():
        # Built fresh on each call so the router and the audit log never
        # share the same list object.
        return [{"name": v.name, "weight": v.weight} for v in req.variants]

    router = get_canary_router()
    try:
        router.set_variants(req.route_key, _variant_dicts())
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    get_obs().audit(
        "canary.set_variants",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        route_key=req.route_key,
        variants=_variant_dicts(),
    )
    payload = {"ok": True}
    payload.update(router.get_split(req.route_key))
    return payload


# Remove a route; 404 when the router reports nothing removed.
@app.delete("/v1/admin/canary/routes/{route_key}")
def admin_canary_remove(route_key: str, request: Request):
    from ..middleware.canary import get_canary_router
    router = get_canary_router()
    if router.remove_route(route_key):
        return {"ok": True, "route_key": route_key, "removed": True}
    raise HTTPException(
        status_code=404,
        detail={"error": "route not found", "route_key": route_key},
    )


class CanaryStickyRequest(BaseModel):
    strategy: str  # "none" | "hash"


# Set the sticky strategy. A ValueError from the router becomes a 400.
@app.post("/v1/admin/canary/sticky")
def admin_canary_sticky(req: CanaryStickyRequest, request: Request):
    from ..middleware.canary import get_canary_router
    router = get_canary_router()
    try:
        router.set_sticky(req.strategy)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    payload = {"ok": True}
    payload.update(router.stats())
    return payload


# ---- v2.54 Autocomplete ------------------------------------------------

@app.get("/v1/autocomplete")
def autocomplete_suggest(prefix: str, limit: int = 10):
    """Suggest queries matching the given prefix."""
    from ..middleware.autocomplete import get_autocomplete
    completer = get_autocomplete()
    suggestions = completer.suggest(prefix, limit=limit)
    return {"prefix": prefix, "suggestions": suggestions}


# Body carrying one query to record.
class AutocompleteObserveRequest(BaseModel):
    query: str


@app.post("/v1/autocomplete/observe")
def autocomplete_observe(req: AutocompleteObserveRequest):
    """Record a query for future suggestions. Usually called automatically
    from the search pipeline; manual endpoint for seeding + testing."""
    from ..middleware.autocomplete import get_autocomplete
    completer = get_autocomplete()
    completer.observe(req.query)
    return {"ok": True}


# Autocomplete stats plus its top `top_n` entries.
@app.get("/v1/admin/autocomplete")
def admin_autocomplete_get(top_n: int = 20):
    from ..middleware.autocomplete import get_autocomplete
    completer = get_autocomplete()
    return {"stats": completer.stats(), "top": completer.top(n=top_n)}


# Body for toggling autocomplete.
class AutocompleteConfigRequest(BaseModel):
    enabled: bool


# Toggle autocomplete and return fresh stats (no audit event here).
@app.post("/v1/admin/autocomplete")
def admin_autocomplete_set(req: AutocompleteConfigRequest, request: Request):
    from ..middleware.autocomplete import get_autocomplete
    completer = get_autocomplete()
    completer.set_enabled(req.enabled)
    payload = {"ok": True}
    payload.update(completer.stats())
    return payload


# Reset autocomplete state (no audit event here).
@app.post("/v1/admin/autocomplete/reset")
def admin_autocomplete_reset(request: Request):
    from ..middleware.autocomplete import get_autocomplete
    completer = get_autocomplete()
    completer.reset()
    return {"ok": True}


# ---- v2.55 Pre-deploy eval gate ----------------------------------------

@app.post("/v1/admin/eval_gate")
def admin_eval_gate_run(request: Request):
    """Run all registered eval checks; return pass/fail."""
    from ..middleware.eval_gate import get_eval_gate
    gate = get_eval_gate()
    report = gate.run()
    get_obs().audit(
        "eval_gate.run",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        passed=report["passed"],
        n_checks=report["n_checks"],
        n_failed=report["n_failed"],
    )
    return report


# Gate stats plus the registered checks.
@app.get("/v1/admin/eval_gate")
def admin_eval_gate_list():
    from ..middleware.eval_gate import get_eval_gate
    gate = get_eval_gate()
    return {"stats": gate.stats(), "checks": gate.list_checks()}


# ---- v2.56 Near-duplicate detection ------------------------------------

# Body for indexing one document's text.
class NearDupIndexRequest(BaseModel):
    doc_id: str
    text: str


# Body for a near-duplicate lookup by text.
class NearDupQueryRequest(BaseModel):
    text: str
    threshold: float = 0.8
    limit: int = 10


# Near-duplicate index stats.
@app.get("/v1/admin/near_dup")
def admin_near_dup_get():
    from ..middleware.near_dup import get_near_dup_index
    index = get_near_dup_index()
    return index.stats()
# NOTE: handlers in this section are documented with `#` comments rather than
# new docstrings, because FastAPI publishes a handler's docstring as its
# OpenAPI description and adding one would change the generated schema.

# Index one document's text and report the index's n_signatures stat.
@app.post("/v1/admin/near_dup/index")
def admin_near_dup_index(req: NearDupIndexRequest, request: Request):
    from ..middleware.near_dup import get_near_dup_index
    i = get_near_dup_index()
    i.index(req.doc_id, req.text)
    return {"ok": True, "doc_id": req.doc_id,
            "n_signatures": i.stats()["n_signatures"]}


# Remove a document from the index; 404 when nothing was removed.
@app.delete("/v1/admin/near_dup/{doc_id}")
def admin_near_dup_remove(doc_id: str, request: Request):
    from ..middleware.near_dup import get_near_dup_index
    i = get_near_dup_index()
    if not i.remove(doc_id):
        raise HTTPException(status_code=404,
                            detail={"error": "not indexed", "doc_id": doc_id})
    return {"ok": True, "doc_id": doc_id, "removed": True}


# Look up near-duplicates of the given text.
@app.post("/v1/admin/near_dup/query")
def admin_near_dup_query(req: NearDupQueryRequest):
    from ..middleware.near_dup import get_near_dup_index
    matches = get_near_dup_index().find_near_dups_for_text(
        req.text, threshold=req.threshold, limit=req.limit)
    return {
        "threshold": req.threshold,
        "matches": [{"doc_id": m.doc_id, "similarity": m.similarity}
                    for m in matches],
    }


# Pairs reported by the index at the given threshold.
@app.get("/v1/admin/near_dup/pairs")
def admin_near_dup_pairs(threshold: float = 0.8):
    from ..middleware.near_dup import get_near_dup_index
    return {"pairs": get_near_dup_index().pairs(threshold=threshold)}


# ---- v2.57 Query intent classification ---------------------------------

# Body carrying the query to classify.
class ClassifyIntentRequest(BaseModel):
    query: str


@app.post("/v1/query/intent")
def query_intent(req: ClassifyIntentRequest):
    """Classify a query's intent. Useful for client-side routing or
    UX adaptation."""
    from ..middleware.query_intent import get_intent_classifier
    result = get_intent_classifier().classify(req.query)
    return {
        "query": req.query,
        "intent": result.intent,
        "confidence": result.confidence,
        "matched_pattern": result.matched_pattern,
        "all_scores": result.all_scores,
    }


# Intent classifier stats.
@app.get("/v1/admin/query_intent")
def admin_query_intent_get():
    from ..middleware.query_intent import get_intent_classifier
    return get_intent_classifier().stats()


# Body for toggling the intent classifier.
class IntentConfigRequest(BaseModel):
    enabled: bool


# Toggle the classifier and return fresh stats.
@app.post("/v1/admin/query_intent")
def admin_query_intent_set(req: IntentConfigRequest, request: Request):
    from ..middleware.query_intent import get_intent_classifier
    c = get_intent_classifier()
    c.set_enabled(req.enabled)
    return {"ok": True, **c.stats()}


# Reset the classifier.
@app.post("/v1/admin/query_intent/reset")
def admin_query_intent_reset(request: Request):
    from ..middleware.query_intent import get_intent_classifier
    get_intent_classifier().reset()
    return {"ok": True}


# ---- v2.58 Document versioning -----------------------------------------

# Body for recording a new version of a document.
class DocVersionAddRequest(BaseModel):
    doc_id: str
    text: str
    note: Optional[str] = None


# Record a new version attributed to the hashed x-api-key, then audit.
@app.post("/v1/admin/doc_versions")
def admin_doc_version_add(req: DocVersionAddRequest, request: Request):
    from ..middleware.doc_versioning import get_doc_versions
    s = get_doc_versions()
    actor = _hash_prefix(request.headers.get("x-api-key"))
    v = s.add_version(req.doc_id, req.text, created_by=actor, note=req.note)
    get_obs().audit(
        "doc_version.add",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        doc_id=req.doc_id, version_id=v.version_id,
    )
    return {"ok": True, "version": v.to_dict()}


# List the versions stored for one document.
@app.get("/v1/admin/doc_versions/{doc_id}")
def admin_doc_version_list(doc_id: str):
    from ..middleware.doc_versioning import get_doc_versions
    s = get_doc_versions()
    return {"doc_id": doc_id, "versions": s.list_versions(doc_id)}


# Diff two versions (query params va, vb) of one document.
# This route MUST be registered before the "/{doc_id}/{version_id}" route
# below: routes are matched in registration order, and the parameterised
# route would otherwise capture ".../diff" as version_id="diff", making this
# endpoint unreachable.
@app.get("/v1/admin/doc_versions/{doc_id}/diff")
def admin_doc_version_diff(doc_id: str, va: str, vb: str):
    from ..middleware.doc_versioning import get_doc_versions
    return get_doc_versions().diff(doc_id, va, vb)


# Fetch one stored version; 404 when the store returns None.
@app.get("/v1/admin/doc_versions/{doc_id}/{version_id}")
def admin_doc_version_get(doc_id: str, version_id: str):
    from ..middleware.doc_versioning import get_doc_versions
    v = get_doc_versions().get_version(doc_id, version_id)
    if v is None:
        raise HTTPException(status_code=404,
                            detail={"error": "version not found",
                                    "doc_id": doc_id,
                                    "version_id": version_id})
    return v


# Drop a document's versions; "removed" is whatever remove_doc returned.
@app.delete("/v1/admin/doc_versions/{doc_id}")
def admin_doc_version_remove_doc(doc_id: str, request: Request):
    from ..middleware.doc_versioning import get_doc_versions
    n = get_doc_versions().remove_doc(doc_id)
    return {"ok": True, "doc_id": doc_id, "removed": n}


# Version store stats.
@app.get("/v1/admin/doc_versions")
def admin_doc_version_stats():
    from ..middleware.doc_versioning import get_doc_versions
    return get_doc_versions().stats()


# ---- v2.59 Concurrency limit -------------------------------------------

# Body for setting the limit of one key prefix.
class ConcurrencyLimitRequest(BaseModel):
    key_prefix: str
    limit: int


# Body for setting the default limit.
class ConcurrencyDefaultRequest(BaseModel):
    default_limit: int


# Limiter stats plus all in-flight entries.
@app.get("/v1/admin/concurrency")
def admin_concurrency_get():
    from ..middleware.concurrency_limit import get_concurrency_limiter
    limiter = get_concurrency_limiter()
    return {"stats": limiter.stats(), "inflight": limiter.all_inflight()}


# Set the limit for one key prefix and audit it.
@app.post("/v1/admin/concurrency/key")
def admin_concurrency_set_key(req: ConcurrencyLimitRequest, request: Request):
    from ..middleware.concurrency_limit import get_concurrency_limiter
    limiter = get_concurrency_limiter()
    limiter.set_limit(req.key_prefix, req.limit)
    get_obs().audit(
        "concurrency.set_key",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        key_prefix=req.key_prefix, limit=req.limit,
    )
    return {"ok": True, "key_prefix": req.key_prefix, "limit": req.limit}


# Set the default limit and echo the limiter's resulting default_limit.
@app.post("/v1/admin/concurrency/default")
def admin_concurrency_set_default(req: ConcurrencyDefaultRequest,
                                  request: Request):
    from ..middleware.concurrency_limit import get_concurrency_limiter
    limiter = get_concurrency_limiter()
    limiter.set_default(req.default_limit)
    return {"ok": True, "default_limit": limiter.default_limit}


# ---- v2.60 Language detection ------------------------------------------

# Body carrying the text to analyse.
class DetectLangRequest(BaseModel):
    text: str


# Run the language detector and return its result fields.
@app.post("/v1/query/detect_language")
def query_detect_language(req: DetectLangRequest):
    from ..middleware.lang_detect import get_language_detector
    r = get_language_detector().detect(req.text)
    return {
        "language": r.language,
        "confidence": r.confidence,
        "scores": r.scores,
        "n_chars": r.n_chars_total,
    }


# Language detector stats.
@app.get("/v1/admin/language")
def admin_language_get():
    from ..middleware.lang_detect import get_language_detector
    return get_language_detector().stats()


# Reset the language detector.
@app.post("/v1/admin/language/reset")
def admin_language_reset(request: Request):
    from ..middleware.lang_detect import get_language_detector
    get_language_detector().reset()
    return {"ok": True}


# ---- v2.61 XSS sanitizer -----------------------------------------------

# Body carrying one path to register.
class XSSPathRequest(BaseModel):
    path: str


# Registered paths plus sanitizer stats.
@app.get("/v1/admin/xss")
def admin_xss_get():
    from ..middleware.xss_sanitize import get_xss_sanitizer
    s = get_xss_sanitizer()
    return {"paths": s.all_paths(), "stats": s.stats()}


# Register a path, audit it, and return the resulting path list.
@app.post("/v1/admin/xss/register")
def admin_xss_register(req: XSSPathRequest, request: Request):
    from ..middleware.xss_sanitize import get_xss_sanitizer
    s = get_xss_sanitizer()
    s.register(req.path)
    get_obs().audit(
        "xss.register",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        path=req.path,
    )
    return {"ok": True, "paths": s.all_paths()}


# Unregister a path; the ":path" converter lets the value contain slashes.
@app.delete("/v1/admin/xss/{path:path}")
def admin_xss_unregister(path: str, request: Request):
    from ..middleware.xss_sanitize import get_xss_sanitizer
    s = get_xss_sanitizer()
    if not s.unregister(path):
        raise HTTPException(status_code=404,
                            detail={"error": "path not registered",
                                    "path": path})
    return {"ok": True, "path": path, "removed": True}


# ---- v2.62 Resource pool ------------------------------------------------

# Body for registering a named pool with a capacity.
class ResourcePoolRegisterRequest(BaseModel):
    name: str
    capacity: int


# Registry stats plus every pool.
@app.get("/v1/admin/resource_pools")
def admin_resource_pools_get():
    from ..middleware.resource_pool import get_resource_pool
    p = get_resource_pool()
    return {"stats": p.stats(), "pools": p.all_pools()}


# Register a pool and audit it. A ValueError from register becomes a 400.
@app.post("/v1/admin/resource_pools")
def admin_resource_pool_register(req: ResourcePoolRegisterRequest,
                                 request: Request):
    from ..middleware.resource_pool import get_resource_pool
    p = get_resource_pool()
    try:
        pool = p.register(req.name, req.capacity)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    get_obs().audit(
        "resource_pool.register",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        name=req.name, capacity=req.capacity,
    )
    return {"ok": True, "name": pool.name, "capacity": pool.capacity}


# Unregister a pool; 404 when nothing was unregistered.
@app.delete("/v1/admin/resource_pools/{name}")
def admin_resource_pool_remove(name: str, request: Request):
    from ..middleware.resource_pool import get_resource_pool
    p = get_resource_pool()
    if not p.unregister(name):
        raise HTTPException(status_code=404,
                            detail={"error": "not registered", "name": name})
    return {"ok": True, "name": name, "removed": True}


# ---- v2.63 Cache warmup ------------------------------------------------

# Body naming a file to warm from.
class CacheWarmupFromFileRequest(BaseModel):
    path: str


# Body carrying an explicit list of queries to warm.
class CacheWarmupListRequest(BaseModel):
    queries: List[Dict[str, Any]]


# Current warmup progress.
@app.get("/v1/admin/cache_warmup")
def admin_cache_warmup_get():
    from ..middleware.cache_warmup import get_cache_warmer
    return get_cache_warmer().progress()


# Start a warmup from a file; a falsy result from the warmer becomes a 400.
# NOTE(review): req.path is handed to the warmer unchecked — confirm the
# warmer restricts which files may be read.
@app.post("/v1/admin/cache_warmup/from_file")
def admin_cache_warmup_from_file(req: CacheWarmupFromFileRequest,
                                 request: Request):
    from ..middleware.cache_warmup import get_cache_warmer
    w = get_cache_warmer()
    if not w.warm_from_file(req.path):
        raise HTTPException(status_code=400,
                            detail={"error": "file not found or "
                                             "warmup already running",
                                    "path": req.path})
    get_obs().audit(
        "cache_warmup.from_file",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        path=req.path,
    )
    return {"ok": True, "status": "started"}


# Start a warmup from the top_n autocomplete entries; 400 when refused.
@app.post("/v1/admin/cache_warmup/from_autocomplete")
def admin_cache_warmup_from_autocomplete(top_n: int = 50,
                                         request: Request = None):
    from ..middleware.cache_warmup import get_cache_warmer
    w = get_cache_warmer()
    if not w.warm_from_autocomplete(top_n=top_n):
        raise HTTPException(status_code=400,
                            detail={"error": "warmup already running"})
    return {"ok": True, "status": "started", "top_n": top_n}


# Start a warmup from an explicit query list; 400 when refused.
@app.post("/v1/admin/cache_warmup/from_list")
def admin_cache_warmup_from_list(req: CacheWarmupListRequest, request: Request):
    from ..middleware.cache_warmup import get_cache_warmer
    w = get_cache_warmer()
    if not w.warm_from_list(req.queries):
        raise HTTPException(status_code=400,
                            detail={"error": "warmup already running"})
    return {"ok": True, "status": "started", "n_queries": len(req.queries)}


# Cancel the running warmup; 400 when cancel() reports failure.
@app.post("/v1/admin/cache_warmup/cancel")
def admin_cache_warmup_cancel(request: Request):
    from ..middleware.cache_warmup import get_cache_warmer
    if not get_cache_warmer().cancel():
        raise HTTPException(status_code=400,
                            detail={"error": "no warmup running"})
    return {"ok": True}


# ---- v2.64 Lazy init registry ------------------------------------------

# Registry summary plus per-entry status.
@app.get("/v1/admin/lazy_init")
def admin_lazy_init_get():
    from ..middleware.lazy_init import get_lazy_registry
    r = get_lazy_registry()
    return {"summary": r.summary(), "entries": r.status()}


# Preload one entry; 404 when unregistered. "ok" mirrors preload()'s result.
@app.post("/v1/admin/lazy_init/preload/{name}")
def admin_lazy_init_preload(name: str, request: Request):
    from ..middleware.lazy_init import get_lazy_registry
    r = get_lazy_registry()
    if not r.is_registered(name):
        raise HTTPException(status_code=404,
                            detail={"error": "not registered", "name": name})
    ok = r.preload(name)
    get_obs().audit(
        "lazy_init.preload",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        name=name, ok=ok,
    )
    return {"ok": ok, "name": name, "status": r.get_status(name)}


# Reset one entry and audit it; 404 when reset() reports failure.
@app.post("/v1/admin/lazy_init/reset/{name}")
def admin_lazy_init_reset(name: str, request: Request):
    from ..middleware.lazy_init import get_lazy_registry
    r = get_lazy_registry()
    if not r.reset(name):
        raise HTTPException(status_code=404,
                            detail={"error": "not registered", "name": name})
    get_obs().audit(
        "lazy_init.reset",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        name=name,
    )
    return {"ok": True, "name": name}


# Preload everything, audit the result count, and return results + summary.
@app.post("/v1/admin/lazy_init/preload_all")
def admin_lazy_init_preload_all(request: Request):
    from ..middleware.lazy_init import get_lazy_registry
    r = get_lazy_registry()
    results = r.preload_all()
    get_obs().audit(
        "lazy_init.preload_all",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        n=len(results),
    )
    return {"ok": True, "results": results, "summary": r.summary()}


# ---- v2.65 Pipeline failover ------------------------------------------

# Body for setting the failover policy.
class FailoverPolicyRequest(BaseModel):
    policy: str


# Body for setting one pipeline's priority.
class FailoverPriorityRequest(BaseModel):
    name: str
    priority: int


# Current policy, chain and stats.
@app.get("/v1/admin/failover")
def admin_failover_get():
    from ..middleware.pipeline_failover import get_failover_router
    r = get_failover_router()
    return {"policy": r.policy(), "chain": r.chain(), "stats": r.stats()}


# Set the policy and audit it. A ValueError from the router becomes a 400.
@app.post("/v1/admin/failover/policy")
def admin_failover_set_policy(req: FailoverPolicyRequest, request: Request):
    from ..middleware.pipeline_failover import get_failover_router
    r = get_failover_router()
    try:
        r.set_policy(req.policy)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    get_obs().audit(
        "failover.set_policy",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        policy=req.policy,
    )
    return {"ok": True, "policy": r.policy()}


# Set one priority and audit it; 404 when set_priority reports failure.
@app.post("/v1/admin/failover/priority")
def admin_failover_set_priority(req: FailoverPriorityRequest, request: Request):
    from ..middleware.pipeline_failover import get_failover_router
    r = get_failover_router()
    if not r.set_priority(req.name, req.priority):
        raise HTTPException(status_code=404,
                            detail={"error": "not registered",
                                    "name": req.name})
    get_obs().audit(
        "failover.set_priority",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        name=req.name, priority=req.priority,
    )
    return {"ok": True, "name": req.name, "priority": req.priority,
            "chain": r.chain()}


# Reset the failover router and audit it.
@app.post("/v1/admin/failover/reset")
def admin_failover_reset(request: Request):
    from ..middleware.pipeline_failover import get_failover_router
    get_failover_router().reset()
    get_obs().audit(
        "failover.reset",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
    )
    return {"ok": True}


# ---- v2.66 Intent-aware reranker ---------------------------------------

# Body for toggling the intent reranker.
class IntentRerankEnableRequest(BaseModel):
    enabled: bool


# Body for setting one (intent, doc_type) weight.
class IntentRerankWeightRequest(BaseModel):
    intent: str
    doc_type: str
    weight: float


# Reranker stats plus its weights.
@app.get("/v1/admin/intent_rerank")
def admin_intent_rerank_get():
    from ..middleware.intent_rerank import get_intent_reranker
    r = get_intent_reranker()
    return {"stats": r.stats(), "weights": r.weights()}


# Toggle the reranker, audit it, and echo the resulting enabled state.
@app.post("/v1/admin/intent_rerank/enable")
def admin_intent_rerank_enable(req: IntentRerankEnableRequest, request: Request):
    from ..middleware.intent_rerank import get_intent_reranker
    r = get_intent_reranker()
    r.set_enabled(req.enabled)
    get_obs().audit(
        "intent_rerank.enable",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        enabled=req.enabled,
    )
    return {"ok": True, "enabled": r.is_enabled()}


# Set one weight and audit it. A ValueError becomes a 400. The reply reads
# the weight back from the reranker rather than echoing the request.
@app.post("/v1/admin/intent_rerank/weight")
def admin_intent_rerank_set_weight(req: IntentRerankWeightRequest,
                                   request: Request):
    from ..middleware.intent_rerank import get_intent_reranker
    r = get_intent_reranker()
    try:
        r.set_weight(req.intent, req.doc_type, req.weight)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    get_obs().audit(
        "intent_rerank.set_weight",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        intent=req.intent, doc_type=req.doc_type, weight=req.weight,
    )
    return {"ok": True, "intent": req.intent, "doc_type": req.doc_type,
            "weight": r.get_weight(req.intent, req.doc_type)}


# Reset the reranker.
@app.post("/v1/admin/intent_rerank/reset")
def admin_intent_rerank_reset(request: Request):
    from ..middleware.intent_rerank import get_intent_reranker
    get_intent_reranker().reset()
    return {"ok": True}


# ---- v2.67 Stage budgets ------------------------------------------------

# Body for registering a stage's budget in milliseconds.
class StageBudgetRegisterRequest(BaseModel):
    stage: str
    budget_ms: float


# Body for reporting one stage duration in milliseconds.
class StageBudgetTrackRequest(BaseModel):
    stage: str
    duration_ms: float


# Stage budget stats.
@app.get("/v1/admin/stage_budgets")
def admin_stage_budgets_get():
    from ..middleware.stage_budgets import get_stage_budgets
    return get_stage_budgets().stats()


# Register a budget and audit it. A ValueError becomes a 400.
@app.post("/v1/admin/stage_budgets")
def admin_stage_budgets_register(req: StageBudgetRegisterRequest,
                                 request: Request):
    from ..middleware.stage_budgets import get_stage_budgets
    s = get_stage_budgets()
    try:
        s.register(req.stage, req.budget_ms)
    except ValueError as e:
        raise HTTPException(status_code=400, detail={"error": str(e)})
    get_obs().audit(
        "stage_budgets.register",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        stage=req.stage, budget_ms=req.budget_ms,
    )
    return {"ok": True, "stage": req.stage, "budget_ms": req.budget_ms}


# Unregister a stage budget; 404 when nothing was unregistered.
@app.delete("/v1/admin/stage_budgets/{stage}")
def admin_stage_budgets_unregister(stage: str, request: Request):
    from ..middleware.stage_budgets import get_stage_budgets
    if not get_stage_budgets().unregister(stage):
        raise HTTPException(status_code=404,
                            detail={"error": "not registered", "stage": stage})
    return {"ok": True, "stage": stage, "removed": True}
# Feed one (stage, duration_ms) sample to the stage-budget object and return
# the dict form of whatever track() produced.
@app.post("/v1/admin/stage_budgets/track")
def admin_stage_budgets_track(req: StageBudgetTrackRequest):
    from ..middleware.stage_budgets import get_stage_budgets

    budgets = get_stage_budgets()
    return budgets.track(req.stage, req.duration_ms).to_dict()


# Reset stage-budget state. `stage` is an optional query parameter that is
# passed straight to reset(); the response reports "all" when it is falsy.
@app.post("/v1/admin/stage_budgets/reset")
def admin_stage_budgets_reset(request: Request, stage: Optional[str] = None):
    from ..middleware.stage_budgets import get_stage_budgets

    get_stage_budgets().reset(stage)
    scope = stage or "all"
    return {"ok": True, "stage": scope}


# ---- v2.68 Graceful degradation ----------------------------------------
# Body for POST /v1/admin/degradation/mode; `reason` defaults to "manual".
class DegradationModeRequest(BaseModel):
    mode: str
    reason: Optional[str] = "manual"


# Body for POST /v1/admin/degradation/feature.
class DegradationFeatureRequest(BaseModel):
    feature: str
    tier: int


# Return the graceful-degradation snapshot() as the response body.
@app.get("/v1/admin/degradation")
def admin_degradation_get():
    from ..middleware.graceful_degradation import get_graceful_degradation

    degrader = get_graceful_degradation()
    return degrader.snapshot()


# Switch the degradation mode explicitly. A ValueError from set_mode() is
# reported as HTTP 400; a successful transition is audited and returned.
@app.post("/v1/admin/degradation/mode")
def admin_degradation_set_mode(req: DegradationModeRequest, request: Request):
    from ..middleware.graceful_degradation import get_graceful_degradation

    degrader = get_graceful_degradation()
    try:
        transition = degrader.set_mode(
            req.mode,
            # An explicit null/empty reason falls back to "manual".
            reason=req.reason or "manual",
            actor=request.headers.get("x-api-key"),
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    get_obs().audit(
        "degradation.set_mode",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        from_mode=transition.from_mode,
        to_mode=transition.to_mode,
        reason=transition.reason,
    )
    return transition.to_dict()


# Call degrade(); a None result is reported as HTTP 400 "already at
# emergency", otherwise the transition's dict form is returned.
# NOTE(review): unlike /degradation/mode this writes no audit entry and
# `request` is unused — confirm whether that is intentional.
@app.post("/v1/admin/degradation/degrade")
def admin_degradation_degrade(request: Request, reason: str = "load"):
    from ..middleware.graceful_degradation import get_graceful_degradation

    degrader = get_graceful_degradation()
    transition = degrader.degrade(reason=reason)
    if transition is not None:
        return transition.to_dict()
    raise HTTPException(status_code=400,
                        detail={"error": "already at emergency"})


# Call recover(); a None result is reported as HTTP 400 "already normal",
# otherwise the transition's dict form is returned.
@app.post("/v1/admin/degradation/recover")
def admin_degradation_recover(request: Request, reason: str = "load_subsided"):
    from ..middleware.graceful_degradation import get_graceful_degradation

    degrader = get_graceful_degradation()
    transition = degrader.recover(reason=reason)
    if transition is not None:
        return transition.to_dict()
    raise HTTPException(status_code=400, detail={"error": "already normal"})


# Register a feature with a tier. ValueError -> HTTP 400; success is audited.
@app.post("/v1/admin/degradation/feature")
def admin_degradation_register_feature(req: DegradationFeatureRequest,
                                       request: Request):
    from ..middleware.graceful_degradation import get_graceful_degradation

    degrader = get_graceful_degradation()
    try:
        degrader.register_feature(req.feature, req.tier)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    get_obs().audit(
        "degradation.register_feature",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        feature=req.feature,
        tier=req.tier,
    )
    return {"ok": True, "feature": req.feature, "tier": req.tier}


# ---- v2.69 Query rewrite pipeline --------------------------------------
# Body for POST /v1/admin/query_rewrite/enable.
class QueryRewriteEnableRequest(BaseModel):
    enabled: bool


# Body for POST /v1/admin/query_rewrite/step/toggle.
class QueryRewriteStepToggleRequest(BaseModel):
    name: str
    enabled: bool


# Body for POST /v1/admin/query_rewrite/step/order.
class QueryRewriteStepOrderRequest(BaseModel):
    name: str
    order: int


# Body for POST /v1/admin/query_rewrite/try.
class QueryRewriteRequest(BaseModel):
    query: str


# Return the query-rewrite pipeline's stats().
@app.get("/v1/admin/query_rewrite")
def admin_query_rewrite_get():
    from ..middleware.query_rewrite import get_query_rewrite

    pipeline = get_query_rewrite()
    return pipeline.stats()


# Enable/disable the pipeline and audit the change. The response reports the
# pipeline's own is_enabled() value, not the request field.
@app.post("/v1/admin/query_rewrite/enable")
def admin_query_rewrite_enable(req: QueryRewriteEnableRequest,
                               request: Request):
    from ..middleware.query_rewrite import get_query_rewrite

    pipeline = get_query_rewrite()
    pipeline.set_enabled(req.enabled)
    get_obs().audit(
        "query_rewrite.enable",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        enabled=req.enabled,
    )
    return {"ok": True, "enabled": pipeline.is_enabled()}


# Toggle one rewrite step; a falsy result from set_step_enabled() -> HTTP 404.
@app.post("/v1/admin/query_rewrite/step/toggle")
def admin_query_rewrite_step_toggle(req: QueryRewriteStepToggleRequest,
                                    request: Request):
    from ..middleware.query_rewrite import get_query_rewrite

    found = get_query_rewrite().set_step_enabled(req.name, req.enabled)
    if found:
        return {"ok": True, "name": req.name, "enabled": req.enabled}
    raise HTTPException(status_code=404,
                        detail={"error": "step not found", "name": req.name})


# Change one step's order; a falsy result from set_step_order() -> HTTP 404.
@app.post("/v1/admin/query_rewrite/step/order")
def admin_query_rewrite_step_order(req: QueryRewriteStepOrderRequest,
                                   request: Request):
    from ..middleware.query_rewrite import get_query_rewrite

    found = get_query_rewrite().set_step_order(req.name, req.order)
    if found:
        return {"ok": True, "name": req.name, "order": req.order}
    raise HTTPException(status_code=404,
                        detail={"error": "step not found", "name": req.name})


# Run rewrite() on the supplied query and return the result's dict form.
@app.post("/v1/admin/query_rewrite/try")
def admin_query_rewrite_try(req: QueryRewriteRequest):
    from ..middleware.query_rewrite import get_query_rewrite

    pipeline = get_query_rewrite()
    return pipeline.rewrite(req.query).to_dict()


# Call reset() on the query-rewrite pipeline.
@app.post("/v1/admin/query_rewrite/reset")
def admin_query_rewrite_reset(request: Request):
    from ..middleware.query_rewrite import get_query_rewrite

    pipeline = get_query_rewrite()
    pipeline.reset()
    return {"ok": True}


# ---- v2.70 Semantic cache ----------------------------------------------
# Body for POST /v1/admin/semantic_cache/capacity.
class SemanticCacheCapacityRequest(BaseModel):
    capacity: int


# Body for POST /v1/admin/semantic_cache/ttl; ttl_sec may be omitted/null.
class SemanticCacheTTLRequest(BaseModel):
    ttl_sec: Optional[float] = None


# Return cache stats() plus entries(limit=...); `limit` is a query parameter.
@app.get("/v1/admin/semantic_cache")
def admin_semantic_cache_get(limit: int = 20):
    from ..middleware.semantic_cache import get_semantic_cache

    cache = get_semantic_cache()
    return {"stats": cache.stats(), "entries": cache.entries(limit=limit)}


# Set the cache capacity. ValueError -> HTTP 400; success is audited.
@app.post("/v1/admin/semantic_cache/capacity")
def admin_semantic_cache_set_capacity(req: SemanticCacheCapacityRequest,
                                      request: Request):
    from ..middleware.semantic_cache import get_semantic_cache

    cache = get_semantic_cache()
    try:
        cache.set_capacity(req.capacity)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    get_obs().audit(
        "semantic_cache.set_capacity",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        capacity=req.capacity,
    )
    return {"ok": True, "capacity": req.capacity}


# Set the default TTL via set_default_ttl(). ValueError -> HTTP 400.
# NOTE(review): no audit entry here although capacity changes are audited —
# confirm whether that is intentional.
@app.post("/v1/admin/semantic_cache/ttl")
def admin_semantic_cache_set_ttl(req: SemanticCacheTTLRequest,
                                 request: Request):
    from ..middleware.semantic_cache import get_semantic_cache

    cache = get_semantic_cache()
    try:
        cache.set_default_ttl(req.ttl_sec)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "ttl_sec": req.ttl_sec}


# Call purge_expired() and report its return value as "purged".
@app.post("/v1/admin/semantic_cache/purge_expired")
def admin_semantic_cache_purge_expired(request: Request):
    from ..middleware.semantic_cache import get_semantic_cache

    purged = get_semantic_cache().purge_expired()
    return {"ok": True, "purged": purged}


# Call clear(), audit its return value, and report it as "cleared".
@app.post("/v1/admin/semantic_cache/clear")
def admin_semantic_cache_clear(request: Request):
    from ..middleware.semantic_cache import get_semantic_cache

    cleared = get_semantic_cache().clear()
    get_obs().audit(
        "semantic_cache.clear",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        cleared=cleared,
    )
    return {"ok": True, "cleared": cleared}


# ---- v2.71 Score calibration -------------------------------------------
# Body for POST /v1/admin/score_calibration (register a calibrator entry).
class ScoreCalibRegisterRequest(BaseModel):
    name: str
    method: str = "minmax"
    window: int = 512
    warmup_n: int = 20


# Body for POST /v1/admin/score_calibration/method.
class ScoreCalibMethodRequest(BaseModel):
    name: str
    method: str


# Return the score calibrator's stats().
@app.get("/v1/admin/score_calibration")
def admin_score_calib_get():
    from ..middleware.score_calibration import get_score_calibrator

    calibrator = get_score_calibrator()
    return calibrator.stats()


# Register a named entry. ValueError/KeyError -> HTTP 400; success is audited.
@app.post("/v1/admin/score_calibration")
def admin_score_calib_register(req: ScoreCalibRegisterRequest,
                               request: Request):
    from ..middleware.score_calibration import get_score_calibrator

    calibrator = get_score_calibrator()
    try:
        calibrator.register(
            req.name,
            method=req.method,
            window=req.window,
            warmup_n=req.warmup_n,
        )
    except (ValueError, KeyError) as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    get_obs().audit(
        "score_calibration.register",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        name=req.name,
        method=req.method,
    )
    return {"ok": True, "name": req.name, "method": req.method}


# Change the method of an entry. ValueError -> HTTP 400; a falsy result from
# set_method() -> HTTP 404 "not registered".
@app.post("/v1/admin/score_calibration/method")
def admin_score_calib_set_method(req: ScoreCalibMethodRequest,
                                 request: Request):
    from ..middleware.score_calibration import get_score_calibrator

    calibrator = get_score_calibrator()
    try:
        updated = calibrator.set_method(req.name, req.method)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    if updated:
        return {"ok": True, "name": req.name, "method": req.method}
    raise HTTPException(status_code=404,
                        detail={"error": "not registered", "name": req.name})


# Unregister by path parameter; a falsy result from unregister() -> HTTP 404.
@app.delete("/v1/admin/score_calibration/{name}")
def admin_score_calib_unregister(name: str, request: Request):
    from ..middleware.score_calibration import get_score_calibrator

    removed = get_score_calibrator().unregister(name)
    if removed:
        return {"ok": True, "name": name, "removed": True}
    raise HTTPException(status_code=404,
                        detail={"error": "not registered", "name": name})


# Call reset(name); `name` is an optional query parameter and the response
# reports "all" when it is falsy.
@app.post("/v1/admin/score_calibration/reset")
def admin_score_calib_reset(request: Request, name: Optional[str] = None):
    from ..middleware.score_calibration import get_score_calibrator

    get_score_calibrator().reset(name)
    scope = name or "all"
    return {"ok": True, "name": scope}


# ---- v2.72 Answer postprocessor ----------------------------------------
# Body for POST /v1/admin/postprocess/enable.
class PostprocessorEnableRequest(BaseModel):
    enabled: bool


# Body for POST /v1/admin/postprocess/step/toggle.
class PostprocessorStepToggleRequest(BaseModel):
    name: str
    enabled: bool


# Body for POST /v1/admin/postprocess/max_chars; max_chars may be null.
class PostprocessorMaxCharsRequest(BaseModel):
    max_chars: Optional[int] = None


# Body for POST /v1/admin/postprocess/try.
class PostprocessorProcessRequest(BaseModel):
    text: str


# Return the answer postprocessor's stats().
@app.get("/v1/admin/postprocess")
def admin_postprocess_get():
    from ..middleware.answer_postprocess import get_answer_postprocessor

    postprocessor = get_answer_postprocessor()
    return postprocessor.stats()


# Enable/disable the postprocessor and audit the change. The response
# reports the postprocessor's own is_enabled() value.
@app.post("/v1/admin/postprocess/enable")
def admin_postprocess_enable(req: PostprocessorEnableRequest,
                             request: Request):
    from ..middleware.answer_postprocess import get_answer_postprocessor

    postprocessor = get_answer_postprocessor()
    postprocessor.set_enabled(req.enabled)
    get_obs().audit(
        "postprocess.enable",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        enabled=req.enabled,
    )
    return {"ok": True, "enabled": postprocessor.is_enabled()}


# Toggle one step; a falsy result from set_step_enabled() -> HTTP 404.
@app.post("/v1/admin/postprocess/step/toggle")
def admin_postprocess_step_toggle(req: PostprocessorStepToggleRequest,
                                  request: Request):
    from ..middleware.answer_postprocess import get_answer_postprocessor

    found = get_answer_postprocessor().set_step_enabled(req.name, req.enabled)
    if found:
        return {"ok": True, "name": req.name, "enabled": req.enabled}
    raise HTTPException(status_code=404,
                        detail={"error": "step not found", "name": req.name})


# Pass max_chars (possibly None) to set_max_output_chars() and echo it back.
@app.post("/v1/admin/postprocess/max_chars")
def admin_postprocess_max_chars(req: PostprocessorMaxCharsRequest,
                                request: Request):
    from ..middleware.answer_postprocess import get_answer_postprocessor

    postprocessor = get_answer_postprocessor()
    postprocessor.set_max_output_chars(req.max_chars)
    return {"ok": True, "max_chars": req.max_chars}


# Run process() on the supplied text and return the result's dict form.
@app.post("/v1/admin/postprocess/try")
def admin_postprocess_try(req: PostprocessorProcessRequest):
    from ..middleware.answer_postprocess import get_answer_postprocessor

    postprocessor = get_answer_postprocessor()
    return postprocessor.process(req.text).to_dict()


# Call reset() on the answer postprocessor.
@app.post("/v1/admin/postprocess/reset")
def admin_postprocess_reset(request: Request):
    from ..middleware.answer_postprocess import get_answer_postprocessor

    postprocessor = get_answer_postprocessor()
    postprocessor.reset()
    return {"ok": True}


# ---- v2.73 Retriever health probes -------------------------------------
# Return the retriever-health object's stats().
@app.get("/v1/admin/retriever_health")
def admin_retriever_health_get():
    from ..middleware.retriever_health import get_retriever_health

    health = get_retriever_health()
    return health.stats()


# Probe one retriever by path parameter. HTTP 404 unless is_registered(name)
# is truthy; otherwise the probe result's dict form is returned.
@app.post("/v1/admin/retriever_health/probe/{name}")
def admin_retriever_health_probe(name: str, request: Request):
    from ..middleware.retriever_health import get_retriever_health

    health = get_retriever_health()
    if health.is_registered(name):
        return health.probe(name).to_dict()
    raise HTTPException(status_code=404,
                        detail={"error": "not registered", "name": name})


# Run probe_all() and return each result's dict form keyed as probe_all()
# keyed it.
@app.post("/v1/admin/retriever_health/probe_all")
def admin_retriever_health_probe_all(request: Request):
    from ..middleware.retriever_health import get_retriever_health

    outcomes = get_retriever_health().probe_all()
    serialised = {
        probe_name: outcome.to_dict()
        for probe_name, outcome in outcomes.items()
    }
    return {"results": serialised}


# Unregister by path parameter; a falsy result from unregister() -> HTTP 404.
@app.delete("/v1/admin/retriever_health/{name}")
def admin_retriever_health_unregister(name: str, request: Request):
    from ..middleware.retriever_health import get_retriever_health

    removed = get_retriever_health().unregister(name)
    if removed:
        return {"ok": True, "name": name, "removed": True}
    raise HTTPException(status_code=404,
                        detail={"error": "not registered", "name": name})


# Call reset(name); `name` is an optional query parameter and the response
# reports "all" when it is falsy.
@app.post("/v1/admin/retriever_health/reset")
def admin_retriever_health_reset(request: Request,
                                 name: Optional[str] = None):
    from ..middleware.retriever_health import get_retriever_health

    get_retriever_health().reset(name)
    scope = name or "all"
    return {"ok": True, "name": scope}


# ---- v2.74 Stream throttle ---------------------------------------------
# Body for POST /v1/admin/stream_throttle/rate.
class StreamRateRequest(BaseModel):
    stream_id: str
    rate_per_sec: float


# Body for POST /v1/admin/stream_throttle/defaults; every field is optional.
class StreamDefaultsRequest(BaseModel):
    rate: Optional[float] = None
    burst: Optional[float] = None
    min_gap_ms: Optional[float] = None


# Return the stream throttle's stats().
@app.get("/v1/admin/stream_throttle")
def admin_stream_throttle_get():
    from ..middleware.stream_throttle import get_stream_throttle

    throttle = get_stream_throttle()
    return throttle.stats()


# Set the rate of one stream. ValueError -> HTTP 400; a falsy result from
# set_rate() -> HTTP 404 "stream not open".
@app.post("/v1/admin/stream_throttle/rate")
def admin_stream_throttle_set_rate(req: StreamRateRequest, request: Request):
    from ..middleware.stream_throttle import get_stream_throttle

    throttle = get_stream_throttle()
    try:
        applied = throttle.set_rate(req.stream_id, req.rate_per_sec)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    if applied:
        return {"ok": True, "stream_id": req.stream_id,
                "rate_per_sec": req.rate_per_sec}
    raise HTTPException(status_code=404,
                        detail={"error": "stream not open",
                                "stream_id": req.stream_id})


# Forward rate/burst/min_gap_ms to set_defaults(). ValueError -> HTTP 400;
# success is audited.
@app.post("/v1/admin/stream_throttle/defaults")
def admin_stream_throttle_set_defaults(req: StreamDefaultsRequest,
                                       request: Request):
    from ..middleware.stream_throttle import get_stream_throttle

    throttle = get_stream_throttle()
    try:
        throttle.set_defaults(
            rate=req.rate,
            burst=req.burst,
            min_gap_ms=req.min_gap_ms,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    get_obs().audit(
        "stream_throttle.set_defaults",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        rate=req.rate,
        burst=req.burst,
        min_gap_ms=req.min_gap_ms,
    )
    return {"ok": True}


# Call reset() on the stream throttle.
@app.post("/v1/admin/stream_throttle/reset")
def admin_stream_throttle_reset(request: Request):
    from ..middleware.stream_throttle import get_stream_throttle

    throttle = get_stream_throttle()
    throttle.reset()
    return {"ok": True}


# ---- v2.75 Context window sizer ----------------------------------------
# Body for POST /v1/admin/context_sizer/model.
class ContextModelRequest(BaseModel):
    name: str
    max_context: int
    default_output: int = 1024


# Body for POST /v1/admin/context_sizer/budget.
class ContextBudgetRequest(BaseModel):
    model: str
    system_tokens: int = 0
    query_tokens: int = 0
    history_tokens: int = 0
    output_reserve: Optional[int] = None
    safety_margin: float = 0.05


# Return the context sizer's stats().
@app.get("/v1/admin/context_sizer")
def admin_context_sizer_get():
    from ..middleware.context_sizer import get_context_sizer

    sizer = get_context_sizer()
    return sizer.stats()


# Register a model, always with replace=True. ValueError/KeyError -> HTTP
# 400; success is audited.
@app.post("/v1/admin/context_sizer/model")
def admin_context_sizer_register_model(req: ContextModelRequest,
                                       request: Request):
    from ..middleware.context_sizer import get_context_sizer

    sizer = get_context_sizer()
    try:
        sizer.register_model(
            req.name,
            max_context=req.max_context,
            default_output=req.default_output,
            replace=True,
        )
    except (ValueError, KeyError) as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    get_obs().audit(
        "context_sizer.register_model",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        name=req.name,
        max_context=req.max_context,
    )
    return {"ok": True, "name": req.name, "max_context": req.max_context}


# Unregister a model by path parameter; a falsy result -> HTTP 404.
@app.delete("/v1/admin/context_sizer/model/{name}")
def admin_context_sizer_unregister_model(name: str, request: Request):
    from ..middleware.context_sizer import get_context_sizer

    removed = get_context_sizer().unregister_model(name)
    if removed:
        return {"ok": True, "name": name, "removed": True}
    raise HTTPException(status_code=404,
                        detail={"error": "not registered", "name": name})


# Compute a budget for the given model/token counts and return its dict
# form. Only ValueError is mapped to HTTP 400.
# NOTE(review): register_model above also maps KeyError to 400 — confirm
# whether budget() can raise KeyError for an unknown model.
@app.post("/v1/admin/context_sizer/budget")
def admin_context_sizer_budget(req: ContextBudgetRequest):
    from ..middleware.context_sizer import get_context_sizer

    sizer = get_context_sizer()
    try:
        budget = sizer.budget(
            model=req.model,
            system_tokens=req.system_tokens,
            query_tokens=req.query_tokens,
            history_tokens=req.history_tokens,
            output_reserve=req.output_reserve,
            safety_margin=req.safety_margin,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return budget.to_dict()


# Call reset() on the context sizer.
@app.post("/v1/admin/context_sizer/reset")
def admin_context_sizer_reset(request: Request):
    from ..middleware.context_sizer import get_context_sizer

    sizer = get_context_sizer()
    sizer.reset()
    return {"ok": True}


# ---- v2.76 Answer confidence -------------------------------------------
# Body for POST /v1/admin/confidence/weight.
class ConfidenceWeightRequest(BaseModel):
    signal: str
    weight: float


# Body for POST /v1/admin/confidence/thresholds.
class ConfidenceThresholdRequest(BaseModel):
    high: float
    medium: float


# Body for POST /v1/admin/confidence/score; every signal except min_k is
# optional and defaults to None.
class ConfidenceScoreRequest(BaseModel):
    retrieval_count: Optional[int] = None
    min_k: int = 3
    top_score: Optional[float] = None
    second_score: Optional[float] = None
    verifier_pass: Optional[bool] = None
    n_citations: Optional[int] = None
    n_claims: Optional[int] = None
    intent_confidence: Optional[float] = None


# Return the confidence scorer's stats().
@app.get("/v1/admin/confidence")
def admin_confidence_get():
    from ..middleware.answer_confidence import get_confidence_scorer

    scorer = get_confidence_scorer()
    return scorer.stats()


# Set one signal weight. ValueError -> HTTP 400; success is audited.
@app.post("/v1/admin/confidence/weight")
def admin_confidence_set_weight(req: ConfidenceWeightRequest,
                                request: Request):
    from ..middleware.answer_confidence import get_confidence_scorer

    try:
        get_confidence_scorer().set_weight(req.signal, req.weight)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    get_obs().audit(
        "confidence.set_weight",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        signal=req.signal,
        weight=req.weight,
    )
    return {"ok": True, "signal": req.signal, "weight": req.weight}


# Set the high/medium thresholds. ValueError -> HTTP 400.
@app.post("/v1/admin/confidence/thresholds")
def admin_confidence_set_thresholds(req: ConfidenceThresholdRequest,
                                    request: Request):
    from ..middleware.answer_confidence import get_confidence_scorer

    try:
        get_confidence_scorer().set_thresholds(req.high, req.medium)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "high": req.high, "medium": req.medium}


# Build a ConfidenceSignals from the request fields, score it, and return
# the result's dict form.
@app.post("/v1/admin/confidence/score")
def admin_confidence_score(req: ConfidenceScoreRequest):
    from ..middleware.answer_confidence import (
        get_confidence_scorer,
        ConfidenceSignals,
    )

    signals = ConfidenceSignals(
        retrieval_count=req.retrieval_count,
        min_k=req.min_k,
        top_score=req.top_score,
        second_score=req.second_score,
        verifier_pass=req.verifier_pass,
        n_citations=req.n_citations,
        n_claims=req.n_claims,
        intent_confidence=req.intent_confidence,
    )
    return get_confidence_scorer().score(signals).to_dict()


# Call reset() on the confidence scorer.
@app.post("/v1/admin/confidence/reset")
def admin_confidence_reset(request: Request):
    from ..middleware.answer_confidence import get_confidence_scorer

    scorer = get_confidence_scorer()
    scorer.reset()
    return {"ok": True}


# ---- v2.77 Legal entity extraction -------------------------------------
# Body for POST /v1/admin/legal_entities/extract.
class EntityExtractRequest(BaseModel):
    text: str


# Body for POST /v1/admin/legal_entities/type.
class EntityTypeToggleRequest(BaseModel):
    type: str
    enabled: bool


# Return the legal-entity extractor's stats().
@app.get("/v1/admin/legal_entities")
def admin_legal_entities_get():
    from ..middleware.legal_entities import get_legal_entity_extractor

    extractor = get_legal_entity_extractor()
    return extractor.stats()
# Run extract() on the supplied text and return the result's dict form.
@app.post("/v1/admin/legal_entities/extract")
def admin_legal_entities_extract(req: EntityExtractRequest):
    from ..middleware.legal_entities import get_legal_entity_extractor

    extractor = get_legal_entity_extractor()
    return extractor.extract(req.text).to_dict()


# Enable/disable one entity type via set_type_enabled() and echo the request.
# NOTE(review): the return value of set_type_enabled() is ignored, so an
# unknown type is not reported here — confirm that is intended.
@app.post("/v1/admin/legal_entities/type")
def admin_legal_entities_toggle_type(req: EntityTypeToggleRequest,
                                     request: Request):
    from ..middleware.legal_entities import get_legal_entity_extractor

    extractor = get_legal_entity_extractor()
    extractor.set_type_enabled(req.type, req.enabled)
    return {"ok": True, "type": req.type, "enabled": req.enabled}


# Call reset() on the legal-entity extractor.
@app.post("/v1/admin/legal_entities/reset")
def admin_legal_entities_reset(request: Request):
    from ..middleware.legal_entities import get_legal_entity_extractor

    extractor = get_legal_entity_extractor()
    extractor.reset()
    return {"ok": True}


# ---- v2.78 Conversation summarizer -------------------------------------
# Body for POST /v1/admin/conversation_summarizer/policy.
class SummarizerPolicyRequest(BaseModel):
    policy: str


# Body for POST /v1/admin/conversation_summarizer/config; only the fields
# that are not None are applied by that endpoint.
class SummarizerConfigRequest(BaseModel):
    window_size: Optional[int] = None
    head_size: Optional[int] = None
    min_turns_to_compress: Optional[int] = None


# Body for POST /v1/admin/conversation_summarizer/try.
class SummarizerCompressRequest(BaseModel):
    turns: List[Dict[str, Any]]


# Return the summarizer's config() and stats() side by side.
@app.get("/v1/admin/conversation_summarizer")
def admin_conversation_summarizer_get():
    from ..middleware.conversation_summarizer import (
        get_conversation_summarizer,
    )

    summarizer = get_conversation_summarizer()
    current_config = summarizer.config()
    current_stats = summarizer.stats()
    return {"config": current_config, "stats": current_stats}


# Set the summarizer policy. ValueError -> HTTP 400; success is audited.
@app.post("/v1/admin/conversation_summarizer/policy")
def admin_conversation_summarizer_set_policy(req: SummarizerPolicyRequest,
                                             request: Request):
    from ..middleware.conversation_summarizer import (
        get_conversation_summarizer,
    )

    try:
        get_conversation_summarizer().set_policy(req.policy)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    get_obs().audit(
        "conversation_summarizer.set_policy",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        policy=req.policy,
    )
    return {"ok": True, "policy": req.policy}
# Apply whichever of window_size / head_size / min_turns_to_compress are not
# None, in that order. A ValueError from any setter -> HTTP 400 (setters that
# already ran are not rolled back). Returns the resulting config().
@app.post("/v1/admin/conversation_summarizer/config")
def admin_conversation_summarizer_config(req: SummarizerConfigRequest,
                                         request: Request):
    from ..middleware.conversation_summarizer import (
        get_conversation_summarizer,
    )

    summarizer = get_conversation_summarizer()
    try:
        if req.window_size is not None:
            summarizer.set_window_size(req.window_size)
        if req.head_size is not None:
            summarizer.set_head_size(req.head_size)
        if req.min_turns_to_compress is not None:
            summarizer.set_min_turns_to_compress(req.min_turns_to_compress)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "config": summarizer.config()}


# Run compress() on the supplied turns and return the result's dict form.
@app.post("/v1/admin/conversation_summarizer/try")
def admin_conversation_summarizer_try(req: SummarizerCompressRequest):
    from ..middleware.conversation_summarizer import (
        get_conversation_summarizer,
    )

    summarizer = get_conversation_summarizer()
    return summarizer.compress(req.turns).to_dict()


# Call reset() on the conversation summarizer.
@app.post("/v1/admin/conversation_summarizer/reset")
def admin_conversation_summarizer_reset(request: Request):
    from ..middleware.conversation_summarizer import (
        get_conversation_summarizer,
    )

    summarizer = get_conversation_summarizer()
    summarizer.reset()
    return {"ok": True}


# ---- v2.79 Diversity ranker --------------------------------------------
# Body for POST /v1/admin/diversity/lambda.
class DiversityLambdaRequest(BaseModel):
    lambda_weight: float


# Body for POST /v1/admin/diversity/similarity.
class DiversitySimRequest(BaseModel):
    similarity: str


# Body for POST /v1/admin/diversity/rank; k may be omitted/null.
class DiversityRankRequest(BaseModel):
    results: List[Dict[str, Any]]
    k: Optional[int] = None


# Return the diversity ranker's config() and stats() side by side.
@app.get("/v1/admin/diversity")
def admin_diversity_get():
    from ..middleware.diversity_ranker import get_diversity_ranker

    ranker = get_diversity_ranker()
    current_config = ranker.config()
    current_stats = ranker.stats()
    return {"config": current_config, "stats": current_stats}


# Set the ranker's lambda. ValueError -> HTTP 400; success is audited.
# The response key is "lambda" while the request field is "lambda_weight".
@app.post("/v1/admin/diversity/lambda")
def admin_diversity_set_lambda(req: DiversityLambdaRequest, request: Request):
    from ..middleware.diversity_ranker import get_diversity_ranker

    try:
        get_diversity_ranker().set_lambda(req.lambda_weight)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    get_obs().audit(
        "diversity.set_lambda",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        lambda_weight=req.lambda_weight,
    )
    return {"ok": True, "lambda": req.lambda_weight}


# Set the similarity measure by name. ValueError -> HTTP 400.
@app.post("/v1/admin/diversity/similarity")
def admin_diversity_set_similarity(req: DiversitySimRequest,
                                   request: Request):
    from ..middleware.diversity_ranker import get_diversity_ranker

    try:
        get_diversity_ranker().set_similarity(req.similarity)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "similarity": req.similarity}


# Run rank() on the supplied results. rank() yields a (reranked, decisions)
# pair; decisions are serialised with to_dict().
@app.post("/v1/admin/diversity/rank")
def admin_diversity_rank(req: DiversityRankRequest):
    from ..middleware.diversity_ranker import get_diversity_ranker

    ranker = get_diversity_ranker()
    reranked, decisions = ranker.rank(req.results, k=req.k)
    decision_dicts = [decision.to_dict() for decision in decisions]
    return {"reranked": reranked, "decisions": decision_dicts}


# Call reset() on the diversity ranker.
@app.post("/v1/admin/diversity/reset")
def admin_diversity_reset(request: Request):
    from ..middleware.diversity_ranker import get_diversity_ranker

    ranker = get_diversity_ranker()
    ranker.reset()
    return {"ok": True}


# ---- v2.80 Synonym expansion -------------------------------------------
# Body for POST /v1/admin/synonyms/group.
class SynonymGroupRequest(BaseModel):
    group_id: str
    terms: List[str]
    case_sensitive: bool = False


# Body for POST /v1/admin/synonyms/expand; joiner defaults to " OR ".
class SynonymExpandRequest(BaseModel):
    query: str
    joiner: str = " OR "


# Return the synonym expander's stats().
@app.get("/v1/admin/synonyms")
def admin_synonyms_get():
    from ..middleware.synonym_expansion import get_synonym_expander

    expander = get_synonym_expander()
    return expander.stats()


# Register a synonym group, always with replace=True. ValueError/KeyError ->
# HTTP 400; success is audited with the number of terms.
@app.post("/v1/admin/synonyms/group")
def admin_synonyms_register_group(req: SynonymGroupRequest, request: Request):
    from ..middleware.synonym_expansion import get_synonym_expander

    try:
        get_synonym_expander().register_group(
            req.group_id,
            req.terms,
            case_sensitive=req.case_sensitive,
            replace=True,
        )
    except (ValueError, KeyError) as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    term_count = len(req.terms)
    get_obs().audit(
        "synonyms.register_group",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        group_id=req.group_id,
        n_terms=term_count,
    )
    return {"ok": True, "group_id": req.group_id, "n_terms": term_count}


# Unregister a group by path parameter; a falsy result -> HTTP 404.
@app.delete("/v1/admin/synonyms/group/{group_id}")
def admin_synonyms_unregister_group(group_id: str, request: Request):
    from ..middleware.synonym_expansion import get_synonym_expander

    removed = get_synonym_expander().unregister_group(group_id)
    if removed:
        return {"ok": True, "group_id": group_id, "removed": True}
    raise HTTPException(status_code=404,
                        detail={"error": "group not found",
                                "group_id": group_id})


# Run expand() on the supplied query and return the result's dict form.
@app.post("/v1/admin/synonyms/expand")
def admin_synonyms_expand(req: SynonymExpandRequest):
    from ..middleware.synonym_expansion import get_synonym_expander

    expander = get_synonym_expander()
    return expander.expand(req.query, joiner=req.joiner).to_dict()


# Call reset_stats() on the synonym expander (note: not reset()).
@app.post("/v1/admin/synonyms/reset_stats")
def admin_synonyms_reset_stats(request: Request):
    from ..middleware.synonym_expansion import get_synonym_expander

    expander = get_synonym_expander()
    expander.reset_stats()
    return {"ok": True}


# ---- v2.81 Feedback learning -------------------------------------------
# Body for POST /v1/admin/feedback/record.
class FeedbackRecordRequest(BaseModel):
    query_id: str
    retriever: str
    feedback: str
    score: Optional[float] = None
    comment: Optional[str] = None


# Body for POST /v1/admin/feedback/lr.
class FeedbackLRRequest(BaseModel):
    learning_rate: float


# Body for POST /v1/admin/feedback/register.
class FeedbackRegisterRequest(BaseModel):
    name: str
    initial_weight: float = 1.0


# Return the feedback learner's stats().
@app.get("/v1/admin/feedback")
def admin_feedback_get():
    from ..middleware.feedback_learning import get_feedback_learner

    learner = get_feedback_learner()
    return learner.stats()


# Record one feedback item. ValueError -> HTTP 400.
@app.post("/v1/admin/feedback/record")
def admin_feedback_record(req: FeedbackRecordRequest, request: Request):
    from ..middleware.feedback_learning import get_feedback_learner

    try:
        get_feedback_learner().record(
            query_id=req.query_id,
            retriever=req.retriever,
            feedback=req.feedback,
            score=req.score,
            comment=req.comment,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


# Register a retriever with an initial weight. ValueError -> HTTP 400.
@app.post("/v1/admin/feedback/register")
def admin_feedback_register(req: FeedbackRegisterRequest, request: Request):
    from ..middleware.feedback_learning import get_feedback_learner

    try:
        get_feedback_learner().register_retriever(
            req.name,
            initial_weight=req.initial_weight,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "name": req.name,
            "initial_weight": req.initial_weight}


# Return propose_updates() serialised with to_dict(); nothing is applied.
@app.get("/v1/admin/feedback/propose")
def admin_feedback_propose():
    from ..middleware.feedback_learning import get_feedback_learner

    proposals = get_feedback_learner().propose_updates()
    return {"proposals": [proposal.to_dict() for proposal in proposals]}


# Call apply_updates(), audit its return value, and report it as "changed".
@app.post("/v1/admin/feedback/apply")
def admin_feedback_apply(request: Request):
    from ..middleware.feedback_learning import get_feedback_learner

    changed = get_feedback_learner().apply_updates()
    get_obs().audit(
        "feedback.apply_updates",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        n_changed=changed,
    )
    return {"ok": True, "changed": changed}


# Set the learning rate. ValueError -> HTTP 400.
@app.post("/v1/admin/feedback/lr")
def admin_feedback_set_lr(req: FeedbackLRRequest, request: Request):
    from ..middleware.feedback_learning import get_feedback_learner

    try:
        get_feedback_learner().set_learning_rate(req.learning_rate)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "learning_rate": req.learning_rate}


# Call reset() on the feedback learner.
@app.post("/v1/admin/feedback/reset")
def admin_feedback_reset(request: Request):
    from ..middleware.feedback_learning import get_feedback_learner

    learner = get_feedback_learner()
    learner.reset()
    return {"ok": True}


# ---- v2.82 Embedding compression ---------------------------------------
# Body for POST /v1/admin/embedding_compress/try.
class EmbeddingCompressRequest(BaseModel):
    embedding: List[float]


# Body for POST /v1/admin/embedding_compress/mode.
class EmbeddingFloat16Request(BaseModel):
    use_float16: bool


# Return the embedding compressor's stats().
@app.get("/v1/admin/embedding_compress")
def admin_embedding_compress_get():
    from ..middleware.embedding_compress import get_embedding_compressor

    compressor = get_embedding_compressor()
    return compressor.stats()
# Switch the compressor's float16 flag and audit the change.
@app.post("/v1/admin/embedding_compress/mode")
def admin_embedding_compress_set_mode(req: EmbeddingFloat16Request,
                                      request: Request):
    from ..middleware.embedding_compress import get_embedding_compressor

    compressor = get_embedding_compressor()
    compressor.set_use_float16(req.use_float16)
    get_obs().audit(
        "embedding_compress.set_mode",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        use_float16=req.use_float16,
    )
    return {"ok": True, "use_float16": req.use_float16}


# Round-trip one embedding through compress()/decompress() and report size
# and fidelity figures. "original_bytes" is computed as 4 bytes per
# component (presumably a float32 baseline — confirm against the compressor).
@app.post("/v1/admin/embedding_compress/try")
def admin_embedding_compress_try(req: EmbeddingCompressRequest):
    from ..middleware.embedding_compress import (
        get_embedding_compressor,
        cosine_similarity,
    )

    compressor = get_embedding_compressor()
    vector = req.embedding
    packed = compressor.compress(vector)
    restored = compressor.decompress(packed).tolist()
    similarity = cosine_similarity(vector, restored)

    dim = len(vector)
    raw_bytes = 4 * dim
    packed_bytes = len(packed)
    return {
        "original_bytes": raw_bytes,
        "compressed_bytes": packed_bytes,
        # max(..., 1) guards the division when the blob is empty.
        "ratio": round(raw_bytes / max(packed_bytes, 1), 4),
        "cosine_similarity": round(similarity, 6),
        "dim": dim,
    }


# Call reset() on the embedding compressor.
@app.post("/v1/admin/embedding_compress/reset")
def admin_embedding_compress_reset(request: Request):
    from ..middleware.embedding_compress import get_embedding_compressor

    compressor = get_embedding_compressor()
    compressor.reset()
    return {"ok": True}


# ---- v2.83 Phrase detector ---------------------------------------------
# Body for POST /v1/admin/phrases/train.
class PhraseTrainRequest(BaseModel):
    documents: List[str]
    verbose: bool = False


# Body for POST /v1/admin/phrases/tokenize.
class PhraseTokenizeRequest(BaseModel):
    text: str


# Body for POST /v1/admin/phrases/config; only the fields that are not None
# are applied by that endpoint.
class PhraseConfigRequest(BaseModel):
    min_frequency: Optional[int] = None
    max_tau: Optional[float] = None
    max_phrase_length: Optional[int] = None


# Return detector stats() plus phrases(limit=...); `limit` is a query
# parameter.
@app.get("/v1/admin/phrases")
def admin_phrases_get(limit: int = 100):
    from ..middleware.phrase_detector import get_phrase_detector

    detector = get_phrase_detector()
    current_stats = detector.stats()
    known_phrases = detector.phrases(limit=limit)
    return {
        "stats": current_stats,
        "phrases": known_phrases,
    }


# Train the detector on the supplied documents, audit the document and
# discovery counts, and report the same counts to the caller.
@app.post("/v1/admin/phrases/train")
def admin_phrases_train(req: PhraseTrainRequest, request: Request):
    from ..middleware.phrase_detector import get_phrase_detector

    detector = get_phrase_detector()
    discovered = detector.train(req.documents, verbose=req.verbose)
    doc_count = len(req.documents)
    discovered_count = len(discovered)
    get_obs().audit(
        "phrases.train",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        n_docs=doc_count,
        n_discovered=discovered_count,
    )
    return {
        "ok": True,
        "n_docs": doc_count,
        "n_phrases_discovered": discovered_count,
    }


# Tokenize the supplied text. "applied_phrases" lists every token that
# contains an underscore (presumably how merged phrases are marked — this
# would also match ordinary tokens containing "_"; confirm).
@app.post("/v1/admin/phrases/tokenize")
def admin_phrases_tokenize(req: PhraseTokenizeRequest):
    from ..middleware.phrase_detector import get_phrase_detector

    detector = get_phrase_detector()
    tokens = detector.tokenize(req.text)
    underscored = [token for token in tokens if "_" in token]
    return {
        "tokens": tokens,
        "n_tokens": len(tokens),
        "applied_phrases": underscored,
    }


# Apply whichever of min_frequency / max_tau / max_phrase_length are not
# None, in that order. A ValueError from any setter -> HTTP 400 (setters that
# already ran are not rolled back). Returns the resulting config().
@app.post("/v1/admin/phrases/config")
def admin_phrases_set_config(req: PhraseConfigRequest, request: Request):
    from ..middleware.phrase_detector import get_phrase_detector

    detector = get_phrase_detector()
    try:
        if req.min_frequency is not None:
            detector.set_min_frequency(req.min_frequency)
        if req.max_tau is not None:
            detector.set_max_tau(req.max_tau)
        if req.max_phrase_length is not None:
            detector.set_max_phrase_length(req.max_phrase_length)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "config": detector.config()}


# Call reset() on the phrase detector.
@app.post("/v1/admin/phrases/reset")
def admin_phrases_reset(request: Request):
    from ..middleware.phrase_detector import get_phrase_detector

    detector = get_phrase_detector()
    detector.reset()
    return {"ok": True}


# ---- v2.84 Meaning preservation ----------------------------------------
# Body for POST /v1/admin/meaning/validate.
class MeaningValidateRequest(BaseModel):
    source: str
    target: str


# Body for POST /v1/admin/meaning/thresholds.
class MeaningThresholdRequest(BaseModel):
    pass_threshold: float
    warn_threshold: float


# Body for POST /v1/admin/meaning/weight.
class MeaningWeightRequest(BaseModel):
    role: str
    weight: float


# Body for POST /v1/admin/meaning/vocab/add.
class MeaningVocabRequest(BaseModel):
    role: str
    word: str


# Return the meaning validator's stats().
@app.get("/v1/admin/meaning")
def admin_meaning_get():
    from ..middleware.meaning_preservation import get_meaning_validator

    validator = get_meaning_validator()
    return validator.stats()
# Validate `target` against `source` and return the result as a dict.
@app.post("/v1/admin/meaning/validate")
def admin_meaning_validate(req: MeaningValidateRequest):
    from ..middleware.meaning_preservation import get_meaning_validator

    validator = get_meaning_validator()
    return validator.validate(req.source, req.target).to_dict()


# Set pass/warn thresholds; a ValueError from the validator becomes a 400.
@app.post("/v1/admin/meaning/thresholds")
def admin_meaning_set_thresholds(req: MeaningThresholdRequest,
                                 request: Request):
    from ..middleware.meaning_preservation import get_meaning_validator

    validator = get_meaning_validator()
    try:
        validator.set_thresholds(req.pass_threshold, req.warn_threshold)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {
        "ok": True,
        "pass": req.pass_threshold,
        "warn": req.warn_threshold,
    }


# Set the weight of one role; a ValueError from the validator becomes a 400.
@app.post("/v1/admin/meaning/weight")
def admin_meaning_set_weight(req: MeaningWeightRequest, request: Request):
    from ..middleware.meaning_preservation import get_meaning_validator

    validator = get_meaning_validator()
    try:
        validator.set_weight(req.role, req.weight)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "role": req.role, "weight": req.weight}


# Add a word to a role's vocabulary; a ValueError becomes a 400.
@app.post("/v1/admin/meaning/vocab/add")
def admin_meaning_vocab_add(req: MeaningVocabRequest, request: Request):
    from ..middleware.meaning_preservation import get_meaning_validator

    validator = get_meaning_validator()
    try:
        validator.add_role_word(req.role, req.word)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "role": req.role, "word": req.word}


# Call reset() on the meaning validator.
@app.post("/v1/admin/meaning/reset")
def admin_meaning_reset(request: Request):
    from ..middleware.meaning_preservation import get_meaning_validator

    validator = get_meaning_validator()
    validator.reset()
    return {"ok": True}


# ---- v2.85 Retrieval explainability ------------------------------------
# Body of POST /v1/admin/retrieval_explain/verbosity.
class ExplainVerbosityRequest(BaseModel):
    verbosity: str


# Body of POST /v1/admin/retrieval_explain/batch.
class ExplainBatchRequest(BaseModel):
    query: str
    results: List[Dict[str, Any]]


# Return the retrieval explainer's stats.
@app.get("/v1/admin/retrieval_explain")
def admin_retrieval_explain_get():
    from ..middleware.retrieval_explain import get_retrieval_explainer

    explainer = get_retrieval_explainer()
    return explainer.stats()


# Set the explainer verbosity; a ValueError from the explainer becomes a 400.
@app.post("/v1/admin/retrieval_explain/verbosity")
def admin_retrieval_explain_set_verbosity(req: ExplainVerbosityRequest,
                                          request: Request):
    from ..middleware.retrieval_explain import get_retrieval_explainer

    explainer = get_retrieval_explainer()
    try:
        explainer.set_verbosity(req.verbosity)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "verbosity": req.verbosity}


# Run explain_batch over the posted results and return its output.
@app.post("/v1/admin/retrieval_explain/batch")
def admin_retrieval_explain_batch(req: ExplainBatchRequest):
    from ..middleware.retrieval_explain import get_retrieval_explainer

    explainer = get_retrieval_explainer()
    return {"annotated": explainer.explain_batch(req.query, req.results)}


# ---- v2.86 Smart chunking ----------------------------------------------
# Body of POST /v1/admin/chunker/caps.
class ChunkerCapsRequest(BaseModel):
    hard_cap_chars: int
    soft_cap_chars: int


# Body of POST /v1/admin/chunker/overlap.
class ChunkerOverlapRequest(BaseModel):
    overlap_chars: int


# Body of POST /v1/admin/chunker/mode.
class ChunkerModeRequest(BaseModel):
    mode: str


# Body of POST /v1/admin/chunker/chunk.
class ChunkRequest(BaseModel):
    text: str
    metadata: Optional[Dict[str, Any]] = None


# Return the smart chunker's stats.
@app.get("/v1/admin/chunker")
def admin_chunker_get():
    from ..middleware.smart_chunker import get_smart_chunker

    chunker = get_smart_chunker()
    return chunker.stats()


# Set the hard and soft character caps; a ValueError becomes a 400.
@app.post("/v1/admin/chunker/caps")
def admin_chunker_set_caps(req: ChunkerCapsRequest, request: Request):
    from ..middleware.smart_chunker import get_smart_chunker

    chunker = get_smart_chunker()
    try:
        chunker.set_caps(req.hard_cap_chars, req.soft_cap_chars)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


# Set the chunk overlap in characters; a ValueError becomes a 400.
@app.post("/v1/admin/chunker/overlap")
def admin_chunker_set_overlap(req: ChunkerOverlapRequest, request: Request):
    from ..middleware.smart_chunker import get_smart_chunker

    chunker = get_smart_chunker()
    try:
        chunker.set_overlap(req.overlap_chars)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "overlap_chars": req.overlap_chars}
# Set the chunker mode; a ValueError from the chunker becomes a 400.
@app.post("/v1/admin/chunker/mode")
def admin_chunker_set_mode(req: ChunkerModeRequest, request: Request):
    from ..middleware.smart_chunker import get_smart_chunker

    chunker = get_smart_chunker()
    try:
        chunker.set_mode(req.mode)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "mode": req.mode}


# Chunk the posted text and return the result as a dict.
@app.post("/v1/admin/chunker/chunk")
def admin_chunker_chunk(req: ChunkRequest):
    from ..middleware.smart_chunker import get_smart_chunker

    chunker = get_smart_chunker()
    return chunker.chunk(req.text, metadata=req.metadata).to_dict()


# Call reset() on the smart chunker.
@app.post("/v1/admin/chunker/reset")
def admin_chunker_reset(request: Request):
    from ..middleware.smart_chunker import get_smart_chunker

    chunker = get_smart_chunker()
    chunker.reset()
    return {"ok": True}


# ---- v2.87 Query decomposition -----------------------------------------
# Body of POST /v1/admin/decompose/enable.
class DecomposeEnableRequest(BaseModel):
    enabled: bool


# Body of POST /v1/admin/decompose/try.
class DecomposeRequest(BaseModel):
    query: str


# Return the query decomposer's stats.
@app.get("/v1/admin/decompose")
def admin_decompose_get():
    from ..middleware.query_decompose import get_query_decomposer

    decomposer = get_query_decomposer()
    return decomposer.stats()


# Switch the decomposer on or off and audit the change.
@app.post("/v1/admin/decompose/enable")
def admin_decompose_enable(req: DecomposeEnableRequest, request: Request):
    from ..middleware.query_decompose import get_query_decomposer

    enabled = req.enabled
    get_query_decomposer().set_enabled(enabled)
    get_obs().audit(
        "decompose.enable",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        enabled=enabled,
    )
    return {"ok": True, "enabled": enabled}


# Decompose one query and return the result as a dict.
@app.post("/v1/admin/decompose/try")
def admin_decompose_try(req: DecomposeRequest):
    from ..middleware.query_decompose import get_query_decomposer

    decomposer = get_query_decomposer()
    return decomposer.decompose(req.query).to_dict()


# Call reset() on the query decomposer.
@app.post("/v1/admin/decompose/reset")
def admin_decompose_reset(request: Request):
    from ..middleware.query_decompose import get_query_decomposer

    decomposer = get_query_decomposer()
    decomposer.reset()
    return {"ok": True}


# ---- v2.88 Cost estimator ----------------------------------------------
# Body of POST /v1/admin/cost_estimator/estimate.
class CostEstimateRequest(BaseModel):
    retriever: str
    generator: str
    k: int
    system_tokens: int = 500
    query_tokens: int = 50
    max_output_tokens: int = 1000
    rerank: bool = False


# Body of POST /v1/admin/cost_estimator/retriever.
class CostRetrieverRequest(BaseModel):
    name: str
    cost_per_doc_usd: float = 0.0
    latency_base_ms: float = 20.0
    latency_per_doc_ms: float = 2.0


# Body of POST /v1/admin/cost_estimator/generator.
class CostGeneratorRequest(BaseModel):
    name: str
    price_in_per_1k: float
    price_out_per_1k: float
    first_token_ms: float = 200.0
    per_token_ms: float = 15.0
    output_utilization: float = 0.7
    avg_chunk_tokens: int = 400


# Return estimator stats together with the registered retrievers/generators.
@app.get("/v1/admin/cost_estimator")
def admin_cost_estimator_get():
    from ..middleware.cost_estimator import get_cost_estimator

    estimator = get_cost_estimator()
    payload = {"stats": estimator.stats()}
    payload["retrievers"] = estimator.list_retrievers()
    payload["generators"] = estimator.list_generators()
    return payload


# Produce a cost breakdown; KeyError/ValueError from the estimator become a 400.
@app.post("/v1/admin/cost_estimator/estimate")
def admin_cost_estimator_estimate(req: CostEstimateRequest):
    from ..middleware.cost_estimator import get_cost_estimator

    estimator = get_cost_estimator()
    try:
        estimate = estimator.estimate(
            retriever=req.retriever,
            generator=req.generator,
            k=req.k,
            system_tokens=req.system_tokens,
            query_tokens=req.query_tokens,
            max_output_tokens=req.max_output_tokens,
            rerank=req.rerank,
        )
    except (KeyError, ValueError) as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    else:
        return estimate.to_dict()


# Register a retriever cost profile; replace=True is always passed.
@app.post("/v1/admin/cost_estimator/retriever")
def admin_cost_estimator_register_retriever(req: CostRetrieverRequest,
                                            request: Request):
    from ..middleware.cost_estimator import get_cost_estimator

    estimator = get_cost_estimator()
    try:
        estimator.register_retriever(
            req.name,
            cost_per_doc_usd=req.cost_per_doc_usd,
            latency_base_ms=req.latency_base_ms,
            latency_per_doc_ms=req.latency_per_doc_ms,
            replace=True,
        )
    except (ValueError, KeyError) as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "name": req.name}


# Register a generator cost profile; replace=True is always passed.
@app.post("/v1/admin/cost_estimator/generator")
def admin_cost_estimator_register_generator(req: CostGeneratorRequest,
                                            request: Request):
    from ..middleware.cost_estimator import get_cost_estimator

    estimator = get_cost_estimator()
    try:
        estimator.register_generator(
            req.name,
            price_in_per_1k=req.price_in_per_1k,
            price_out_per_1k=req.price_out_per_1k,
            first_token_ms=req.first_token_ms,
            per_token_ms=req.per_token_ms,
            output_utilization=req.output_utilization,
            avg_chunk_tokens=req.avg_chunk_tokens,
            replace=True,
        )
    except (ValueError, KeyError) as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "name": req.name}


# Call reset() on the cost estimator.
@app.post("/v1/admin/cost_estimator/reset")
def admin_cost_estimator_reset(request: Request):
    from ..middleware.cost_estimator import get_cost_estimator

    estimator = get_cost_estimator()
    estimator.reset()
    return {"ok": True}


# ---- v2.89 Adaptive k selector -----------------------------------------
# Body of POST /v1/admin/adaptive_k/recommend.
class AdaptiveKRecommendRequest(BaseModel):
    intent: Optional[str] = None
    intent_confidence: Optional[float] = None
    sub_queries: int = 1
    complexity_score: Optional[float] = None
    available_tokens: Optional[int] = None
    base_k_override: Optional[int] = None


# Body of POST /v1/admin/adaptive_k/bounds.
class AdaptiveKBoundsRequest(BaseModel):
    min_k: int
    max_k: int


# Body of POST /v1/admin/adaptive_k/intent_k.
class AdaptiveKIntentRequest(BaseModel):
    intent: str
    k: int


# Return the adaptive-k selector's stats.
@app.get("/v1/admin/adaptive_k")
def admin_adaptive_k_get():
    from ..middleware.adaptive_k import get_adaptive_k_selector

    selector = get_adaptive_k_selector()
    return selector.stats()


# Ask the selector for a recommendation and return it as a dict.
@app.post("/v1/admin/adaptive_k/recommend")
def admin_adaptive_k_recommend(req: AdaptiveKRecommendRequest):
    from ..middleware.adaptive_k import get_adaptive_k_selector

    selector = get_adaptive_k_selector()
    recommendation = selector.recommend(
        intent=req.intent,
        intent_confidence=req.intent_confidence,
        sub_queries=req.sub_queries,
        complexity_score=req.complexity_score,
        available_tokens=req.available_tokens,
        base_k_override=req.base_k_override,
    )
    return recommendation.to_dict()


# Set the min/max k bounds; a ValueError from the selector becomes a 400.
@app.post("/v1/admin/adaptive_k/bounds")
def admin_adaptive_k_set_bounds(req: AdaptiveKBoundsRequest,
                                request: Request):
    from ..middleware.adaptive_k import get_adaptive_k_selector

    selector = get_adaptive_k_selector()
    try:
        selector.set_bounds(req.min_k, req.max_k)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "min_k": req.min_k, "max_k": req.max_k}


# Set k for one intent; a ValueError from the selector becomes a 400.
@app.post("/v1/admin/adaptive_k/intent_k")
def admin_adaptive_k_set_intent_k(req: AdaptiveKIntentRequest,
                                  request: Request):
    from ..middleware.adaptive_k import get_adaptive_k_selector

    selector = get_adaptive_k_selector()
    try:
        selector.set_intent_k(req.intent, req.k)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "intent": req.intent, "k": req.k}


# Call reset() on the adaptive-k selector.
@app.post("/v1/admin/adaptive_k/reset")
def admin_adaptive_k_reset(request: Request):
    from ..middleware.adaptive_k import get_adaptive_k_selector

    selector = get_adaptive_k_selector()
    selector.reset()
    return {"ok": True}


# ---- v2.90 Pipeline trace recorder -------------------------------------
# Body of POST /v1/admin/traces/capacity.
class TraceCapacityRequest(BaseModel):
    capacity: int


# Body of POST /v1/admin/traces/retention.
class TraceRetentionRequest(BaseModel):
    retention_sec: float


# Return recorder stats plus a listing of traces filtered by the query params.
@app.get("/v1/admin/traces")
def admin_traces_list(limit: int = 50, active_only: bool = False):
    from ..middleware.pipeline_trace import get_pipeline_trace_recorder

    recorder = get_pipeline_trace_recorder()
    payload = {"stats": recorder.stats()}
    payload["traces"] = recorder.list_traces(
        limit=limit, active_only=active_only)
    return payload


# Fetch one trace by id; respond 404 when the recorder returns None.
@app.get("/v1/admin/traces/{trace_id}")
def admin_traces_get(trace_id: str):
    from ..middleware.pipeline_trace import get_pipeline_trace_recorder

    found = get_pipeline_trace_recorder().get(trace_id)
    if found is not None:
        return found
    raise HTTPException(
        status_code=404,
        detail={"error": "not found", "trace_id": trace_id},
    )


# Set the recorder capacity; a ValueError from the recorder becomes a 400.
@app.post("/v1/admin/traces/capacity")
def admin_traces_set_capacity(req: TraceCapacityRequest, request: Request):
    from ..middleware.pipeline_trace import get_pipeline_trace_recorder

    recorder = get_pipeline_trace_recorder()
    try:
        recorder.set_capacity(req.capacity)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "capacity": req.capacity}


# Set the retention window in seconds; a ValueError becomes a 400.
@app.post("/v1/admin/traces/retention")
def admin_traces_set_retention(req: TraceRetentionRequest, request: Request):
    from ..middleware.pipeline_trace import get_pipeline_trace_recorder

    recorder = get_pipeline_trace_recorder()
    try:
        recorder.set_retention(req.retention_sec)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "retention_sec": req.retention_sec}


# Run purge_expired() and report the value it returns.
@app.post("/v1/admin/traces/purge")
def admin_traces_purge(request: Request):
    from ..middleware.pipeline_trace import get_pipeline_trace_recorder

    purged = get_pipeline_trace_recorder().purge_expired()
    return {"ok": True, "purged": purged}


# Call reset() on the trace recorder.
@app.post("/v1/admin/traces/reset")
def admin_traces_reset(request: Request):
    from ..middleware.pipeline_trace import get_pipeline_trace_recorder

    recorder = get_pipeline_trace_recorder()
    recorder.reset()
    return {"ok": True}


# ---- v2.91 A/B experiments ---------------------------------------------
# Body of POST /v1/admin/ab.
class ABRegisterRequest(BaseModel):
    name: str
    variants: List[str]
    traffic_split: List[float]


# Body of POST /v1/admin/ab/assign.
class ABAssignRequest(BaseModel):
    name: str
    user_key: str


# Body of POST /v1/admin/ab/record.
class ABRecordRequest(BaseModel):
    name: str
    variant: str
    metric: str
    value: float


# Return manager stats plus the list of experiments.
@app.get("/v1/admin/ab")
def admin_ab_list():
    from ..middleware.ab_experiments import get_ab_manager

    manager = get_ab_manager()
    payload = {"stats": manager.stats()}
    payload["experiments"] = manager.list_experiments()
    return payload


# Register an experiment (replace=True is always passed), then audit it.
@app.post("/v1/admin/ab")
def admin_ab_register(req: ABRegisterRequest, request: Request):
    from ..middleware.ab_experiments import get_ab_manager

    manager = get_ab_manager()
    try:
        manager.register(
            req.name, req.variants, req.traffic_split, replace=True)
    except (ValueError, KeyError) as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    # The audit entry is written only after registration succeeded.
    get_obs().audit(
        "ab.register",
        actor_key=request.headers.get("x-api-key"),
        request_id=getattr(request.state, "request_id", None),
        name=req.name,
        variants=req.variants,
    )
    return {"ok": True, "name": req.name}


# Ask the manager which variant a user key is assigned to.
@app.post("/v1/admin/ab/assign")
def admin_ab_assign(req: ABAssignRequest):
    from ..middleware.ab_experiments import get_ab_manager

    chosen = get_ab_manager().assign(req.name, req.user_key)
    return {
        "variant": chosen,
        "experiment": req.name,
        "user_key": req.user_key,
    }


# Record one metric value; a falsy result from the manager becomes a 404.
@app.post("/v1/admin/ab/record")
def admin_ab_record(req: ABRecordRequest):
    from ..middleware.ab_experiments import get_ab_manager

    recorded = get_ab_manager().record(
        req.name, req.variant, req.metric, req.value)
    if recorded:
        return {"ok": True}
    raise HTTPException(
        status_code=404,
        detail={"error": "experiment or variant not found"},
    )


# Return the manager's report for one experiment, optionally for one metric.
@app.get("/v1/admin/ab/{name}")
def admin_ab_get(name: str, metric: Optional[str] = None):
    from ..middleware.ab_experiments import get_ab_manager

    manager = get_ab_manager()
    return manager.report(name, metric=metric)


# Pause an experiment; a falsy result from the manager becomes a 404.
@app.post("/v1/admin/ab/{name}/pause")
def admin_ab_pause(name: str, request: Request):
    from ..middleware.ab_experiments import get_ab_manager

    if get_ab_manager().pause(name):
        return {"ok": True, "name": name, "state": "paused"}
    raise HTTPException(status_code=404, detail={"error": "not found"})


# Resume an experiment; a falsy result from the manager becomes a 404.
@app.post("/v1/admin/ab/{name}/resume")
def admin_ab_resume(name: str, request: Request):
    from ..middleware.ab_experiments import get_ab_manager

    if get_ab_manager().resume(name):
        return {"ok": True, "name": name, "state": "active"}
    raise HTTPException(status_code=404, detail={"error": "not found"})


# End an experiment; a falsy result from the manager becomes a 404.
@app.post("/v1/admin/ab/{name}/end")
def admin_ab_end(name: str, request: Request):
    from ..middleware.ab_experiments import get_ab_manager

    if get_ab_manager().end(name):
        return {"ok": True, "name": name, "state": "ended"}
    raise HTTPException(status_code=404, detail={"error": "not found"})


# ---- v2.92 Tenant quotas -----------------------------------------------
# Body of POST /v1/admin/tenants.
class TenantRegisterRequest(BaseModel):
    tenant_id: str
    daily_request_limit: Optional[int] = None
    monthly_cost_usd_limit: Optional[float] = None
    per_request_max_usd: Optional[float] = None
    warn_threshold: float = 0.80


# Body of POST /v1/admin/tenants/update; every limit is optional here.
class TenantUpdateRequest(BaseModel):
    tenant_id: str
    daily_request_limit: Optional[int] = None
    monthly_cost_usd_limit: Optional[float] = None
    per_request_max_usd: Optional[float] = None
    warn_threshold: Optional[float] = None


# Body of POST /v1/admin/tenants/check.
class TenantCheckRequest(BaseModel):
    tenant_id: str
    estimated_cost_usd: float = 0.0


# Body of POST /v1/admin/tenants/record.
class TenantRecordRequest(BaseModel):
    tenant_id: str
    cost_usd: float = 0.0


# Return quota-store stats plus the list of tenants.
@app.get("/v1/admin/tenants")
def admin_tenants_list():
    from ..middleware.tenant_quotas import get_tenant_quota_store

    store = get_tenant_quota_store()
    payload = {"stats": store.stats()}
    payload["tenants"] = store.list_tenants()
    return payload


# Register a tenant with its limits; a ValueError from the store becomes a 400.
@app.post("/v1/admin/tenants")
def admin_tenants_register(req: TenantRegisterRequest, request: Request):
    from ..middleware.tenant_quotas import get_tenant_quota_store

    store = get_tenant_quota_store()
    try:
        store.register(
            req.tenant_id,
            daily_request_limit=req.daily_request_limit,
            monthly_cost_usd_limit=req.monthly_cost_usd_limit,
            per_request_max_usd=req.per_request_max_usd,
            warn_threshold=req.warn_threshold,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "tenant_id": req.tenant_id}


# Update a tenant's limits: ValueError -> 400, falsy result -> 404.
@app.post("/v1/admin/tenants/update")
def admin_tenants_update(req: TenantUpdateRequest, request: Request):
    from ..middleware.tenant_quotas import get_tenant_quota_store

    store = get_tenant_quota_store()
    try:
        updated = store.update_limits(
            req.tenant_id,
            daily_request_limit=req.daily_request_limit,
            monthly_cost_usd_limit=req.monthly_cost_usd_limit,
            per_request_max_usd=req.per_request_max_usd,
            warn_threshold=req.warn_threshold,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    if updated:
        return {"ok": True}
    raise HTTPException(status_code=404,
                        detail={"error": "tenant not found"})


# Fetch one tenant; respond 404 when the store returns None.
@app.get("/v1/admin/tenants/{tenant_id}")
def admin_tenants_get(tenant_id: str):
    from ..middleware.tenant_quotas import get_tenant_quota_store

    tenant = get_tenant_quota_store().tenant(tenant_id)
    if tenant is not None:
        return tenant
    raise HTTPException(status_code=404, detail={"error": "not found"})


# Run a quota check for an estimated cost and return the decision as a dict.
@app.post("/v1/admin/tenants/check")
def admin_tenants_check(req: TenantCheckRequest):
    from ..middleware.tenant_quotas import get_tenant_quota_store

    store = get_tenant_quota_store()
    decision = store.check(
        req.tenant_id, estimated_cost_usd=req.estimated_cost_usd)
    return decision.to_dict()


# Record usage for a tenant; a falsy result from the store becomes a 404.
@app.post("/v1/admin/tenants/record")
def admin_tenants_record(req: TenantRecordRequest):
    from ..middleware.tenant_quotas import get_tenant_quota_store

    store = get_tenant_quota_store()
    recorded = store.record(req.tenant_id, cost_usd=req.cost_usd)
    if recorded:
        return {"ok": True}
    raise HTTPException(status_code=404,
                        detail={"error": "tenant not found"})


# Unregister a tenant; a falsy result from the store becomes a 404.
@app.delete("/v1/admin/tenants/{tenant_id}")
def admin_tenants_delete(tenant_id: str, request: Request):
    from ..middleware.tenant_quotas import get_tenant_quota_store

    if get_tenant_quota_store().unregister(tenant_id):
        return {"ok": True, "tenant_id": tenant_id, "removed": True}
    raise HTTPException(status_code=404, detail={"error": "not found"})


# ---- v2.93 Multi-hop retrieval -----------------------------------------
# Body of POST /v1/admin/multihop/config.
class MultiHopConfigRequest(BaseModel):
    max_hops: Optional[int] = None
    k: Optional[int] = None
    max_entities_per_hop: Optional[int] = None


# Return the multi-hop retriever's config and stats.
@app.get("/v1/admin/multihop")
def admin_multihop_get():
    from ..middleware.multihop_retrieve import get_multihop_retriever

    retriever = get_multihop_retriever()
    payload = {"config": retriever.config()}
    payload["stats"] = retriever.stats()
    return payload


# Pass the posted defaults to set_defaults(); a ValueError becomes a 400.
@app.post("/v1/admin/multihop/config")
def admin_multihop_config(req: MultiHopConfigRequest, request: Request):
    from ..middleware.multihop_retrieve import get_multihop_retriever

    retriever = get_multihop_retriever()
    try:
        retriever.set_defaults(
            max_hops=req.max_hops,
            k=req.k,
            max_entities_per_hop=req.max_entities_per_hop,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


# Call reset() on the multi-hop retriever.
@app.post("/v1/admin/multihop/reset")
def admin_multihop_reset(request: Request):
    from ..middleware.multihop_retrieve import get_multihop_retriever

    retriever = get_multihop_retriever()
    retriever.reset()
    return {"ok": True}


# ---- v2.94 Snippet extraction ------------------------------------------
# Body of POST /v1/admin/snippets/extract.
class SnippetExtractRequest(BaseModel):
    query: str
    doc_text: str
    doc_id: Optional[str] = None


# Body of POST /v1/admin/snippets/batch.
class SnippetBatchRequest(BaseModel):
    query: str
    docs: List[Dict[str, Any]]


# Body of POST /v1/admin/snippets/config; a field left as None is not applied.
class SnippetConfigRequest(BaseModel):
    top_sentences: Optional[int] = None
    max_snippet_chars: Optional[int] = None


# Return the snippet extractor's stats.
@app.get("/v1/admin/snippets")
def admin_snippets_get():
    from ..middleware.snippet_extract import get_snippet_extractor

    extractor = get_snippet_extractor()
    return extractor.stats()


# Extract a snippet from one document and return it as a dict.
@app.post("/v1/admin/snippets/extract")
def admin_snippets_extract(req: SnippetExtractRequest):
    from ..middleware.snippet_extract import get_snippet_extractor

    extractor = get_snippet_extractor()
    snippet = extractor.extract(req.query, req.doc_text, doc_id=req.doc_id)
    return snippet.to_dict()


# Run extract_batch over the posted documents.
@app.post("/v1/admin/snippets/batch")
def admin_snippets_batch(req: SnippetBatchRequest):
    from ..middleware.snippet_extract import get_snippet_extractor

    extractor = get_snippet_extractor()
    batch = extractor.extract_batch(req.query, req.docs)
    return {"docs": batch}


# Apply each supplied setting in order; a ValueError from a setter becomes a 400.
@app.post("/v1/admin/snippets/config")
def admin_snippets_config(req: SnippetConfigRequest, request: Request):
    from ..middleware.snippet_extract import get_snippet_extractor

    extractor = get_snippet_extractor()
    try:
        if req.top_sentences is not None:
            extractor.set_top_sentences(req.top_sentences)
        if req.max_snippet_chars is not None:
            extractor.set_max_snippet_chars(req.max_snippet_chars)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "config": extractor.config()}


# ---- v2.95 Answer grounding --------------------------------------------
# Body of POST /v1/admin/grounding/verify.
class GroundingVerifyRequest(BaseModel):
    answer: str
    documents: List[Dict[str, Any]]


# Body of POST /v1/admin/grounding/thresholds.
class GroundingThresholdsRequest(BaseModel):
    pass_threshold: float
    warn_threshold: float


# Body of POST /v1/admin/grounding/support.
class GroundingSupportRequest(BaseModel):
    support_threshold: float


# Return the grounding verifier's stats.
@app.get("/v1/admin/grounding")
def admin_grounding_get():
    from ..middleware.answer_grounding import get_grounding_verifier

    verifier = get_grounding_verifier()
    return verifier.stats()


# Verify an answer against the posted documents and return the result dict.
@app.post("/v1/admin/grounding/verify")
def admin_grounding_verify(req: GroundingVerifyRequest):
    from ..middleware.answer_grounding import get_grounding_verifier

    verifier = get_grounding_verifier()
    return verifier.verify(req.answer, req.documents).to_dict()


# Set pass/warn thresholds; a ValueError from the verifier becomes a 400.
@app.post("/v1/admin/grounding/thresholds")
def admin_grounding_thresholds(req: GroundingThresholdsRequest,
                               request: Request):
    from ..middleware.answer_grounding import get_grounding_verifier

    verifier = get_grounding_verifier()
    try:
        verifier.set_thresholds(req.pass_threshold, req.warn_threshold)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


# Set the support threshold; a ValueError from the verifier becomes a 400.
@app.post("/v1/admin/grounding/support")
def admin_grounding_support(req: GroundingSupportRequest, request: Request):
    from ..middleware.answer_grounding import get_grounding_verifier

    verifier = get_grounding_verifier()
    try:
        verifier.set_support_threshold(req.support_threshold)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


# ---- v2.96 Corpus drift ------------------------------------------------
# Body of POST /v1/admin/drift/snapshot.
class DriftSnapshotRequest(BaseModel):
    label: str
    docs: List[Dict[str, Any]]


# Body of POST /v1/admin/drift/thresholds.
class DriftThresholdsRequest(BaseModel):
    warn: float
    alert: float


# Body of POST /v1/admin/drift/reference.
class DriftReferenceRequest(BaseModel):
    label: str


# Return drift-monitor stats plus the list of snapshots.
@app.get("/v1/admin/drift")
def admin_drift_get():
    from ..middleware.corpus_drift import get_corpus_drift_monitor

    monitor = get_corpus_drift_monitor()
    payload = {"stats": monitor.stats()}
    payload["snapshots"] = monitor.list_snapshots()
    return payload


# Take a labelled snapshot of the posted docs; a ValueError becomes a 400.
@app.post("/v1/admin/drift/snapshot")
def admin_drift_snapshot(req: DriftSnapshotRequest, request: Request):
    from ..middleware.corpus_drift import get_corpus_drift_monitor

    monitor = get_corpus_drift_monitor()
    try:
        snapshot_info = monitor.snapshot(req.label, req.docs)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    else:
        return snapshot_info


# Diff two snapshots named by query parameters; ValueError/KeyError -> 400.
@app.post("/v1/admin/drift/diff")
def admin_drift_diff(reference: Optional[str] = None,
                     current: Optional[str] = None):
    from ..middleware.corpus_drift import get_corpus_drift_monitor

    monitor = get_corpus_drift_monitor()
    try:
        report = monitor.diff(reference=reference, current=current)
    except (ValueError, KeyError) as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    else:
        return report.to_dict()
@app.post("/v1/admin/drift/reference") def admin_drift_set_reference(req: DriftReferenceRequest, request: Request): from ..middleware.corpus_drift import get_corpus_drift_monitor if not get_corpus_drift_monitor().set_reference(req.label): raise HTTPException(status_code=404, detail={"error": "snapshot not found"}) return {"ok": True, "reference": req.label} @app.post("/v1/admin/drift/thresholds") def admin_drift_thresholds(req: DriftThresholdsRequest, request: Request): from ..middleware.corpus_drift import get_corpus_drift_monitor try: get_corpus_drift_monitor().set_thresholds(req.warn, req.alert) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return {"ok": True} @app.post("/v1/admin/drift/reset") def admin_drift_reset(request: Request): from ..middleware.corpus_drift import get_corpus_drift_monitor get_corpus_drift_monitor().reset() return {"ok": True} # ---- v2.97 Prometheus /metrics ---------------------------------------- from fastapi.responses import PlainTextResponse @app.get("/metrics/prometheus", response_class=PlainTextResponse) def prometheus_metrics(): """Prometheus text exposition format — scraped by Prometheus/Grafana.""" from ..middleware.prom_exporter import get_prometheus_exporter return get_prometheus_exporter().render() @app.get("/v1/admin/prometheus") def admin_prometheus_get(): from ..middleware.prom_exporter import get_prometheus_exporter e = get_prometheus_exporter() return {"stats": e.stats(), "metrics": e.list_metrics()} @app.post("/v1/admin/prometheus/reset") def admin_prometheus_reset(request: Request): from ..middleware.prom_exporter import get_prometheus_exporter get_prometheus_exporter().reset() return {"ok": True} # ---- v2.98 Request replay --------------------------------------------- class ReplayRecordRequest(BaseModel): request_id: str endpoint: str query: str k: int = 5 intent: Optional[str] = None retriever: Optional[str] = None latency_ms: Optional[float] = None cost_usd: Optional[float] 
= None success: bool = True error: Optional[str] = None tags: Optional[Dict[str, Any]] = None class ReplayCapacityRequest(BaseModel): capacity: int @app.get("/v1/admin/replay") def admin_replay_get(): from ..middleware.request_replay import get_replay_recorder return get_replay_recorder().stats() @app.post("/v1/admin/replay_store/record") def admin_replay_record(req: ReplayRecordRequest): from ..middleware.request_replay import get_replay_recorder get_replay_recorder().record( request_id=req.request_id, endpoint=req.endpoint, query=req.query, k=req.k, intent=req.intent, retriever=req.retriever, latency_ms=req.latency_ms, cost_usd=req.cost_usd, success=req.success, error=req.error, tags=req.tags, ) return {"ok": True} @app.get("/v1/admin/replay_store/records") def admin_replay_records(limit: int = 100, success_only: bool = False, endpoint: Optional[str] = None): from ..middleware.request_replay import get_replay_recorder return { "records": get_replay_recorder().list_records( limit=limit, success_only=success_only, endpoint=endpoint), } @app.get("/v1/admin/replay_store/record/{request_id}") def admin_replay_get_one(request_id: str): from ..middleware.request_replay import get_replay_recorder rec = get_replay_recorder().get(request_id) if rec is None: raise HTTPException(status_code=404, detail={"error": "not found"}) return rec @app.post("/v1/admin/replay_store/capacity") def admin_replay_capacity(req: ReplayCapacityRequest, request: Request): from ..middleware.request_replay import get_replay_recorder try: get_replay_recorder().set_capacity(req.capacity) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return {"ok": True} @app.post("/v1/admin/replay_store/reset") def admin_replay_reset(request: Request): from ..middleware.request_replay import get_replay_recorder get_replay_recorder().reset() return {"ok": True} # ---- v2.99 Composite health ------------------------------------------- class HealthCacheRequest(BaseModel): 
ttl_sec: float @app.get("/v1/health/composite") def health_composite(use_cache: bool = True): """Public health endpoint — aggregates across subsystems.""" from ..middleware.composite_health import get_composite_health r = get_composite_health().evaluate(use_cache=use_cache) return r.to_dict() @app.get("/v1/admin/health/composite") def admin_health_composite_get(): from ..middleware.composite_health import get_composite_health return get_composite_health().stats() @app.post("/v1/admin/health/composite/cache_ttl") def admin_health_cache_ttl(req: HealthCacheRequest, request: Request): from ..middleware.composite_health import get_composite_health try: get_composite_health().set_cache_ttl(req.ttl_sec) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return {"ok": True} @app.post("/v1/admin/health/composite/reset") def admin_health_composite_reset(request: Request): from ..middleware.composite_health import get_composite_health get_composite_health().reset() return {"ok": True} # ---- v3.0 Platform manifest -------------------------------------------- @app.get("/v1/platform/manifest") def platform_manifest_get( category: Optional[str] = None, since_prefix: Optional[str] = None, ported_only: bool = False, ): """Catalog of all features in the platform.""" from ..middleware.platform_manifest import get_platform_manifest m = get_platform_manifest() if category or since_prefix or ported_only: return { "platform_version": m.version(), "features": m.list( category=category, since_prefix=since_prefix, ported_only=ported_only), } return m.full_manifest() @app.get("/v1/platform/manifest/summary") def platform_manifest_summary(): from ..middleware.platform_manifest import get_platform_manifest m = get_platform_manifest() s = m.summary() s["categories"] = m.categories() return s @app.get("/v1/platform/manifest/{flag}") def platform_manifest_flag(flag: str): from ..middleware.platform_manifest import get_platform_manifest f = 
get_platform_manifest().get(flag) if f is None: raise HTTPException(status_code=404, detail={"error": "flag not found"}) return f # ---- v3.1 Self-diagnose ----------------------------------------------- @app.get("/v1/platform/diagnose") def platform_diagnose(): """Run a full system self-diagnosis and return a report.""" from ..middleware.self_diagnose import get_self_diagnose return get_self_diagnose().run().to_dict() @app.get("/v1/platform/diagnose/stats") def platform_diagnose_stats(): from ..middleware.self_diagnose import get_self_diagnose return get_self_diagnose().stats() # ---- v3.2 Changelog --------------------------------------------------- # ---- v3.3 HTML dashboard ---------------------------------------------- from fastapi.responses import HTMLResponse as _HTMLResponse @app.get("/v1/platform/dashboard", response_class=_HTMLResponse) def platform_dashboard(): """Self-contained HTML dashboard of the whole platform state.""" from ..middleware.html_dashboard import get_html_dashboard return get_html_dashboard().render() # ---- v3.4 Compliance bundle ------------------------------------------- class ComplianceSignerRequest(BaseModel): key: str signer_name: Optional[str] = None @app.post("/v1/platform/compliance/generate") def platform_compliance_generate(): """Generate a signed audit bundle (JSON).""" from ..middleware.compliance_bundle import get_compliance_bundle_generator bundle = get_compliance_bundle_generator().generate() return bundle.to_dict() @app.post("/v1/platform/compliance/verify") def platform_compliance_verify(bundle: Dict[str, Any]): """Verify a bundle's signature against the current signing key.""" from ..middleware.compliance_bundle import get_compliance_bundle_generator return get_compliance_bundle_generator().verify(bundle) @app.post("/v1/platform/compliance/signing_key") def platform_compliance_set_signing_key(req: ComplianceSignerRequest, request: Request): from ..middleware.compliance_bundle import get_compliance_bundle_generator 
try: get_compliance_bundle_generator().set_signing_key( req.key, signer_name=req.signer_name) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) get_obs().audit( "compliance.signing_key_set", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), signer=req.signer_name, ) return {"ok": True, "signer": req.signer_name} @app.get("/v1/platform/compliance/stats") def platform_compliance_stats(): from ..middleware.compliance_bundle import get_compliance_bundle_generator return get_compliance_bundle_generator().stats() # ---- v3.5 Middleware SDK ---------------------------------------------- class ScaffoldRequest(BaseModel): name: str category: str = "generic" description: str = "" @app.get("/v1/platform/middleware") def platform_middleware_list(): """List all middleware registered via the SDK.""" from ..middleware.middleware_sdk import get_middleware_registry return {"middleware": get_middleware_registry().list()} @app.post("/v1/platform/middleware/scaffold") def platform_middleware_scaffold(req: ScaffoldRequest): """Generate Python source for a new middleware module.""" from ..middleware.middleware_sdk import scaffold_middleware_source try: source = scaffold_middleware_source( req.name, category=req.category, description=req.description) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return {"name": req.name, "source": source, "length_lines": source.count("\n")} @app.post("/v1/platform/middleware/reset_all") def platform_middleware_reset_all(request: Request): from ..middleware.middleware_sdk import get_middleware_registry n = get_middleware_registry().reset_all() return {"ok": True, "reset_count": n} # ---- v3.6 Eval harness ------------------------------------------------ class EvalCaseRequest(BaseModel): case_id: str query: str expected_doc_ids: List[str] = [] expected_substrings: List[str] = [] min_confidence: float = 0.0 k: int = 5 tags: 
List[str] = [] class EvalWeightsRequest(BaseModel): weights: Dict[str, float] class EvalRunRequest(BaseModel): case_ids: Optional[List[str]] = None @app.get("/v1/admin/eval") def admin_eval_get(): from ..middleware.eval_harness import get_eval_harness h = get_eval_harness() return { "stats": h.stats(), "cases": h.list_cases(), } @app.post("/v1/admin/eval/case") def admin_eval_add_case(req: EvalCaseRequest, request: Request): from ..middleware.eval_harness import get_eval_harness, TestCase try: get_eval_harness().add_case(TestCase( case_id=req.case_id, query=req.query, expected_doc_ids=req.expected_doc_ids, expected_substrings=req.expected_substrings, min_confidence=req.min_confidence, k=req.k, tags=req.tags, ), replace=True) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return {"ok": True, "case_id": req.case_id} @app.delete("/v1/admin/eval/case/{case_id}") def admin_eval_remove_case(case_id: str, request: Request): from ..middleware.eval_harness import get_eval_harness if not get_eval_harness().remove_case(case_id): raise HTTPException(status_code=404, detail={"error": "not found"}) return {"ok": True, "case_id": case_id, "removed": True} @app.post("/v1/admin/eval/weights") def admin_eval_set_weights(req: EvalWeightsRequest, request: Request): from ..middleware.eval_harness import get_eval_harness try: get_eval_harness().set_weights(req.weights) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return {"ok": True} @app.post("/v1/admin/eval/run") def admin_eval_run(req: EvalRunRequest, request: Request): from ..middleware.eval_harness import get_eval_harness try: report = get_eval_harness().run(case_ids=req.case_ids) except RuntimeError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return report.to_dict() # ---- v3.7 Session persistence ----------------------------------------- class SessionSnapshotRequest(BaseModel): sessions: List[Dict[str, Any]] class 
SessionPersistPathRequest(BaseModel): path: str sessions: Optional[List[Dict[str, Any]]] = None class SessionPersistFormatRequest(BaseModel): format: str @app.get("/v1/admin/session_persist") def admin_session_persist_get(): from ..middleware.session_persistence import get_session_persistence return get_session_persistence().stats() @app.post("/v1/admin/session_persist/snapshot") def admin_session_persist_snapshot(req: SessionSnapshotRequest): from ..middleware.session_persistence import get_session_persistence data = get_session_persistence().snapshot(req.sessions) return { "bytes": len(data), "n_sessions": len(req.sessions), } @app.post("/v1/admin/session_persist/save") def admin_session_persist_save(req: SessionPersistPathRequest, request: Request): from ..middleware.session_persistence import get_session_persistence p = get_session_persistence() sessions = req.sessions if sessions is None: # Pull from the real session store try: from ..memory import get_store sessions = [s.to_dict() for s in get_store()._sessions.values()] except Exception: sessions = [] try: meta = p.save_to(req.path, sessions) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) get_obs().audit( "session_persist.save", actor_key=request.headers.get("x-api-key"), request_id=getattr(request.state, "request_id", None), path=req.path, n_sessions=len(sessions), ) return meta @app.post("/v1/admin/session_persist/load") def admin_session_persist_load(path: str, request: Request): from ..middleware.session_persistence import get_session_persistence p = get_session_persistence() try: parsed = p.load_from(path) except FileNotFoundError as e: raise HTTPException(status_code=404, detail={"error": str(e)}) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return parsed @app.post("/v1/admin/session_persist/format") def admin_session_persist_format(req: SessionPersistFormatRequest, request: Request): from ..middleware.session_persistence 
import get_session_persistence try: get_session_persistence().set_format(req.format) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return {"ok": True, "format": req.format} # ---- v3.8 Event bus --------------------------------------------------- class EventPublishRequest(BaseModel): event_type: str payload: Optional[Dict[str, Any]] = None source: Optional[str] = None @app.get("/v1/admin/events") def admin_events_get(): from ..middleware.event_bus import get_event_bus bus = get_event_bus() return { "stats": bus.stats(), "subscriptions": bus.list_subscriptions(), } @app.post("/v1/admin/events/publish") def admin_events_publish(req: EventPublishRequest, request: Request): from ..middleware.event_bus import get_event_bus try: event = get_event_bus().publish( req.event_type, payload=req.payload, source=req.source or "admin_api", ) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return event.to_dict() @app.get("/v1/admin/events/history") def admin_events_history(limit: int = 50, event_type: Optional[str] = None): from ..middleware.event_bus import get_event_bus return { "events": get_event_bus().history( limit=limit, event_type=event_type), } @app.delete("/v1/admin/events/subscription/{subscription_id}") def admin_events_unsubscribe(subscription_id: str, request: Request): from ..middleware.event_bus import get_event_bus if not get_event_bus().unsubscribe(subscription_id): raise HTTPException(status_code=404, detail={"error": "not found"}) return {"ok": True, "subscription_id": subscription_id} @app.post("/v1/admin/events/reset") def admin_events_reset(request: Request): from ..middleware.event_bus import get_event_bus get_event_bus().reset() return {"ok": True} # ---- v3.9 Graph retriever -------------------------------------------- class GraphEdgeRequest(BaseModel): from_id: str to_id: str relation: str = "cites" weight: Optional[float] = None metadata: Optional[Dict[str, Any]] = None 
# NOTE: route notes in this section are `#` comments rather than docstrings
# because FastAPI publishes a handler's docstring as its OpenAPI description.


# Body for POST /v1/admin/graph/expand; only `seed` is required.
class GraphExpandRequest(BaseModel):
    seed: List[Dict[str, Any]]
    max_depth: Optional[int] = None
    decay: Optional[float] = None
    top_k: Optional[int] = None
    allowed_relations: Optional[List[str]] = None


@app.get("/v1/admin/graph")
def admin_graph_stats():
    # Report the graph retriever's stats().
    from ..middleware.graph_retriever import get_graph_retriever

    retriever = get_graph_retriever()
    return retriever.stats()


@app.post("/v1/admin/graph/edge")
def admin_graph_add_edge(req: GraphEdgeRequest, request: Request):
    # Add one edge to the graph; ValueError is reported as HTTP 400.
    from ..middleware.graph_retriever import get_graph_retriever

    edge_fields = {
        "from_id": req.from_id,
        "to_id": req.to_id,
        "relation": req.relation,
        "weight": req.weight,
        "metadata": req.metadata,
    }
    try:
        get_graph_retriever().add_edge(**edge_fields)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


@app.get("/v1/admin/graph/neighbors/{doc_id}")
def admin_graph_neighbors(doc_id: str, relation: Optional[str] = None):
    # Return the neighbours of doc_id; `relation` is forwarded unchanged.
    from ..middleware.graph_retriever import get_graph_retriever

    found = get_graph_retriever().neighbors(doc_id, relation=relation)
    return {"doc_id": doc_id, "neighbors": found}


@app.post("/v1/admin/graph/expand")
def admin_graph_expand(req: GraphExpandRequest, request: Request):
    # Expand from the seed list; ValueError is reported as HTTP 400.
    from ..middleware.graph_retriever import get_graph_retriever

    expand_args = {
        "seed": req.seed,
        "max_depth": req.max_depth,
        "decay": req.decay,
        "top_k": req.top_k,
        "allowed_relations": req.allowed_relations,
    }
    try:
        expansion = get_graph_retriever().expand(**expand_args)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return expansion.to_dict()


@app.post("/v1/admin/graph/reset")
def admin_graph_reset(request: Request):
    # Call reset() on the graph retriever.
    from ..middleware.graph_retriever import get_graph_retriever

    retriever = get_graph_retriever()
    retriever.reset()
    return {"ok": True}


# ---- v3.10 Federated retrieval ---------------------------------------
# Body for POST /v1/admin/federated/query; only `query` is required.
class FederatedQueryRequest(BaseModel):
    query: str
    top_k: Optional[int] = None
    fusion: Optional[str] = None
    only: Optional[List[str]] = None


@app.get("/v1/admin/federated")
def admin_federated_stats():
    # Return the federated retriever's stats plus its source list.
    from ..middleware.federated_retrieval import get_federated_retriever

    federated = get_federated_retriever()
    summary = {"stats": federated.stats()}
    summary["sources"] = federated.list_sources()
    return summary


@app.post("/v1/admin/federated/query")
def admin_federated_query(req: FederatedQueryRequest, request: Request):
    # Run a federated query; ValueError is reported as HTTP 400.
    from ..middleware.federated_retrieval import get_federated_retriever

    query_args = {
        "query": req.query,
        "top_k": req.top_k,
        "fusion": req.fusion,
        "only": req.only,
    }
    try:
        outcome = get_federated_retriever().query(**query_args)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return outcome.to_dict()


@app.post("/v1/admin/federated/source/{name}/enable")
def admin_federated_enable(name: str, enabled: bool, request: Request):
    # Toggle one source; HTTP 404 when set_enabled() returns a falsy value.
    from ..middleware.federated_retrieval import get_federated_retriever

    known = get_federated_retriever().set_enabled(name, enabled)
    if known:
        return {"ok": True, "name": name, "enabled": enabled}
    raise HTTPException(status_code=404, detail={"error": "not found"})


@app.post("/v1/admin/federated/reset")
def admin_federated_reset(request: Request):
    # Call reset() on the federated retriever.
    from ..middleware.federated_retrieval import get_federated_retriever

    federated = get_federated_retriever()
    federated.reset()
    return {"ok": True}


# ---- v3.11 Audit export ----------------------------------------------
@app.get("/v1/admin/audit_export")
def admin_audit_export_stats():
    # Return the audit exporter's stats plus its cursor list.
    from ..middleware.audit_export import get_audit_exporter

    exporter = get_audit_exporter()
    summary = {"stats": exporter.stats()}
    summary["cursors"] = exporter.list_cursors()
    return summary


@app.get("/v1/admin/audit_export/cursor/{name}")
def admin_audit_export_cursor(name: str):
    # Return one cursor; HTTP 404 when get_cursor() returns None.
    from ..middleware.audit_export import get_audit_exporter

    cursor = get_audit_exporter().get_cursor(name)
    if cursor is not None:
        return cursor
    raise HTTPException(status_code=404, detail={"error": "not found"})


@app.delete("/v1/admin/audit_export/cursor/{name}")
def admin_audit_export_reset_cursor(name: str, request: Request):
    # Reset one cursor; HTTP 404 when reset_cursor() returns a falsy value.
    from ..middleware.audit_export import get_audit_exporter

    was_reset = get_audit_exporter().reset_cursor(name)
    if was_reset:
        return {"ok": True, "name": name}
    raise HTTPException(status_code=404, detail={"error": "not found"})
# NOTE: route notes in this section are `#` comments rather than docstrings
# because FastAPI publishes a handler's docstring as its OpenAPI description.


@app.post("/v1/admin/audit_export/reset")
def admin_audit_export_reset(request: Request):
    # Call reset() on the audit exporter.
    from ..middleware.audit_export import get_audit_exporter

    exporter = get_audit_exporter()
    exporter.reset()
    return {"ok": True}


# ---- v3.12 Graph builder --------------------------------------------
# Body for POST /v1/admin/graph_builder/build.
class GraphBuildRequest(BaseModel):
    docs: List[Dict[str, Any]]
    apply_to_graph: bool = False


@app.get("/v1/admin/graph_builder")
def admin_graph_builder_stats():
    # Report the graph builder's stats().
    from ..middleware.graph_builder import get_graph_builder

    builder = get_graph_builder()
    return builder.stats()


@app.post("/v1/admin/graph_builder/build")
def admin_graph_builder_build(req: GraphBuildRequest, request: Request):
    # Build edge proposals from the supplied docs; RuntimeError is reported
    # as HTTP 400.
    from ..middleware.graph_builder import get_graph_builder

    try:
        proposals, build_stats = get_graph_builder().build_from_docs(
            docs=req.docs,
            apply_to_graph=req.apply_to_graph,
        )
    except RuntimeError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    response = {"stats": build_stats.to_dict()}
    response["proposals"] = [item.to_dict() for item in proposals]
    return response


@app.post("/v1/admin/graph_builder/reset")
def admin_graph_builder_reset(request: Request):
    # Call reset() on the graph builder.
    from ..middleware.graph_builder import get_graph_builder

    builder = get_graph_builder()
    builder.reset()
    return {"ok": True}


# ---- v3.13 Answer templates -----------------------------------------
# Body for POST /v1/admin/answer_templates/render.
class TemplateRenderRequest(BaseModel):
    template_id: str
    slots: Dict[str, str]


# Body for POST /v1/admin/answer_templates/render_auto; lang defaults to "he".
class TemplateRenderAutoRequest(BaseModel):
    intent: str
    lang: str = "he"
    slots: Dict[str, str]


@app.get("/v1/admin/answer_templates")
def admin_answer_templates_list():
    # Return the template store's stats plus its template list.
    from ..middleware.answer_templates import get_answer_templates

    store = get_answer_templates()
    summary = {"stats": store.stats()}
    summary["templates"] = store.list()
    return summary


@app.get("/v1/admin/answer_templates/{template_id}")
def admin_answer_template_get(template_id: str):
    # Return one template as plain data; HTTP 404 when the store has none.
    from ..middleware.answer_templates import get_answer_templates

    tpl = get_answer_templates().get(template_id)
    if tpl is None:
        raise HTTPException(status_code=404, detail={"error": "not found"})

    def _slot_view(slot):
        # Slots are read by attribute; sections are read with dict .get().
        return {
            "name": slot.name,
            "flag": slot.flag,
            "description": slot.description,
            "max_chars": slot.max_chars,
        }

    payload = {
        "template_id": tpl.template_id,
        "intent": tpl.intent,
        "lang": tpl.lang,
        "title_md": tpl.title_md,
    }
    section_views = []
    for section in tpl.sections:
        section_views.append({
            "heading": section.get("heading", ""),
            "body": section.get("body", ""),
            "slots": [_slot_view(slot) for slot in section.get("slots", [])],
        })
    payload["sections"] = section_views
    payload["footer_md"] = tpl.footer_md
    return payload


@app.post("/v1/admin/answer_templates/render")
def admin_answer_template_render(req: TemplateRenderRequest,
                                 request: Request):
    # Render a template by id; ValueError is reported as HTTP 404 here.
    from ..middleware.answer_templates import get_answer_templates

    try:
        rendering = get_answer_templates().render(
            template_id=req.template_id, slots=req.slots)
    except ValueError as exc:
        raise HTTPException(status_code=404, detail={"error": str(exc)})
    response = {"report": rendering.to_dict()}
    response["rendered"] = rendering.rendered
    return response


@app.post("/v1/admin/answer_templates/render_auto")
def admin_answer_template_render_auto(req: TemplateRenderAutoRequest,
                                      request: Request):
    # Render the template chosen by intent and lang; HTTP 404 when
    # render_auto() returns None.
    from ..middleware.answer_templates import get_answer_templates

    rendering = get_answer_templates().render_auto(
        intent=req.intent, slots=req.slots, lang=req.lang)
    if rendering is not None:
        response = {"report": rendering.to_dict()}
        response["rendered"] = rendering.rendered
        return response
    raise HTTPException(
        status_code=404,
        detail={"error": f"no template for intent={req.intent} "
                         f"lang={req.lang}"})


@app.post("/v1/admin/answer_templates/reset")
def admin_answer_templates_reset(request: Request):
    from ..middleware.answer_templates import (
        AnswerTemplateStore,
        set_answer_templates,
    )

    # Full reset with defaults re-registered
    fresh_store = AnswerTemplateStore()
    set_answer_templates(fresh_store)
    return {"ok": True}


# ---- v3.14 Clarification planner ------------------------------------
# Body for POST /v1/admin/clarify/analyze; only `query` is required.
class ClarifyRequest(BaseModel):
    query: str
    retrieval_count: Optional[int] = None
    intent_top2: Optional[List[Dict[str, Any]]] = None


@app.get("/v1/admin/clarify")
def admin_clarify_stats():
    # Return the clarification planner's stats plus its polysemous terms.
    from ..middleware.clarification import get_clarification_planner

    planner = get_clarification_planner()
    summary = {"stats": planner.stats()}
    summary["polysemous_terms"] = planner.list_polysemous_terms()
    return summary
@app.post("/v1/admin/clarify/analyze") def admin_clarify_analyze(req: ClarifyRequest, request: Request): from ..middleware.clarification import get_clarification_planner try: report = get_clarification_planner().analyze( query=req.query, retrieval_count=req.retrieval_count, intent_top2=[{"intent": x.get("intent", ""), "confidence": float(x.get("confidence", 0))} for x in (req.intent_top2 or [])] or None, ) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return report.to_dict() @app.post("/v1/admin/clarify/reset") def admin_clarify_reset(request: Request): from ..middleware.clarification import get_clarification_planner get_clarification_planner().reset() return {"ok": True} # ---- v3.15 Citation parser ------------------------------------------ class CitationParseRequest(BaseModel): text: str @app.get("/v1/admin/citation_parser") def admin_citation_parser_stats(): from ..middleware.citation_parser import get_citation_parser return get_citation_parser().stats() @app.post("/v1/admin/citation_parser/parse") def admin_citation_parser_parse(req: CitationParseRequest, request: Request): from ..middleware.citation_parser import get_citation_parser result = get_citation_parser().parse(req.text) return result.to_dict() @app.post("/v1/admin/citation_parser/reset") def admin_citation_parser_reset(request: Request): from ..middleware.citation_parser import get_citation_parser get_citation_parser().reset() return {"ok": True} # ---- v3.16 Spell correction ----------------------------------------- class SpellCorrectRequest(BaseModel): query: str class VocabAddRequest(BaseModel): words: List[str] @app.get("/v1/admin/spell_correct") def admin_spell_correct_stats(): from ..middleware.spell_correct import get_spell_corrector return get_spell_corrector().stats() @app.post("/v1/admin/spell_correct/correct") def admin_spell_correct_correct(req: SpellCorrectRequest, request: Request): from ..middleware.spell_correct import get_spell_corrector 
report = get_spell_corrector().correct(req.query) return report.to_dict() @app.post("/v1/admin/spell_correct/vocab") def admin_spell_correct_add_vocab(req: VocabAddRequest, request: Request): from ..middleware.spell_correct import get_spell_corrector added = get_spell_corrector().add_words(req.words) return {"ok": True, "added": added} @app.post("/v1/admin/spell_correct/reset") def admin_spell_correct_reset(request: Request): from ..middleware.spell_correct import get_spell_corrector get_spell_corrector().reset() return {"ok": True} # ---- v3.17 Retrieval budget ----------------------------------------- class BudgetProfileRequest(BaseModel): name: str unit_cost: float base_cost: float = 0.0 quality: float = 1.0 replace: bool = False class BudgetPlanRequest(BaseModel): retrievers: List[str] top_k: Optional[int] = None budget: Optional[float] = None mode: Optional[str] = None @app.get("/v1/admin/retrieval_budget") def admin_retrieval_budget_stats(): from ..middleware.retrieval_budget import get_retrieval_budget_planner p = get_retrieval_budget_planner() return { "stats": p.stats(), "profiles": p.list_profiles(), "active": p.list_active(), } @app.post("/v1/admin/retrieval_budget/profile") def admin_retrieval_budget_add_profile(req: BudgetProfileRequest, request: Request): from ..middleware.retrieval_budget import ( get_retrieval_budget_planner, RetrieverCostProfile, ) try: get_retrieval_budget_planner().register_profile( RetrieverCostProfile( name=req.name, unit_cost=req.unit_cost, base_cost=req.base_cost, quality=req.quality, ), replace=req.replace, ) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return {"ok": True, "name": req.name} @app.post("/v1/admin/retrieval_budget/plan") def admin_retrieval_budget_plan(req: BudgetPlanRequest, request: Request): from ..middleware.retrieval_budget import get_retrieval_budget_planner try: plan = get_retrieval_budget_planner().plan( retrievers=req.retrievers, top_k=req.top_k, 
budget=req.budget, mode=req.mode, ) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return plan.to_dict() @app.post("/v1/admin/retrieval_budget/reset") def admin_retrieval_budget_reset(request: Request): from ..middleware.retrieval_budget import get_retrieval_budget_planner get_retrieval_budget_planner().reset() return {"ok": True} # ---- v3.18 Document summarizer -------------------------------------- class SummarizeRequest(BaseModel): doc_id: str text: str length: str = "medium" n_sentences: Optional[int] = None class IndexDocsRequest(BaseModel): docs: List[Dict[str, Any]] @app.get("/v1/admin/doc_summarizer") def admin_doc_summarizer_stats(): from ..middleware.doc_summarizer import get_doc_summarizer return get_doc_summarizer().stats() @app.post("/v1/admin/doc_summarizer/summarize") def admin_doc_summarizer_summarize(req: SummarizeRequest, request: Request): from ..middleware.doc_summarizer import get_doc_summarizer try: result = get_doc_summarizer().summarize( doc_id=req.doc_id, text=req.text, length=req.length, n_sentences=req.n_sentences, ) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return result.to_dict() @app.post("/v1/admin/doc_summarizer/index") def admin_doc_summarizer_index(req: IndexDocsRequest, request: Request): from ..middleware.doc_summarizer import get_doc_summarizer n = get_doc_summarizer().index_corpus(req.docs) return {"ok": True, "unique_tokens": n} @app.post("/v1/admin/doc_summarizer/reset") def admin_doc_summarizer_reset(request: Request): from ..middleware.doc_summarizer import get_doc_summarizer get_doc_summarizer().reset() return {"ok": True} # ---- v3.19 Privilege filter ----------------------------------------- class ClassifyRequest(BaseModel): doc_id: str text: str class FilterRequest(BaseModel): doc_id: str text: str user_clearances: List[str] masking_mode: str = "mask" class PrivilegeRuleRequest(BaseModel): rule_id: str level: str pattern: str is_regex: 
bool = True description: str = "" replace: bool = False @app.get("/v1/admin/privilege") def admin_privilege_stats(): from ..middleware.privilege_filter import get_privilege_filter p = get_privilege_filter() return {"stats": p.stats(), "rules": p.list_rules()} @app.post("/v1/admin/privilege/classify") def admin_privilege_classify(req: ClassifyRequest, request: Request): from ..middleware.privilege_filter import get_privilege_filter result = get_privilege_filter().classify(req.doc_id, req.text) return result.to_dict() @app.post("/v1/admin/privilege/filter") def admin_privilege_filter(req: FilterRequest, request: Request): from ..middleware.privilege_filter import get_privilege_filter try: decision = get_privilege_filter().filter( req.doc_id, req.text, user_clearances=req.user_clearances, masking_mode=req.masking_mode, ) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) out = decision.to_dict() if decision.masked_text is not None: out["masked_text"] = decision.masked_text return out @app.post("/v1/admin/privilege/rule") def admin_privilege_add_rule(req: PrivilegeRuleRequest, request: Request): from ..middleware.privilege_filter import ( get_privilege_filter, PrivilegeRule, ) try: get_privilege_filter().add_rule( PrivilegeRule( rule_id=req.rule_id, level=req.level, pattern=req.pattern, is_regex=req.is_regex, description=req.description, ), replace=req.replace, ) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return {"ok": True, "rule_id": req.rule_id} @app.delete("/v1/admin/privilege/rule/{rule_id}") def admin_privilege_remove_rule(rule_id: str, request: Request): from ..middleware.privilege_filter import get_privilege_filter if not get_privilege_filter().remove_rule(rule_id): raise HTTPException(status_code=404, detail={"error": "not found"}) return {"ok": True, "rule_id": rule_id} @app.post("/v1/admin/privilege/reset") def admin_privilege_reset(request: Request): from 
..middleware.privilege_filter import ( PrivilegeFilter, set_privilege_filter, ) set_privilege_filter(PrivilegeFilter()) return {"ok": True} # ---- v3.20 Time travel ---------------------------------------------- class AddVersionRequest(BaseModel): doc_id: str version: str effective_from: float effective_to: Optional[float] = None text: str = "" metadata: Optional[Dict[str, Any]] = None class TimeTravelAtRequest(BaseModel): ts: float class TimeTravelDiffRequest(BaseModel): doc_id: str from_ts: float to_ts: float @app.get("/v1/admin/time_travel") def admin_time_travel_stats(): from ..middleware.time_travel import get_time_travel_index return get_time_travel_index().stats() @app.post("/v1/admin/time_travel/version") def admin_time_travel_add_version(req: AddVersionRequest, request: Request): from ..middleware.time_travel import ( get_time_travel_index, VersionedDoc, ) try: get_time_travel_index().add_version(VersionedDoc( doc_id=req.doc_id, version=req.version, effective_from=req.effective_from, effective_to=req.effective_to, text=req.text, metadata=req.metadata or {}, )) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return {"ok": True} @app.post("/v1/admin/time_travel/at") def admin_time_travel_at(req: TimeTravelAtRequest, request: Request): from ..middleware.time_travel import get_time_travel_index try: snap = get_time_travel_index().at(req.ts) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return { "ts": snap.ts, "n_docs": snap.n_docs(), "doc_ids": snap.list_doc_ids(), } @app.get("/v1/admin/time_travel/history/{doc_id}") def admin_time_travel_history(doc_id: str, include_text: bool = False): from ..middleware.time_travel import get_time_travel_index return { "doc_id": doc_id, "history": get_time_travel_index().history( doc_id, include_text=include_text), } @app.post("/v1/admin/time_travel/diff") def admin_time_travel_diff(req: TimeTravelDiffRequest, request: Request): from 
..middleware.time_travel import get_time_travel_index try: diff = get_time_travel_index().diff( req.doc_id, req.from_ts, req.to_ts) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return diff.to_dict() @app.post("/v1/admin/time_travel/reset") def admin_time_travel_reset(request: Request): from ..middleware.time_travel import get_time_travel_index get_time_travel_index().reset() return {"ok": True} # ---- v3.21 Fact extractor ------------------------------------------- class FactExtractRequest(BaseModel): doc_id: str text: str class FactExtractEnabledRequest(BaseModel): enabled_types: List[str] @app.get("/v1/admin/fact_extractor") def admin_fact_extractor_stats(): from ..middleware.fact_extractor import get_fact_extractor return get_fact_extractor().stats() @app.post("/v1/admin/fact_extractor/extract") def admin_fact_extractor_extract(req: FactExtractRequest, request: Request): from ..middleware.fact_extractor import get_fact_extractor result = get_fact_extractor().extract(req.doc_id, req.text) return result.to_dict() @app.post("/v1/admin/fact_extractor/enabled") def admin_fact_extractor_set_enabled(req: FactExtractEnabledRequest, request: Request): from ..middleware.fact_extractor import get_fact_extractor try: get_fact_extractor().set_enabled(req.enabled_types) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return {"ok": True, "enabled": req.enabled_types} @app.post("/v1/admin/fact_extractor/reset") def admin_fact_extractor_reset(request: Request): from ..middleware.fact_extractor import get_fact_extractor get_fact_extractor().reset() return {"ok": True} # ---- v3.22 Answer consensus ----------------------------------------- class CandidateItem(BaseModel): answerer: str answer_text: str citations: Optional[List[str]] = None confidence: Optional[float] = None class ConsensusRequest(BaseModel): question: str candidates: List[CandidateItem] @app.get("/v1/admin/consensus") def 
admin_consensus_stats(): from ..middleware.answer_consensus import get_answer_consensus return get_answer_consensus().stats() @app.post("/v1/admin/consensus/analyze") def admin_consensus_analyze(req: ConsensusRequest, request: Request): from ..middleware.answer_consensus import ( get_answer_consensus, AnswerCandidate, ) cands = [ AnswerCandidate( answerer=c.answerer, answer_text=c.answer_text, citations=c.citations or [], confidence=c.confidence, ) for c in req.candidates ] try: result = get_answer_consensus().analyze(req.question, cands) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return result.to_dict() @app.post("/v1/admin/consensus/reset") def admin_consensus_reset(request: Request): from ..middleware.answer_consensus import get_answer_consensus get_answer_consensus().reset() return {"ok": True} # ---- v3.23 Authority ranker ----------------------------------------- class AuthorityRankRequest(BaseModel): docs: List[Dict[str, Any]] query: Optional[str] = None class AuthorityWeightRequest(BaseModel): source_type: str weight: float @app.get("/v1/admin/authority") def admin_authority_stats(): from ..middleware.authority_ranker import get_authority_ranker r = get_authority_ranker() return { "stats": r.stats(), "weights": r.list_weights(), } @app.post("/v1/admin/authority/rank") def admin_authority_rank(req: AuthorityRankRequest, request: Request): from ..middleware.authority_ranker import get_authority_ranker try: result = get_authority_ranker().rank( docs=req.docs, query=req.query) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return result.to_dict() @app.post("/v1/admin/authority/weight") def admin_authority_set_weight(req: AuthorityWeightRequest, request: Request): from ..middleware.authority_ranker import get_authority_ranker try: get_authority_ranker().set_weight(req.source_type, req.weight) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) 
return {"ok": True} @app.delete("/v1/admin/authority/weight/{source_type}") def admin_authority_remove_weight(source_type: str, request: Request): from ..middleware.authority_ranker import get_authority_ranker if not get_authority_ranker().remove_weight(source_type): raise HTTPException(status_code=404, detail={"error": "not found"}) return {"ok": True, "source_type": source_type} @app.post("/v1/admin/authority/reset") def admin_authority_reset(request: Request): from ..middleware.authority_ranker import get_authority_ranker get_authority_ranker().reset() return {"ok": True} # ---- v3.24 Document comparator -------------------------------------- class DocCompareRequest(BaseModel): doc_a_id: str text_a: str doc_b_id: str text_b: str include_text: bool = True @app.get("/v1/admin/doc_comparator") def admin_doc_comparator_stats(): from ..middleware.doc_comparator import get_doc_comparator return get_doc_comparator().stats() @app.post("/v1/admin/doc_comparator/compare") def admin_doc_comparator_compare(req: DocCompareRequest, request: Request): from ..middleware.doc_comparator import get_doc_comparator try: result = get_doc_comparator().compare( doc_a_id=req.doc_a_id, text_a=req.text_a, doc_b_id=req.doc_b_id, text_b=req.text_b, ) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return result.to_dict(include_text=req.include_text) @app.post("/v1/admin/doc_comparator/reset") def admin_doc_comparator_reset(request: Request): from ..middleware.doc_comparator import get_doc_comparator get_doc_comparator().reset() return {"ok": True} # ---- v3.25 Similar docs --------------------------------------------- class AddSimDocRequest(BaseModel): doc_id: str text: str metadata: Optional[Dict[str, Any]] = None class SimDocsIndexRequest(BaseModel): docs: List[Dict[str, Any]] class SimDocsFindRequest(BaseModel): doc_id: Optional[str] = None text: Optional[str] = None top_k: int = 10 exclude_self: bool = True min_score: float = 0.0 filter: 
Optional[Dict[str, Any]] = None


# NOTE: the handlers and models below are documented with `#` comments rather
# than docstrings on purpose: FastAPI/Pydantic copy docstrings into the
# generated OpenAPI schema, so adding them would alter the published API.
# Admin-module imports stay inside each handler (lazy, at call time).


# GET: return the similar-document finder's stats().
@app.get("/v1/admin/similar_docs")
def admin_similar_docs_stats():
    from ..middleware.similar_docs import get_similar_doc_finder

    finder = get_similar_doc_finder()
    return finder.stats()


# POST: add one document (doc_id, text, metadata) to the finder.
# A ValueError raised inside the try is reported as HTTP 400.
@app.post("/v1/admin/similar_docs/add")
def admin_similar_docs_add(req: AddSimDocRequest, request: Request):
    from ..middleware.similar_docs import get_similar_doc_finder

    try:
        finder = get_similar_doc_finder()
        finder.add_doc(
            doc_id=req.doc_id,
            text=req.text,
            metadata=req.metadata,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


# POST: pass req.docs to add_corpus() and echo its return value as "added".
@app.post("/v1/admin/similar_docs/index")
def admin_similar_docs_index(req: SimDocsIndexRequest, request: Request):
    from ..middleware.similar_docs import get_similar_doc_finder

    finder = get_similar_doc_finder()
    corpus_result = finder.add_corpus(req.docs)
    return {"ok": True, "added": corpus_result}


# DELETE: remove a document; a falsy remove_doc() result becomes HTTP 404.
@app.delete("/v1/admin/similar_docs/{doc_id}")
def admin_similar_docs_remove(doc_id: str, request: Request):
    from ..middleware.similar_docs import get_similar_doc_finder

    removed = get_similar_doc_finder().remove_doc(doc_id)
    if removed:
        return {"ok": True}
    raise HTTPException(status_code=404, detail={"error": "not found"})


# POST: similarity search. A truthy req.doc_id selects the by-document search;
# otherwise a non-None req.text selects the by-text search; with neither, the
# ValueError raised here is converted to HTTP 400 like any finder ValueError.
@app.post("/v1/admin/similar_docs/find")
def admin_similar_docs_find(req: SimDocsFindRequest, request: Request):
    from ..middleware.similar_docs import get_similar_doc_finder

    finder = get_similar_doc_finder()
    try:
        if req.doc_id:
            matches = finder.find_similar_to_doc(
                doc_id=req.doc_id,
                top_k=req.top_k,
                exclude_self=req.exclude_self,
                min_score=req.min_score,
                filter=req.filter,
            )
        elif req.text is None:
            raise ValueError("either doc_id or text required")
        else:
            matches = finder.find_similar_to_text(
                text=req.text,
                top_k=req.top_k,
                min_score=req.min_score,
                filter=req.filter,
            )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return matches.to_dict()


# POST: call reset() on the similar-document finder.
@app.post("/v1/admin/similar_docs/reset")
def admin_similar_docs_reset(request: Request):
    from ..middleware.similar_docs import get_similar_doc_finder

    finder = get_similar_doc_finder()
    finder.reset()
    return {"ok": True}


# ---- v3.26 Query lifecycle ------------------------------------------

# Body of POST /v1/admin/lifecycle/start.
class LifecycleStartRequest(BaseModel):
    query_id: str
    query_text: str
    tenant_id: Optional[str] = None
    user_id: Optional[str] = None


# Body of POST /v1/admin/lifecycle/record.
class LifecycleRecordRequest(BaseModel):
    query_id: str
    stage: str
    ok: bool = True
    duration_ms: Optional[float] = None
    details: Optional[Dict[str, Any]] = None


# Body of POST /v1/admin/lifecycle/finish.
class LifecycleFinishRequest(BaseModel):
    query_id: str
    outcome: str = "ok"
    final_answer: Optional[str] = None
    final_confidence: Optional[float] = None
    total_cost: Optional[float] = None


# GET: reporter stats() plus the length of list_active().
@app.get("/v1/admin/lifecycle")
def admin_lifecycle_stats():
    from ..middleware.query_lifecycle import get_lifecycle_reporter

    reporter = get_lifecycle_reporter()
    stats = reporter.stats()
    active = reporter.list_active()
    return {
        "stats": stats,
        "n_active": len(active),
    }


# POST: call the reporter's start(); ValueError -> HTTP 400.
@app.post("/v1/admin/lifecycle/start")
def admin_lifecycle_start(req: LifecycleStartRequest, request: Request):
    from ..middleware.query_lifecycle import get_lifecycle_reporter

    try:
        started = get_lifecycle_reporter().start(
            query_id=req.query_id,
            query_text=req.query_text,
            tenant_id=req.tenant_id,
            user_id=req.user_id,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return started.to_dict()


# POST: call the reporter's record(); responds with the number of entries in
# the returned record's `events`. ValueError -> HTTP 400.
@app.post("/v1/admin/lifecycle/record")
def admin_lifecycle_record(req: LifecycleRecordRequest, request: Request):
    from ..middleware.query_lifecycle import get_lifecycle_reporter

    try:
        updated = get_lifecycle_reporter().record(
            query_id=req.query_id,
            stage=req.stage,
            ok=req.ok,
            duration_ms=req.duration_ms,
            details=req.details,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "n_events": len(updated.events)}


# POST: call the reporter's finish(). ValueError -> HTTP 400; a None result
# -> HTTP 404.
@app.post("/v1/admin/lifecycle/finish")
def admin_lifecycle_finish(req: LifecycleFinishRequest, request: Request):
    from ..middleware.query_lifecycle import get_lifecycle_reporter

    try:
        finished = get_lifecycle_reporter().finish(
            query_id=req.query_id,
            outcome=req.outcome,
            final_answer=req.final_answer,
            final_confidence=req.final_confidence,
            total_cost=req.total_cost,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    if finished is not None:
        return finished.to_dict()
    raise HTTPException(status_code=404,
                        detail={"error": "query_id not found"})


# GET: return get_record(query_id) as-is, or HTTP 404 when it is None.
@app.get("/v1/admin/lifecycle/record/{query_id}")
def admin_lifecycle_get_record(query_id: str):
    from ..middleware.query_lifecycle import get_lifecycle_reporter

    record = get_lifecycle_reporter().get_record(query_id)
    if record is not None:
        return record
    raise HTTPException(status_code=404, detail={"error": "not found"})


# GET: return summarize(query_id); here a ValueError maps to HTTP 404.
@app.get("/v1/admin/lifecycle/summary/{query_id}")
def admin_lifecycle_summary(query_id: str):
    from ..middleware.query_lifecycle import get_lifecycle_reporter

    try:
        summary = get_lifecycle_reporter().summarize(query_id)
    except ValueError as exc:
        raise HTTPException(status_code=404, detail={"error": str(exc)})
    return summary


# GET: return history(limit, outcome, tenant_id) under "records";
# ValueError -> HTTP 400.
@app.get("/v1/admin/lifecycle/history")
def admin_lifecycle_history(
    limit: int = 50,
    outcome: Optional[str] = None,
    tenant_id: Optional[str] = None,
):
    from ..middleware.query_lifecycle import get_lifecycle_reporter

    try:
        records = get_lifecycle_reporter().history(
            limit=limit,
            outcome=outcome,
            tenant_id=tenant_id,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {
        "records": records,
    }


# POST: call reset() on the lifecycle reporter.
@app.post("/v1/admin/lifecycle/reset")
def admin_lifecycle_reset(request: Request):
    from ..middleware.query_lifecycle import get_lifecycle_reporter

    reporter = get_lifecycle_reporter()
    reporter.reset()
    return {"ok": True}


# ---- v3.27 Timeline builder -----------------------------------------

# Body of POST /v1/admin/timeline/event.
class TimelineEventRequest(BaseModel):
    event_id: str
    iso_date: str
    description: str
    doc_id: Optional[str] = None
    event_type: Optional[str] = None
    metadata: Optional[Dict[str, Any]] = None
    confidence: float = 1.0


# Body of POST /v1/admin/timeline/ingest.
class TimelineIngestRequest(BaseModel):
    doc_id: str
    text: str


# Body of POST /v1/admin/timeline/build; every field is optional.
class TimelineBuildRequest(BaseModel):
    from_ts: Optional[float] = None
    to_ts: Optional[float] = None
    event_type: Optional[str] = None
    doc_id: Optional[str] = None


# GET: return the timeline builder's stats().
@app.get("/v1/admin/timeline")
def admin_timeline_stats():
    from ..middleware.timeline_builder import get_timeline_builder

    builder = get_timeline_builder()
    return builder.stats()


# POST: call add_event_simple(); ValueError -> HTTP 400.
@app.post("/v1/admin/timeline/event")
def admin_timeline_add_event(req: TimelineEventRequest, request: Request):
    from ..middleware.timeline_builder import get_timeline_builder

    try:
        created = get_timeline_builder().add_event_simple(
            event_id=req.event_id,
            iso_date=req.iso_date,
            description=req.description,
            doc_id=req.doc_id,
            event_type=req.event_type,
            metadata=req.metadata,
            confidence=req.confidence,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return created.to_dict()


# POST: call ingest_from_fact_extraction(doc_id, text) and echo its return
# value as "added". No exception translation is done here.
@app.post("/v1/admin/timeline/ingest")
def admin_timeline_ingest(req: TimelineIngestRequest, request: Request):
    from ..middleware.timeline_builder import get_timeline_builder

    builder = get_timeline_builder()
    ingested = builder.ingest_from_fact_extraction(req.doc_id, req.text)
    return {"ok": True, "added": ingested}


# DELETE: remove an event; a falsy remove_event() result becomes HTTP 404.
@app.delete("/v1/admin/timeline/event/{event_id}")
def admin_timeline_remove(event_id: str, request: Request):
    from ..middleware.timeline_builder import get_timeline_builder

    removed = get_timeline_builder().remove_event(event_id)
    if removed:
        return {"ok": True}
    raise HTTPException(status_code=404, detail={"error": "not found"})


# POST: call build() with the request's filters and return report.to_dict().
@app.post("/v1/admin/timeline/build")
def admin_timeline_build(req: TimelineBuildRequest, request: Request):
    from ..middleware.timeline_builder import get_timeline_builder

    builder = get_timeline_builder()
    timeline = builder.build(
        from_ts=req.from_ts,
        to_ts=req.to_ts,
        event_type=req.event_type,
        doc_id=req.doc_id,
    )
    return timeline.to_dict()


# POST: call reset() on the timeline builder.
@app.post("/v1/admin/timeline/reset")
def admin_timeline_reset(request: Request):
    from ..middleware.timeline_builder import get_timeline_builder

    builder = get_timeline_builder()
    builder.reset()
    return {"ok": True}


# ---- v3.28 Corpus contradictions ------------------------------------

# Body of POST /v1/admin/contradictions/crawl.
class CrawlRequest(BaseModel):
    docs: List[Dict[str, Any]]


# GET: return the contradiction crawler's stats().
@app.get("/v1/admin/contradictions")
def admin_contradictions_stats():
    from ..middleware.corpus_contradictions import get_contradiction_crawler

    crawler = get_contradiction_crawler()
    return crawler.stats()


# POST: call crawl(req.docs); ValueError -> HTTP 400.
@app.post("/v1/admin/contradictions/crawl")
def admin_contradictions_crawl(req: CrawlRequest, request: Request):
    from ..middleware.corpus_contradictions import get_contradiction_crawler

    try:
        crawl_report = get_contradiction_crawler().crawl(req.docs)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return crawl_report.to_dict()


# POST: call reset() on the contradiction crawler.
@app.post("/v1/admin/contradictions/reset")
def admin_contradictions_reset(request: Request):
    from ..middleware.corpus_contradictions import get_contradiction_crawler

    crawler = get_contradiction_crawler()
    crawler.reset()
    return {"ok": True}


# ---- v3.29 Anonymizer -----------------------------------------------

# Body of POST /v1/admin/anonymizer/anonymize.
class AnonymizeRequest(BaseModel):
    doc_id: str
    text: str
    mode: str = "hash"


# Body of POST /v1/admin/anonymizer/anonymize_bulk.
class AnonymizeBulkRequest(BaseModel):
    docs: List[Dict[str, Any]]
    mode: str = "hash"


# Body of POST /v1/admin/anonymizer/enabled.
class AnonTypesRequest(BaseModel):
    enabled_types: List[str]


# GET: return the anonymizer's stats().
@app.get("/v1/admin/anonymizer")
def admin_anonymizer_stats():
    from ..middleware.anonymizer import get_anonymizer

    anonymizer = get_anonymizer()
    return anonymizer.stats()


# POST: call anonymize() for one document; ValueError -> HTTP 400.
@app.post("/v1/admin/anonymizer/anonymize")
def admin_anonymizer_anonymize(req: AnonymizeRequest, request: Request):
    from ..middleware.anonymizer import get_anonymizer

    try:
        outcome = get_anonymizer().anonymize(
            doc_id=req.doc_id,
            text=req.text,
            mode=req.mode,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return outcome.to_dict()


# POST: call anonymize_docs(); each result is serialised with
# to_dict(include_text=False). ValueError -> HTTP 400.
@app.post("/v1/admin/anonymizer/anonymize_bulk")
def admin_anonymizer_bulk(req: AnonymizeBulkRequest, request: Request):
    from ..middleware.anonymizer import get_anonymizer

    try:
        outcomes = get_anonymizer().anonymize_docs(
            docs=req.docs,
            mode=req.mode,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    serialised = [item.to_dict(include_text=False) for item in outcomes]
    return {"results": serialised}


# POST: call set_enabled(req.enabled_types) and echo the requested list;
# ValueError -> HTTP 400.
@app.post("/v1/admin/anonymizer/enabled")
def admin_anonymizer_set_enabled(req: AnonTypesRequest, request: Request):
    from ..middleware.anonymizer import get_anonymizer

    try:
        anonymizer = get_anonymizer()
        anonymizer.set_enabled(req.enabled_types)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "enabled": req.enabled_types}


# POST: call reset() on the anonymizer.
@app.post("/v1/admin/anonymizer/reset")
def admin_anonymizer_reset(request: Request):
    from ..middleware.anonymizer import get_anonymizer

    anonymizer = get_anonymizer()
    anonymizer.reset()
    return {"ok": True}


# ---- v3.30 Document lineage -----------------------------------------

# Body of POST /v1/admin/lineage/register.
class LineageRegisterRequest(BaseModel):
    doc_id: str
    initial_hash: Optional[str] = None


# Body of POST /v1/admin/lineage/link.
class LineageLinkRequest(BaseModel):
    doc_id: str
    xform_type: str
    actor: str
    source_hash: Optional[str] = None
    output_hash: Optional[str] = None
    description: str = ""
    metadata: Optional[Dict[str, Any]] = None


# Body of POST /v1/admin/lineage/supersede.
class LineageSupersedeRequest(BaseModel):
    old_doc_id: str
    new_doc_id: str
    actor: str
    description: str = ""


# GET: return the document-lineage store's stats().
@app.get("/v1/admin/lineage")
def admin_lineage_stats():
    from ..middleware.doc_lineage import get_doc_lineage

    lineage = get_doc_lineage()
    return lineage.stats()


# POST: call register_doc(); ValueError -> HTTP 400.
@app.post("/v1/admin/lineage/register")
def admin_lineage_register(req: LineageRegisterRequest, request: Request):
    from ..middleware.doc_lineage import get_doc_lineage

    try:
        lineage = get_doc_lineage()
        lineage.register_doc(
            doc_id=req.doc_id,
            initial_hash=req.initial_hash,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


# POST: call add_link() and return the new link's to_dict();
# ValueError -> HTTP 400.
@app.post("/v1/admin/lineage/link")
def admin_lineage_add_link(req: LineageLinkRequest, request: Request):
    from ..middleware.doc_lineage import get_doc_lineage

    try:
        new_link = get_doc_lineage().add_link(
            doc_id=req.doc_id,
            xform_type=req.xform_type,
            actor=req.actor,
            source_hash=req.source_hash,
            output_hash=req.output_hash,
            description=req.description,
            metadata=req.metadata,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return new_link.to_dict()


# POST: call supersede() and return the resulting link's to_dict();
# ValueError -> HTTP 400.
@app.post("/v1/admin/lineage/supersede")
def admin_lineage_supersede(req: LineageSupersedeRequest, request: Request):
    from ..middleware.doc_lineage import get_doc_lineage

    try:
        new_link = get_doc_lineage().supersede(
            old_doc_id=req.old_doc_id,
            new_doc_id=req.new_doc_id,
            actor=req.actor,
            description=req.description,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return new_link.to_dict()


# GET: return get_record(doc_id, include_links=...) as-is, or HTTP 404 when
# it is None.
@app.get("/v1/admin/lineage/doc/{doc_id}")
def admin_lineage_get(doc_id: str, include_links: bool = True):
    from ..middleware.doc_lineage import get_doc_lineage

    record = get_doc_lineage().get_record(
        doc_id, include_links=include_links)
    if record is not None:
        return record
    raise HTTPException(status_code=404, detail={"error": "not found"})


# GET: return verify(doc_id).to_dict(). No exception translation here.
@app.get("/v1/admin/lineage/verify/{doc_id}")
def admin_lineage_verify(doc_id: str):
    from ..middleware.doc_lineage import get_doc_lineage

    verification = get_doc_lineage().verify(doc_id)
    return verification.to_dict()


# GET: return verify_all() unchanged.
@app.get("/v1/admin/lineage/verify_all")
def admin_lineage_verify_all():
    from ..middleware.doc_lineage import get_doc_lineage

    lineage = get_doc_lineage()
    return lineage.verify_all()


# POST: call reset() on the document-lineage store.
@app.post("/v1/admin/lineage/reset")
def admin_lineage_reset(request: Request):
    from ..middleware.doc_lineage import get_doc_lineage

    lineage = get_doc_lineage()
    lineage.reset()
    return {"ok": True}


# ---- v3.31 Result explainer -----------------------------------------

# Body of POST /v1/admin/explain/doc.
class ExplainDocRequest(BaseModel):
    query: str
    doc: Dict[str, Any]
    lang: Optional[str] = None


# Body of POST /v1/admin/explain/results.
class ExplainResultsRequest(BaseModel):
    query: str
    docs: List[Dict[str, Any]]
    lang: Optional[str] = None


# GET: return the result explainer's stats().
@app.get("/v1/admin/explain")
def admin_explain_stats():
    from ..middleware.result_explainer import get_result_explainer

    explainer = get_result_explainer()
    return explainer.stats()


# POST: call explain_doc(); ValueError -> HTTP 400.
@app.post("/v1/admin/explain/doc")
def admin_explain_doc(req: ExplainDocRequest, request: Request):
    from ..middleware.result_explainer import get_result_explainer

    try:
        explanation = get_result_explainer().explain_doc(
            query=req.query,
            doc=req.doc,
            lang=req.lang,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return explanation.to_dict()


# POST: call explain_results(); ValueError -> HTTP 400.
@app.post("/v1/admin/explain/results")
def admin_explain_results(req: ExplainResultsRequest, request: Request):
    from ..middleware.result_explainer import get_result_explainer

    try:
        explanations = get_result_explainer().explain_results(
            query=req.query,
            docs=req.docs,
            lang=req.lang,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return explanations.to_dict()


# POST: call reset() on the result explainer.
@app.post("/v1/admin/explain/reset")
def admin_explain_reset(request: Request):
    from ..middleware.result_explainer import get_result_explainer

    explainer = get_result_explainer()
    explainer.reset()
    return {"ok": True}


# ---- v3.32 Audit anomaly detector -----------------------------------

# Body of POST /v1/admin/audit_anomaly/detect.
class AnomalyDetectRequest(BaseModel):
    records: List[Dict[str, Any]]


# Body of POST /v1/admin/audit_anomaly/baseline_actions.
class AnomalyBaselineActionsRequest(BaseModel):
    actor: str
    actions: List[str]


# Body of POST /v1/admin/audit_anomaly/baseline_volume.
class AnomalyBaselineVolumeRequest(BaseModel):
    action: str
    mean: float


# GET: return the audit anomaly detector's stats().
@app.get("/v1/admin/audit_anomaly")
def admin_audit_anomaly_stats():
    from ..middleware.audit_anomaly import get_audit_anomaly_detector

    detector = get_audit_anomaly_detector()
    return detector.stats()


# POST: call detect(req.records); ValueError -> HTTP 400.
@app.post("/v1/admin/audit_anomaly/detect")
def admin_audit_anomaly_detect(req: AnomalyDetectRequest, request: Request):
    from ..middleware.audit_anomaly import get_audit_anomaly_detector

    try:
        findings = get_audit_anomaly_detector().detect(req.records)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return findings.to_dict()


# POST: call add_baseline_actions(); ValueError -> HTTP 400.
@app.post("/v1/admin/audit_anomaly/baseline_actions")
def admin_audit_anomaly_baseline_actions(
    req: AnomalyBaselineActionsRequest, request: Request
):
    from ..middleware.audit_anomaly import get_audit_anomaly_detector

    try:
        detector = get_audit_anomaly_detector()
        detector.add_baseline_actions(
            actor=req.actor,
            actions=req.actions,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


# POST: call set_baseline_volume(); ValueError -> HTTP 400.
@app.post("/v1/admin/audit_anomaly/baseline_volume")
def admin_audit_anomaly_baseline_volume(
    req: AnomalyBaselineVolumeRequest, request: Request
):
    from ..middleware.audit_anomaly import get_audit_anomaly_detector

    try:
        detector = get_audit_anomaly_detector()
        detector.set_baseline_volume(
            action=req.action,
            mean=req.mean,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


# POST: call reset() on the audit anomaly detector.
@app.post("/v1/admin/audit_anomaly/reset")
def admin_audit_anomaly_reset(request: Request):
    from ..middleware.audit_anomaly import get_audit_anomaly_detector

    detector = get_audit_anomaly_detector()
    detector.reset()
    return {"ok": True}


# ---- v3.33 LLM router -----------------------------------------------

# Body of POST /v1/admin/llm_router/backend. `replace` is forwarded to
# register_backend(); the other fields populate an LLMBackend.
class LLMBackendRequest(BaseModel):
    name: str
    cost_per_1k_input_tokens: float
    cost_per_1k_output_tokens: float
    avg_latency_ms: float
    quality_tier: float
    max_context_tokens: int = 200_000
    replace: bool = False


# Body of POST /v1/admin/llm_router/route.
class LLMRouteRequest(BaseModel):
    query: str
    tier: Optional[str] = None
    budget_cents: Optional[float] = None
    latency_sla_ms: Optional[float] = None
    tenant_id: Optional[str] = None
    intent: Optional[str] = None
    n_subqueries: Optional[int] = None
    estimated_input_tokens: int = 500
    estimated_output_tokens: int = 300


# GET: router stats(), list_backends() and list_tenant_overrides().
@app.get("/v1/admin/llm_router")
def admin_llm_router_stats():
    from ..middleware.llm_router import get_llm_router

    router = get_llm_router()
    stats = router.stats()
    backends = router.list_backends()
    overrides = router.list_tenant_overrides()
    return {
        "stats": stats,
        "backends": backends,
        "tenant_overrides": overrides,
    }


# POST: build an LLMBackend from the request and register it;
# ValueError -> HTTP 400.
@app.post("/v1/admin/llm_router/backend")
def admin_llm_router_add_backend(req: LLMBackendRequest, request: Request):
    from ..middleware.llm_router import (
        get_llm_router,
        LLMBackend,
    )

    try:
        router = get_llm_router()
        backend = LLMBackend(
            name=req.name,
            cost_per_1k_input_tokens=req.cost_per_1k_input_tokens,
            cost_per_1k_output_tokens=req.cost_per_1k_output_tokens,
            avg_latency_ms=req.avg_latency_ms,
            quality_tier=req.quality_tier,
            max_context_tokens=req.max_context_tokens,
        )
        router.register_backend(backend, replace=req.replace)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


# DELETE: unregister a backend; a falsy result becomes HTTP 404.
@app.delete("/v1/admin/llm_router/backend/{name}")
def admin_llm_router_remove_backend(name: str, request: Request):
    from ..middleware.llm_router import get_llm_router

    removed = get_llm_router().unregister_backend(name)
    if removed:
        return {"ok": True}
    raise HTTPException(status_code=404, detail={"error": "not found"})


# POST: call route() with the request's fields and return
# decision.to_dict(); ValueError -> HTTP 400.
@app.post("/v1/admin/llm_router/route")
def admin_llm_router_route(req: LLMRouteRequest, request: Request):
    from ..middleware.llm_router import get_llm_router

    try:
        routing = get_llm_router().route(
            query=req.query,
            tier=req.tier,
            budget_cents=req.budget_cents,
            latency_sla_ms=req.latency_sla_ms,
            tenant_id=req.tenant_id,
            intent=req.intent,
            n_subqueries=req.n_subqueries,
            estimated_input_tokens=req.estimated_input_tokens,
            estimated_output_tokens=req.estimated_output_tokens,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return routing.to_dict()


# POST: call reset() on the LLM router.
@app.post("/v1/admin/llm_router/reset")
def admin_llm_router_reset(request: Request):
    from ..middleware.llm_router import get_llm_router

    router = get_llm_router()
    router.reset()
    return {"ok": True}


# ---- v3.34 Citation expander ----------------------------------------

# Body of POST /v1/admin/citation_expander/expand.
class CitationExpandRequest(BaseModel):
    text: str


# GET: return the citation expander's stats().
@app.get("/v1/admin/citation_expander")
def admin_citation_expander_stats():
    from ..middleware.citation_expander import get_citation_expander

    expander = get_citation_expander()
    return expander.stats()


# POST: call expand(req.text); ValueError -> HTTP 400.
@app.post("/v1/admin/citation_expander/expand")
def admin_citation_expander_expand(req: CitationExpandRequest, request: Request):
    from ..middleware.citation_expander import get_citation_expander

    try:
        expansion = get_citation_expander().expand(req.text)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return expansion.to_dict()


# POST: call clear_cache() on the citation expander.
@app.post("/v1/admin/citation_expander/clear_cache")
def admin_citation_expander_clear(request: Request):
    from ..middleware.citation_expander import get_citation_expander

    expander = get_citation_expander()
    expander.clear_cache()
    return {"ok": True}
@app.post("/v1/admin/citation_expander/reset") def admin_citation_expander_reset(request: Request): from ..middleware.citation_expander import get_citation_expander get_citation_expander().reset() return {"ok": True} # ---- v3.35 Answer quality gate -------------------------------------- class QualityCheckRequest(BaseModel): answer: str confidence: Optional[float] = None grounded_ratio: Optional[float] = None template_report: Optional[Dict[str, Any]] = None class UnsafePatternRequest(BaseModel): pattern: str @app.get("/v1/admin/quality_gate") def admin_quality_gate_stats(): from ..middleware.answer_quality_gate import get_answer_quality_gate return get_answer_quality_gate().stats() @app.post("/v1/admin/quality_gate/check") def admin_quality_gate_check(req: QualityCheckRequest, request: Request): from ..middleware.answer_quality_gate import get_answer_quality_gate try: verdict = get_answer_quality_gate().check( answer=req.answer, confidence=req.confidence, grounded_ratio=req.grounded_ratio, template_report=req.template_report, ) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return verdict.to_dict() @app.post("/v1/admin/quality_gate/unsafe_pattern") def admin_quality_gate_add_pattern(req: UnsafePatternRequest, request: Request): from ..middleware.answer_quality_gate import get_answer_quality_gate try: get_answer_quality_gate().add_unsafe_pattern(req.pattern) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return {"ok": True} @app.post("/v1/admin/quality_gate/reset") def admin_quality_gate_reset(request: Request): from ..middleware.answer_quality_gate import get_answer_quality_gate get_answer_quality_gate().reset() return {"ok": True} # ---- v3.36 Coverage monitor ----------------------------------------- class CoverageRecordRequest(BaseModel): query: str intent: Optional[str] = None topic: Optional[str] = None n_retrieved: int = 0 max_score: Optional[float] = None final_confidence: 
Optional[float] = None user_feedback: Optional[str] = None class CoverageFeedbackRequest(BaseModel): query_substring: str feedback: str @app.get("/v1/admin/coverage") def admin_coverage_stats(): from ..middleware.coverage_monitor import get_coverage_monitor return get_coverage_monitor().stats() @app.post("/v1/admin/coverage/record") def admin_coverage_record(req: CoverageRecordRequest, request: Request): from ..middleware.coverage_monitor import get_coverage_monitor try: get_coverage_monitor().record( query=req.query, intent=req.intent, topic=req.topic, n_retrieved=req.n_retrieved, max_score=req.max_score, final_confidence=req.final_confidence, user_feedback=req.user_feedback, ) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return {"ok": True} @app.post("/v1/admin/coverage/feedback") def admin_coverage_feedback(req: CoverageFeedbackRequest, request: Request): from ..middleware.coverage_monitor import get_coverage_monitor try: n = get_coverage_monitor().record_feedback( query_substring=req.query_substring, feedback=req.feedback, ) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return {"ok": True, "updated": n} @app.get("/v1/admin/coverage/analyze") def admin_coverage_analyze(group_by: str = "both"): from ..middleware.coverage_monitor import get_coverage_monitor try: report = get_coverage_monitor().analyze(group_by=group_by) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return report.to_dict() @app.get("/v1/admin/coverage/gaps") def admin_coverage_gaps(group_by: str = "both"): from ..middleware.coverage_monitor import get_coverage_monitor try: gaps = get_coverage_monitor().gaps(group_by=group_by) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return {"gaps": [g.to_dict() for g in gaps]} @app.post("/v1/admin/coverage/reset") def admin_coverage_reset(request: Request): from ..middleware.coverage_monitor import 
get_coverage_monitor get_coverage_monitor().reset() return {"ok": True} # ---- v3.37 Follow-up rewriter --------------------------------------- class FollowupRewriteRequest(BaseModel): query: str history: Optional[List[str]] = None @app.get("/v1/admin/followup") def admin_followup_stats(): from ..middleware.followup_rewriter import get_followup_rewriter return get_followup_rewriter().stats() @app.post("/v1/admin/followup/rewrite") def admin_followup_rewrite(req: FollowupRewriteRequest, request: Request): from ..middleware.followup_rewriter import get_followup_rewriter try: result = get_followup_rewriter().rewrite( query=req.query, history=req.history) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return result.to_dict() @app.post("/v1/admin/followup/reset") def admin_followup_reset(request: Request): from ..middleware.followup_rewriter import get_followup_rewriter get_followup_rewriter().reset() return {"ok": True} # ---- v3.38 Reasoning chain ------------------------------------------ class ReasoningStartRequest(BaseModel): chain_id: str query: str class ReasoningStepRequest(BaseModel): chain_id: str step_id: str step_type: str text: str support: Optional[List[str]] = None dependencies: Optional[List[str]] = None confidence: float = 1.0 metadata: Optional[Dict[str, Any]] = None @app.get("/v1/admin/reasoning") def admin_reasoning_stats(): from ..middleware.reasoning_chain import get_reasoning_recorder r = get_reasoning_recorder() return {"stats": r.stats(), "chains": r.list_chains()} @app.post("/v1/admin/reasoning/start") def admin_reasoning_start(req: ReasoningStartRequest, request: Request): from ..middleware.reasoning_chain import get_reasoning_recorder try: chain = get_reasoning_recorder().start_chain( chain_id=req.chain_id, query=req.query) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return chain.to_dict() @app.post("/v1/admin/reasoning/step") def admin_reasoning_step(req: 
ReasoningStepRequest, request: Request): from ..middleware.reasoning_chain import get_reasoning_recorder try: step = get_reasoning_recorder().add_step( chain_id=req.chain_id, step_id=req.step_id, step_type=req.step_type, text=req.text, support=req.support, dependencies=req.dependencies, confidence=req.confidence, metadata=req.metadata, ) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return step.to_dict() @app.get("/v1/admin/reasoning/validate/{chain_id}") def admin_reasoning_validate(chain_id: str): from ..middleware.reasoning_chain import get_reasoning_recorder return get_reasoning_recorder().validate(chain_id).to_dict() @app.post("/v1/admin/reasoning/finalize/{chain_id}") def admin_reasoning_finalize(chain_id: str, request: Request): from ..middleware.reasoning_chain import get_reasoning_recorder report = get_reasoning_recorder().finalize(chain_id) return report.to_dict() @app.get("/v1/admin/reasoning/chain/{chain_id}") def admin_reasoning_get(chain_id: str): from ..middleware.reasoning_chain import get_reasoning_recorder chain = get_reasoning_recorder().get_chain(chain_id) if chain is None: raise HTTPException(status_code=404, detail={"error": "not found"}) return chain @app.get("/v1/admin/reasoning/render/{chain_id}") def admin_reasoning_render(chain_id: str): from fastapi.responses import PlainTextResponse from ..middleware.reasoning_chain import get_reasoning_recorder md = get_reasoning_recorder().render_markdown(chain_id) return PlainTextResponse(md) @app.delete("/v1/admin/reasoning/chain/{chain_id}") def admin_reasoning_remove(chain_id: str, request: Request): from ..middleware.reasoning_chain import get_reasoning_recorder if not get_reasoning_recorder().remove_chain(chain_id): raise HTTPException(status_code=404, detail={"error": "not found"}) return {"ok": True} @app.post("/v1/admin/reasoning/reset") def admin_reasoning_reset(request: Request): from ..middleware.reasoning_chain import get_reasoning_recorder 
get_reasoning_recorder().reset() return {"ok": True} # ---- v3.39 Injection detector --------------------------------------- class InjectionScanRequest(BaseModel): text: str source: str = "query" class InjectionScanDocsRequest(BaseModel): docs: List[Dict[str, Any]] class InjectionRuleRequest(BaseModel): rule_id: str category: str severity: str pattern: str replace: bool = False @app.get("/v1/admin/injection") def admin_injection_stats(): from ..middleware.injection_detector import get_injection_detector d = get_injection_detector() return {"stats": d.stats(), "rules": d.list_rules()} @app.post("/v1/admin/injection/scan") def admin_injection_scan(req: InjectionScanRequest, request: Request): from ..middleware.injection_detector import get_injection_detector try: result = get_injection_detector().scan( req.text, source=req.source) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return result.to_dict() @app.post("/v1/admin/injection/scan_docs") def admin_injection_scan_docs(req: InjectionScanDocsRequest, request: Request): from ..middleware.injection_detector import get_injection_detector scans = get_injection_detector().scan_docs(req.docs) return {"scans": [s.to_dict() for s in scans]} @app.post("/v1/admin/injection/filter") def admin_injection_filter(req: InjectionScanDocsRequest, request: Request): from ..middleware.injection_detector import get_injection_detector safe, unsafe = get_injection_detector().filter_safe_docs(req.docs) return { "n_safe": len(safe), "n_unsafe": len(unsafe), "safe_doc_ids": [d.get("doc_id") or d.get("id", "") for d in safe], "unsafe_doc_ids": [d.get("doc_id") or d.get("id", "") for d in unsafe], } @app.post("/v1/admin/injection/rule") def admin_injection_add_rule(req: InjectionRuleRequest, request: Request): from ..middleware.injection_detector import ( get_injection_detector, InjectionRule, ) try: get_injection_detector().add_rule( InjectionRule( rule_id=req.rule_id, category=req.category, 
severity=req.severity, pattern=req.pattern, ), replace=req.replace, ) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return {"ok": True} @app.delete("/v1/admin/injection/rule/{rule_id}") def admin_injection_remove_rule(rule_id: str, request: Request): from ..middleware.injection_detector import get_injection_detector if not get_injection_detector().remove_rule(rule_id): raise HTTPException(status_code=404, detail={"error": "not found"}) return {"ok": True} @app.post("/v1/admin/injection/reset") def admin_injection_reset(request: Request): from ..middleware.injection_detector import ( InjectionDetector, set_injection_detector, ) set_injection_detector(InjectionDetector()) return {"ok": True} # ---- v3.40 Cost forecaster ------------------------------------------ class StageProfileRequest(BaseModel): name: str base_cost_cents: float = 0.0 base_latency_ms: float = 0.0 per_token_cost_cents: float = 0.0 per_token_latency_ms: float = 0.0 per_doc_cost_cents: float = 0.0 replace: bool = False class ForecastRequest(BaseModel): query: str intent: Optional[str] = None n_subqueries: Optional[int] = None top_k: int = 10 estimated_output_tokens: int = 300 stages: Optional[List[str]] = None budget_cents: Optional[float] = None sla_ms: Optional[float] = None class RecordActualRequest(BaseModel): intent: str complexity: str actual_cost_cents: float actual_latency_ms: float @app.get("/v1/admin/cost_forecast") def admin_cost_forecast_stats(): from ..middleware.cost_forecaster import get_cost_forecaster f = get_cost_forecaster() return {"stats": f.stats(), "stages": f.list_stages()} @app.post("/v1/admin/cost_forecast/stage") def admin_cost_forecast_add_stage(req: StageProfileRequest, request: Request): from ..middleware.cost_forecaster import ( get_cost_forecaster, StageProfile, ) try: get_cost_forecaster().register_stage( StageProfile( name=req.name, base_cost_cents=req.base_cost_cents, base_latency_ms=req.base_latency_ms, 
per_token_cost_cents=req.per_token_cost_cents, per_token_latency_ms=req.per_token_latency_ms, per_doc_cost_cents=req.per_doc_cost_cents, ), replace=req.replace, ) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return {"ok": True} @app.post("/v1/admin/cost_forecast/forecast") def admin_cost_forecast_run(req: ForecastRequest, request: Request): from ..middleware.cost_forecaster import get_cost_forecaster try: forecast = get_cost_forecaster().forecast( query=req.query, intent=req.intent, n_subqueries=req.n_subqueries, top_k=req.top_k, estimated_output_tokens=req.estimated_output_tokens, stages=req.stages, budget_cents=req.budget_cents, sla_ms=req.sla_ms, ) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return forecast.to_dict() @app.post("/v1/admin/cost_forecast/actual") def admin_cost_forecast_record(req: RecordActualRequest, request: Request): from ..middleware.cost_forecaster import get_cost_forecaster try: get_cost_forecaster().record_actual( intent=req.intent, complexity=req.complexity, actual_cost_cents=req.actual_cost_cents, actual_latency_ms=req.actual_latency_ms, ) except ValueError as e: raise HTTPException(status_code=400, detail={"error": str(e)}) return {"ok": True} @app.post("/v1/admin/cost_forecast/reset") def admin_cost_forecast_reset(request: Request): from ..middleware.cost_forecaster import get_cost_forecaster get_cost_forecaster().reset() return {"ok": True} # ---- v3.41 Cross-lingual bridge ------------------------------------- class BridgeRequest(BaseModel): query: str target_lang: Optional[str] = None source_lang: Optional[str] = None class AddTermRequest(BaseModel): en: str he: str domain: str = "general" confidence: float = 1.0 class TranslateTermRequest(BaseModel): term: str source_lang: str @app.get("/v1/admin/crosslingual") def admin_crosslingual_stats(): from ..middleware.crosslingual_bridge import get_crosslingual_bridge return 
get_crosslingual_bridge().stats()


@app.post("/v1/admin/crosslingual/bridge")
def admin_crosslingual_bridge(req: BridgeRequest, request: Request):
    # Run the cross-lingual bridge for one query; ValueError -> HTTP 400.
    from ..middleware.crosslingual_bridge import get_crosslingual_bridge
    try:
        bridged = get_crosslingual_bridge().bridge(
            query=req.query,
            target_lang=req.target_lang,
            source_lang=req.source_lang,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return bridged.to_dict()


@app.post("/v1/admin/crosslingual/term")
def admin_crosslingual_add_term(req: AddTermRequest, request: Request):
    # Register one en/he term pair through the bridge's add_term().
    from ..middleware.crosslingual_bridge import get_crosslingual_bridge
    try:
        get_crosslingual_bridge().add_term(
            en=req.en,
            he=req.he,
            domain=req.domain,
            confidence=req.confidence,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


@app.post("/v1/admin/crosslingual/translate")
def admin_crosslingual_translate(req: TranslateTermRequest, request: Request):
    # Translate a single term: 400 on ValueError, 404 when the lookup
    # returns None.
    from ..middleware.crosslingual_bridge import get_crosslingual_bridge
    try:
        translation = get_crosslingual_bridge().translate_term(
            req.term, req.source_lang)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    if translation is None:
        raise HTTPException(status_code=404,
                            detail={"error": "term not in dictionary"})
    return {"term": req.term, "translation": translation}


@app.get("/v1/admin/crosslingual/terms")
def admin_crosslingual_list_terms(
    domain: Optional[str] = None,
    limit: int = 100,
):
    # List terms, optionally filtered by domain and capped by limit.
    from ..middleware.crosslingual_bridge import get_crosslingual_bridge
    try:
        listed = get_crosslingual_bridge().list_terms(
            domain=domain, limit=limit)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"terms": listed}


@app.post("/v1/admin/crosslingual/reset")
def admin_crosslingual_reset(request: Request):
    # Call reset() on the shared bridge instance.
    from ..middleware.crosslingual_bridge import get_crosslingual_bridge
    get_crosslingual_bridge().reset()
    return {"ok": True}


# ---- v3.42 Diversity enforcer ---------------------------------------
class DiversityEnforceRequest(BaseModel):
    # Body for POST /v1/admin/diversity/enforce.
    docs: List[Dict[str, Any]]
    top_k: int = 10
    lambda_diversity: Optional[float] = None


@app.get("/v1/admin/diversity")
def admin_diversity_stats():
    # Report the enforcer's stats together with its dimension list.
    from ..middleware.diversity_enforcer import get_diversity_enforcer
    enforcer = get_diversity_enforcer()
    return {
        "stats": enforcer.stats(),
        "dimensions": enforcer.list_dimensions(),
    }


@app.post("/v1/admin/diversity/enforce")
def admin_diversity_enforce(req: DiversityEnforceRequest, request: Request):
    # Run enforce() over the supplied docs; ValueError -> HTTP 400.
    from ..middleware.diversity_enforcer import get_diversity_enforcer
    try:
        outcome = get_diversity_enforcer().enforce(
            docs=req.docs,
            top_k=req.top_k,
            lambda_diversity=req.lambda_diversity,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return outcome.to_dict()


class DiversityEnforcerLambdaRequest(BaseModel):
    # Body for POST /v1/admin/diversity_enforcer/lambda.
    value: float


@app.post("/v1/admin/diversity_enforcer/lambda")
def admin_diversity_enforcer_set_lambda(
    req: DiversityEnforcerLambdaRequest,
    request: Request,
):
    # Update the enforcer's lambda and echo the requested value back.
    from ..middleware.diversity_enforcer import get_diversity_enforcer
    try:
        get_diversity_enforcer().set_lambda(req.value)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "lambda_diversity": req.value}


@app.post("/v1/admin/diversity/reset")
def admin_diversity_reset(request: Request):
    # Call reset() on the shared diversity enforcer.
    from ..middleware.diversity_enforcer import get_diversity_enforcer
    get_diversity_enforcer().reset()
    return {"ok": True}


# ---- v3.43 Session exporter -----------------------------------------
class SessionExportRequest(BaseModel):
    # Body for POST /v1/admin/session_export/export.
    session: Dict[str, Any]
    format: Optional[str] = None


@app.get("/v1/admin/session_export")
def admin_session_export_stats():
    # Return the session exporter's stats() payload unchanged.
    from ..middleware.session_exporter import get_session_exporter
    return get_session_exporter().stats()


@app.post("/v1/admin/session_export/export")
def admin_session_export(req: SessionExportRequest, request: Request):
    # Rebuild a session object from the posted dict, then export it.
    # A ValueError from either step is reported as HTTP 400.
    from ..middleware.session_exporter import (
        get_session_exporter,
        SessionExporter,
    )
    try:
        session = SessionExporter.session_from_dict(req.session)
        exported = get_session_exporter().export(session, format=req.format)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return exported.to_dict()


class ExportFormatRequest(BaseModel):
    # Body for POST /v1/admin/session_export/format.
    value: str


class ExportAnonymizeRequest(BaseModel):
    # Body for POST /v1/admin/session_export/anonymize.
    value: bool


@app.post("/v1/admin/session_export/format")
def admin_session_export_format(req: ExportFormatRequest, request: Request):
    # Change the exporter's default format; ValueError -> HTTP 400.
    from ..middleware.session_exporter import get_session_exporter
    try:
        get_session_exporter().set_default_format(req.value)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "default_format": req.value}


@app.post("/v1/admin/session_export/anonymize")
def admin_session_export_anonymize(req: ExportAnonymizeRequest,
                                   request: Request):
    # Toggle the exporter's redactor on or off.
    from ..middleware.session_exporter import get_session_exporter
    get_session_exporter().set_redactor_enabled(req.value)
    return {"ok": True, "redactor_enabled": req.value}


@app.post("/v1/admin/session_export/reset")
def admin_session_export_reset(request: Request):
    # Call reset() on the shared session exporter.
    from ..middleware.session_exporter import get_session_exporter
    get_session_exporter().reset()
    return {"ok": True}


# ---- v3.44 Fact consistency -----------------------------------------
class FactCheckRequest(BaseModel):
    # Body for POST /v1/admin/fact_check/check.
    answer: str


class FactLawRequest(BaseModel):
    # Body for POST /v1/admin/fact_check/law.
    name: str
    year: int
    he_year: Optional[str] = None


class FactCaseRequest(BaseModel):
    # Body for POST /v1/admin/fact_check/case. `metadata` is expanded into
    # keyword arguments of add_case() by the handler.
    canonical_id: str
    metadata: Dict[str, Any] = {}


class FactSectionRequest(BaseModel):
    # Body for POST /v1/admin/fact_check/section.
    key: str
    text: str
    law: Optional[str] = None


@app.get("/v1/admin/fact_check")
def admin_fact_check_stats():
    # Return the fact-consistency checker's stats() payload unchanged.
    from ..middleware.fact_consistency import get_fact_consistency_checker
    return get_fact_consistency_checker().stats()


@app.post("/v1/admin/fact_check/check")
def admin_fact_check(req: FactCheckRequest, request: Request):
    # Check one answer text; ValueError -> HTTP 400.
    from ..middleware.fact_consistency import get_fact_consistency_checker
    try:
        findings = get_fact_consistency_checker().check(req.answer)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return findings.to_dict()


@app.post("/v1/admin/fact_check/law")
def admin_fact_check_add_law(req: FactLawRequest, request: Request):
    # Register a law (name, year, optional he_year) with the checker.
    from ..middleware.fact_consistency import get_fact_consistency_checker
    try:
        get_fact_consistency_checker().add_law(
            name=req.name, year=req.year, he_year=req.he_year)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


@app.post("/v1/admin/fact_check/case")
def admin_fact_check_add_case(req: FactCaseRequest, request: Request):
    # Register a case; metadata entries become keyword arguments.
    from ..middleware.fact_consistency import get_fact_consistency_checker
    # A "canonical_id" key inside metadata would collide with the explicit
    # keyword below and raise TypeError (an unhandled 500). Reject it as a
    # client error instead.
    if "canonical_id" in req.metadata:
        raise HTTPException(
            status_code=400,
            detail={"error": "metadata must not contain 'canonical_id'"},
        )
    try:
        get_fact_consistency_checker().add_case(
            canonical_id=req.canonical_id, **req.metadata)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


@app.post("/v1/admin/fact_check/section")
def admin_fact_check_add_section(req: FactSectionRequest, request: Request):
    # Register a section (key, text, optional law) with the checker.
    from ..middleware.fact_consistency import get_fact_consistency_checker
    try:
        get_fact_consistency_checker().add_section(
            key=req.key, text=req.text, law=req.law)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


@app.post("/v1/admin/fact_check/reset")
def admin_fact_check_reset(request: Request):
    # Call reset() on the shared fact-consistency checker.
    from ..middleware.fact_consistency import get_fact_consistency_checker
    get_fact_consistency_checker().reset()
    return {"ok": True}


# ---- v3.45 Issue spotter --------------------------------------------
class IssueScanRequest(BaseModel):
    # Body for POST /v1/admin/issue_spotter/scan.
    doc_id: str
    text: str


class IssueScanDocsRequest(BaseModel):
    # Body for POST /v1/admin/issue_spotter/scan_docs.
    docs: List[Dict[str, Any]]


@app.get("/v1/admin/issue_spotter")
def admin_issue_spotter_stats():
    # Report the spotter's stats together with its rule list.
    from ..middleware.issue_spotter import get_issue_spotter
    spotter = get_issue_spotter()
    return {"stats": spotter.stats(), "rules": spotter.list_rules()}


@app.post("/v1/admin/issue_spotter/scan")
def admin_issue_spotter_scan(req: IssueScanRequest, request: Request):
    # Scan a single document; ValueError -> HTTP 400.
    from ..middleware.issue_spotter import get_issue_spotter
    try:
        scanned = get_issue_spotter().scan(req.doc_id, req.text)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return scanned.to_dict()


@app.post("/v1/admin/issue_spotter/scan_docs")
def admin_issue_spotter_scan_docs(req: IssueScanDocsRequest,
                                  request: Request):
    # Scan several documents. ValueError is mapped to HTTP 400 the same
    # way the single-document scan endpoint does.
    from ..middleware.issue_spotter import get_issue_spotter
    try:
        scanned = get_issue_spotter().scan_docs(req.docs)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"reports": [item.to_dict() for item in scanned]}


@app.post("/v1/admin/issue_spotter/reset")
def admin_issue_spotter_reset(request: Request):
    # Reset by installing a freshly constructed IssueSpotter.
    from ..middleware.issue_spotter import (
        IssueSpotter,
        set_issue_spotter,
    )
    set_issue_spotter(IssueSpotter())
    return {"ok": True}


# ---- v3.46 Document classifier --------------------------------------
class ClassifyRequest(BaseModel):
    # Body for POST /v1/admin/doc_classifier/classify.
    doc_id: str
    text: str


class ClassifyManyRequest(BaseModel):
    # Body for POST /v1/admin/doc_classifier/classify_many.
    docs: List[Dict[str, Any]]


@app.get("/v1/admin/doc_classifier")
def admin_doc_classifier_stats():
    # Report the classifier's stats together with its supported types.
    from ..middleware.doc_classifier import get_doc_classifier
    classifier = get_doc_classifier()
    return {
        "stats": classifier.stats(),
        "supported_types": classifier.supported_types(),
    }


@app.post("/v1/admin/doc_classifier/classify")
def admin_doc_classifier_classify(req: ClassifyRequest, request: Request):
    # Classify a single document; ValueError -> HTTP 400.
    from ..middleware.doc_classifier import get_doc_classifier
    try:
        verdict = get_doc_classifier().classify(req.doc_id, req.text)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return verdict.to_dict()


@app.post("/v1/admin/doc_classifier/classify_many")
def admin_doc_classifier_classify_many(req: ClassifyManyRequest,
                                       request: Request):
    # Classify several documents. ValueError is mapped to HTTP 400 the
    # same way the single-document endpoint does.
    from ..middleware.doc_classifier import get_doc_classifier
    try:
        verdicts = get_doc_classifier().classify_many(req.docs)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"results": [item.to_dict() for item in verdicts]}


@app.post("/v1/admin/doc_classifier/reset")
def admin_doc_classifier_reset(request: Request):
    # Call reset() on the shared document classifier.
    from ..middleware.doc_classifier import get_doc_classifier
    get_doc_classifier().reset()
    return {"ok": True}


# ---- v3.47 Cache invalidator ----------------------------------------
class CacheRegisterRequest(BaseModel):
    # Body for POST /v1/admin/cache_invalidator/register.
    cache_key: str
    doc_ids: List[str]
    tenant_id: Optional[str] = None
    source_types: Optional[List[str]] = None
    ttl_seconds: Optional[float] = None


class CacheInvalidateDocRequest(BaseModel):
    # Body for POST /v1/admin/cache_invalidator/invalidate_doc.
    doc_id: str
    reason: str = "doc_updated"
    actor: Optional[str] = None


class CacheInvalidateBulkRequest(BaseModel):
    # Body for POST /v1/admin/cache_invalidator/invalidate_bulk.
    doc_ids: List[str]
    reason: str = "bulk"
    actor: Optional[str] = None


class CacheInvalidatePatternRequest(BaseModel):
    # Body for POST /v1/admin/cache_invalidator/invalidate_pattern.
    pattern: str
    reason: str = "bulk"
    actor: Optional[str] = None


@app.get("/v1/admin/cache_invalidator")
def admin_cache_invalidator_stats():
    # Return the cache invalidator's stats() payload unchanged.
    from ..middleware.cache_invalidator import get_cache_invalidator
    return get_cache_invalidator().stats()


@app.post("/v1/admin/cache_invalidator/register")
def admin_cache_invalidator_register(req: CacheRegisterRequest,
                                     request: Request):
    # Register a cache key with the doc ids it was posted with.
    from ..middleware.cache_invalidator import get_cache_invalidator
    try:
        get_cache_invalidator().register(
            cache_key=req.cache_key,
            doc_ids=req.doc_ids,
            tenant_id=req.tenant_id,
            source_types=req.source_types,
            ttl_seconds=req.ttl_seconds,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


@app.delete("/v1/admin/cache_invalidator/register/{cache_key}")
def admin_cache_invalidator_unregister(cache_key: str, request: Request):
    # Unregister a cache key; a falsy result from unregister() -> 404.
    from ..middleware.cache_invalidator import get_cache_invalidator
    removed = get_cache_invalidator().unregister(cache_key)
    if not removed:
        raise HTTPException(status_code=404, detail={"error": "not found"})
    return {"ok": True}


@app.post("/v1/admin/cache_invalidator/invalidate_doc")
def admin_cache_invalidator_doc(req: CacheInvalidateDocRequest,
                                request: Request):
    # Invalidate by one doc id and return the resulting event.
    from ..middleware.cache_invalidator import get_cache_invalidator
    try:
        event = get_cache_invalidator().invalidate_doc(
            doc_id=req.doc_id,
            reason=req.reason,
            actor=req.actor,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return event.to_dict()


@app.post("/v1/admin/cache_invalidator/invalidate_bulk")
def admin_cache_invalidator_bulk(req: CacheInvalidateBulkRequest,
                                 request: Request):
    # Invalidate by several doc ids. ValueError is mapped to HTTP 400 the
    # same way the single-doc and pattern endpoints do.
    from ..middleware.cache_invalidator import get_cache_invalidator
    try:
        event = get_cache_invalidator().invalidate_docs(
            doc_ids=req.doc_ids,
            reason=req.reason,
            actor=req.actor,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return event.to_dict()


@app.post("/v1/admin/cache_invalidator/invalidate_pattern")
def admin_cache_invalidator_pattern(
    req: CacheInvalidatePatternRequest,
    request: Request,
):
    # Invalidate by pattern and return the resulting event.
    from ..middleware.cache_invalidator import get_cache_invalidator
    try:
        event = get_cache_invalidator().invalidate_pattern(
            pattern=req.pattern,
            reason=req.reason,
            actor=req.actor,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return event.to_dict()


@app.get("/v1/admin/cache_invalidator/history")
def admin_cache_invalidator_history(
    limit: int = 50,
    reason: Optional[str] = None,
):
    # Return invalidation history, optionally filtered by reason.
    from ..middleware.cache_invalidator import get_cache_invalidator
    try:
        events = get_cache_invalidator().history(limit=limit, reason=reason)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"events": events}


@app.post("/v1/admin/cache_invalidator/reset")
def admin_cache_invalidator_reset(request: Request):
    # Call reset() on the shared cache invalidator.
    from ..middleware.cache_invalidator import get_cache_invalidator
    get_cache_invalidator().reset()
    return {"ok": True}


# ---- v3.48 Query analytics ------------------------------------------
class AnalyticsRecordRequest(BaseModel):
    # Body for POST /v1/admin/query_analytics/record.
    query: str
    tenant_id: Optional[str] = None
    user_id: Optional[str] = None
    intent: Optional[str] = None
    latency_ms: Optional[float] = None
    cost_cents: Optional[float] = None
    ok: bool = True
    failed_stage: Optional[str] = None
    n_retrieved: int = 0
    confidence: Optional[float] = None


@app.get("/v1/admin/query_analytics")
def admin_query_analytics_stats():
    # Return the query analytics stats() payload unchanged.
    from ..middleware.query_analytics import get_query_analytics
    return get_query_analytics().stats()


@app.post("/v1/admin/query_analytics/record")
def admin_query_analytics_record(req: AnalyticsRecordRequest,
                                 request: Request):
    # Record one query observation; ValueError -> HTTP 400.
    from ..middleware.query_analytics import get_query_analytics
    try:
        get_query_analytics().record(
            query=req.query,
            tenant_id=req.tenant_id,
            user_id=req.user_id,
            intent=req.intent,
            latency_ms=req.latency_ms,
            cost_cents=req.cost_cents,
            ok=req.ok,
            failed_stage=req.failed_stage,
            n_retrieved=req.n_retrieved,
            confidence=req.confidence,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


@app.get("/v1/admin/query_analytics/summary")
def admin_query_analytics_summary(since_ts: Optional[float] = None):
    # Return overall_summary(), optionally restricted by since_ts.
    from ..middleware.query_analytics import get_query_analytics
    return get_query_analytics().overall_summary(since_ts=since_ts)


@app.get("/v1/admin/query_analytics/bucketed")
def admin_query_analytics_bucketed(
    granularity: str = "hour",
    since_ts: Optional[float] = None,
    until_ts: Optional[float] = None,
    tenant_id: Optional[str] = None,
    intent: Optional[str] = None,
):
    # Return bucketed analytics; ValueError -> HTTP 400.
    from ..middleware.query_analytics import get_query_analytics
    try:
        buckets = get_query_analytics().bucketed(
            granularity=granularity,
            since_ts=since_ts,
            until_ts=until_ts,
            tenant_id=tenant_id,
            intent=intent,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"buckets": [bucket.to_dict() for bucket in buckets]}


@app.get("/v1/admin/query_analytics/top")
def admin_query_analytics_top(by: str = "volume", limit: int = 10,
                              since_ts: Optional[float] = None):
    # Return the top queries ranked by the `by` criterion.
    from ..middleware.query_analytics import get_query_analytics
    try:
        ranked = get_query_analytics().top_queries(
            by=by, limit=limit, since_ts=since_ts)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"top": [entry.to_dict() for entry in ranked]}


@app.get("/v1/admin/query_analytics/tenants")
def admin_query_analytics_tenants(since_ts: Optional[float] = None):
    # Return the per-tenant summaries produced by tenant_summaries().
    from ..middleware.query_analytics import get_query_analytics
    summaries = get_query_analytics().tenant_summaries(since_ts=since_ts)
    return {"tenants": [summary.to_dict() for summary in summaries]}


@app.post("/v1/admin/query_analytics/reset")
def admin_query_analytics_reset(request: Request):
    # Call reset() on the shared query analytics instance.
    from ..middleware.query_analytics import get_query_analytics
    get_query_analytics().reset()
    return {"ok": True}


# ---- v3.49 Corpus router --------------------------------------------
class CorpusProfileRequest(BaseModel):
    # Body for POST /v1/admin/corpus_router/corpus. The three list fields
    # are converted to sets by the handler.
    name: str
    keywords: List[str] = []
    intents: List[str] = []
    topics: List[str] = []
    cost_cents_per_query: float = 0.0
    quality: float = 0.8
    description: str = ""
    replace: bool = False


class CorpusRouteRequest(BaseModel):
    # Body for POST /v1/admin/corpus_router/route.
    query: str
    intent: Optional[str] = None
    topic: Optional[str] = None
    tenant_id: Optional[str] = None


class CorpusTenantOverrideRequest(BaseModel):
    # Body for POST /v1/admin/corpus_router/tenant_override.
    tenant_id: str
    corpus_name: str


@app.get("/v1/admin/corpus_router")
def admin_corpus_router_stats():
    # Report router stats, registered corpora and tenant overrides.
    from ..middleware.corpus_router import get_corpus_router
    router = get_corpus_router()
    return {
        "stats": router.stats(),
        "corpora": router.list_corpora(),
        "tenant_overrides": router.list_tenant_overrides(),
    }


@app.post("/v1/admin/corpus_router/corpus")
def admin_corpus_router_register(req: CorpusProfileRequest, request: Request):
    # Build a CorpusProfile from the request and register it. The profile
    # is constructed inside the try so a ValueError from construction is
    # also reported as HTTP 400.
    from ..middleware.corpus_router import (
        get_corpus_router,
        CorpusProfile,
    )
    try:
        get_corpus_router().register_corpus(
            CorpusProfile(
                name=req.name,
                keywords=set(req.keywords),
                intents=set(req.intents),
                topics=set(req.topics),
                cost_cents_per_query=req.cost_cents_per_query,
                quality=req.quality,
                description=req.description,
            ),
            replace=req.replace,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


@app.delete("/v1/admin/corpus_router/corpus/{name}")
def admin_corpus_router_unregister(name: str, request: Request):
    # Unregister a corpus; a falsy result from unregister_corpus() -> 404.
    from ..middleware.corpus_router import get_corpus_router
    removed = get_corpus_router().unregister_corpus(name)
    if not removed:
        raise HTTPException(status_code=404, detail={"error": "not found"})
    return {"ok": True}


@app.post("/v1/admin/corpus_router/route")
def admin_corpus_router_route(req: CorpusRouteRequest, request: Request):
    # Ask the router for a routing decision; ValueError -> HTTP 400.
    from ..middleware.corpus_router import get_corpus_router
    try:
        decision = get_corpus_router().route(
            query=req.query,
            intent=req.intent,
            topic=req.topic,
            tenant_id=req.tenant_id,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return decision.to_dict()


@app.post("/v1/admin/corpus_router/tenant_override")
def admin_corpus_router_tenant_override(
    req: CorpusTenantOverrideRequest,
    request: Request,
):
    # Set a tenant -> corpus override; ValueError -> HTTP 400.
    from ..middleware.corpus_router import get_corpus_router
    try:
        get_corpus_router().set_tenant_override(
            req.tenant_id, req.corpus_name)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


@app.post("/v1/admin/corpus_router/reset")
def admin_corpus_router_reset(request: Request):
    # Call reset() on the shared corpus router.
    from ..middleware.corpus_router import get_corpus_router
    get_corpus_router().reset()
    return {"ok": True}


# ---- v3.50 Stream chunker -------------------------------------------
class StreamPushRequest(BaseModel):
    # Body for POST /v1/admin/stream_chunker/push.
    token: str


class StreamChunkTextRequest(BaseModel):
    # Body for POST /v1/admin/stream_chunker/chunk_text.
    text: str
    boundary: Optional[str] = None


class StreamBoundaryRequest(BaseModel):
    # Body for POST /v1/admin/stream_chunker/boundary.
    boundary: str


@app.get("/v1/admin/stream_chunker")
def admin_stream_chunker_stats():
    # Return the chunker's stats converted with to_dict().
    from ..middleware.stream_chunker import get_stream_chunker
    return get_stream_chunker().stats().to_dict()


@app.post("/v1/admin/stream_chunker/push")
def admin_stream_chunker_push(req: StreamPushRequest, request: Request):
    # Push one token and return whatever chunks push() produced.
    from ..middleware.stream_chunker import get_stream_chunker
    try:
        produced = get_stream_chunker().push(req.token)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"chunks": [chunk.to_dict() for chunk in produced]}


@app.post("/v1/admin/stream_chunker/flush")
def admin_stream_chunker_flush(request: Request):
    # Flush the chunker; "chunk" is None when flush() returns a falsy value.
    from ..middleware.stream_chunker import get_stream_chunker
    flushed = get_stream_chunker().flush()
    return {"chunk": flushed.to_dict() if flushed else None}


@app.post("/v1/admin/stream_chunker/chunk_text")
def admin_stream_chunker_chunk_text(req: StreamChunkTextRequest,
                                    request: Request):
    # Chunk a whole text, optionally switching the boundary first.
    from ..middleware.stream_chunker import get_stream_chunker
    chunker = get_stream_chunker()
    if req.boundary:
        # NOTE(review): set_boundary() is called on the instance returned
        # by get_stream_chunker() and is not restored afterwards, so a
        # per-request boundary appears to persist for later calls --
        # confirm this is intended.
        try:
            chunker.set_boundary(req.boundary)
        except ValueError as exc:
            raise HTTPException(status_code=400, detail={"error": str(exc)})
    produced = chunker.chunk_text(req.text)
    return {"chunks": [chunk.to_dict() for chunk in produced]}


@app.post("/v1/admin/stream_chunker/boundary")
def admin_stream_chunker_boundary(req: StreamBoundaryRequest,
                                  request: Request):
    # Set the chunker boundary and echo the requested value back.
    from ..middleware.stream_chunker import get_stream_chunker
    try:
        get_stream_chunker().set_boundary(req.boundary)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "boundary": req.boundary}


@app.post("/v1/admin/stream_chunker/reset")
def admin_stream_chunker_reset(request: Request):
    # Call reset() on the shared stream chunker.
    from ..middleware.stream_chunker import get_stream_chunker
    get_stream_chunker().reset()
    return {"ok": True}


# ---- v3.51 Knowledge graph extractor --------------------------------
class KGExtractRequest(BaseModel):
    # Body for POST /v1/admin/kg_extractor/extract.
    doc_id: str
    text: str
    store: bool = True


class KGExtractManyRequest(BaseModel):
    # Body for POST /v1/admin/kg_extractor/extract_many.
    docs: List[Dict[str, Any]]
    store: bool = True


class KGNeighborsRequest(BaseModel):
    # Body for POST /v1/admin/kg_extractor/neighbors.
    entity: str
    direction: str = "both"


@app.get("/v1/admin/kg_extractor")
def admin_kg_extractor_stats():
    # Return the KG extractor's stats() payload unchanged.
    from ..middleware.kg_extractor import get_kg_extractor
    return get_kg_extractor().stats()


@app.post("/v1/admin/kg_extractor/extract")
def admin_kg_extractor_extract(req: KGExtractRequest, request: Request):
    # Extract from one document; ValueError -> HTTP 400.
    from ..middleware.kg_extractor import get_kg_extractor
    try:
        extracted = get_kg_extractor().extract(
            req.doc_id, req.text, store=req.store)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return extracted.to_dict()


@app.post("/v1/admin/kg_extractor/extract_many")
def admin_kg_extractor_extract_many(req: KGExtractManyRequest,
                                    request: Request):
    # Extract from several documents. ValueError is mapped to HTTP 400 the
    # same way the single-document endpoint does.
    from ..middleware.kg_extractor import get_kg_extractor
    try:
        extracted = get_kg_extractor().extract_many(
            req.docs, store=req.store)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"reports": [item.to_dict() for item in extracted]}


@app.get("/v1/admin/kg_extractor/snapshot")
def admin_kg_extractor_snapshot(top_n: int = 10):
    # Return a snapshot limited by top_n; ValueError -> HTTP 400.
    from ..middleware.kg_extractor import get_kg_extractor
    try:
        snapshot = get_kg_extractor().snapshot(top_n=top_n)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return snapshot.to_dict()


@app.post("/v1/admin/kg_extractor/neighbors")
def admin_kg_extractor_neighbors(req: KGNeighborsRequest, request: Request):
    # Return the triples neighbors() yields for an entity.
    from ..middleware.kg_extractor import get_kg_extractor
    try:
        triples = get_kg_extractor().neighbors(
            req.entity, direction=req.direction)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"triples": [triple.to_dict() for triple in triples]}


@app.post("/v1/admin/kg_extractor/reset")
def admin_kg_extractor_reset(request: Request):
    # Call reset() on the shared KG extractor.
    from ..middleware.kg_extractor import get_kg_extractor
    get_kg_extractor().reset()
    return {"ok": True}


# ---- v3.52 Citation network analyzer --------------------------------
class CitationEdgeRequest(BaseModel):
    # Body for POST /v1/admin/citation_network/edge.
    citing: str
    cited: str
    weight: float = 1.0
    edge_type: str = "cites"


class CitationBulkRequest(BaseModel):
    # Body for POST /v1/admin/citation_network/edges_bulk.
    edges: List[Dict[str, Any]]


@app.get("/v1/admin/citation_network")
def admin_citation_network_stats():
    # Return the citation network's stats() payload unchanged.
    from ..middleware.citation_network import get_citation_network
    return get_citation_network().stats()


@app.post("/v1/admin/citation_network/edge")
def admin_citation_network_add_edge(req: CitationEdgeRequest,
                                    request: Request):
    # Add a single citing -> cited edge; ValueError -> HTTP 400.
    from ..middleware.citation_network import get_citation_network
    try:
        get_citation_network().add_edge(
            citing=req.citing,
            cited=req.cited,
            weight=req.weight,
            edge_type=req.edge_type,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


@app.post("/v1/admin/citation_network/edges_bulk")
def admin_citation_network_bulk(req: CitationBulkRequest, request: Request):
    # Add several edges and report add_edges_bulk()'s return value as
    # "added". ValueError is mapped to HTTP 400 the same way the
    # single-edge endpoint does.
    from ..middleware.citation_network import get_citation_network
    try:
        added = get_citation_network().add_edges_bulk(req.edges)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "added": added}


@app.delete("/v1/admin/citation_network/doc/{doc_id}")
def admin_citation_network_remove_doc(doc_id: str, request: Request):
    # Remove a document and report remove_doc()'s return value.
    from ..middleware.citation_network import get_citation_network
    removed = get_citation_network().remove_doc(doc_id)
    return {"ok": True, "removed_edges": removed}


@app.get("/v1/admin/citation_network/pagerank")
def admin_citation_network_pagerank(top_n: int = 20):
    # Return pagerank() results limited by top_n.
    from ..middleware.citation_network import get_citation_network
    try:
        rankings = get_citation_network().pagerank(top_n=top_n)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"rankings": [ranking.to_dict() for ranking in rankings]}


@app.get("/v1/admin/citation_network/hits")
def admin_citation_network_hits(top_n: int = 20):
    # Return hits() results limited by top_n.
    from ..middleware.citation_network import get_citation_network
    try:
        scores = get_citation_network().hits(top_n=top_n)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"scores": [score.to_dict() for score in scores]}


@app.get("/v1/admin/citation_network/top_cited")
def admin_citation_network_top_cited(top_n: int = 10):
    # top_cited() yields pairs; each is reported as doc_id / in_degree.
    from ..middleware.citation_network import get_citation_network
    try:
        pairs = get_citation_network().top_cited(top_n=top_n)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {
        "top_cited": [
            {"doc_id": doc_id, "in_degree": count}
            for doc_id, count in pairs
        ],
    }


@app.get("/v1/admin/citation_network/components")
def admin_citation_network_components():
    # Return the connected components of the citation network.
    from ..middleware.citation_network import get_citation_network
    components = get_citation_network().connected_components()
    return {"components": [component.to_dict() for component in components]}


@app.post("/v1/admin/citation_network/reset")
def admin_citation_network_reset(request: Request):
    # Call reset() on the shared citation network.
    from ..middleware.citation_network import get_citation_network
    get_citation_network().reset()
    return {"ok": True}


# ---- v3.53 Query template extractor ---------------------------------
class TemplateRecordQueryRequest(BaseModel):
    # Body for POST /v1/admin/template_extractor/record.
    query: str


class TemplateRecordManyRequest(BaseModel):
    # Body for POST /v1/admin/template_extractor/record_many.
    queries: List[str]


class TemplateExtractRequest(BaseModel):
    # Body for POST /v1/admin/template_extractor/extract.
    min_frequency: Optional[int] = None


class TemplateNormalizeRequest(BaseModel):
    # Body for POST /v1/admin/template_extractor/normalize.
    query: str


@app.get("/v1/admin/template_extractor")
def admin_template_extractor_stats():
    # Return the template extractor's stats() payload unchanged.
    from ..middleware.template_extractor import get_template_extractor
    return get_template_extractor().stats()


@app.post("/v1/admin/template_extractor/record")
def admin_template_extractor_record(req: TemplateRecordQueryRequest,
                                    request: Request):
    # Record a single query; ValueError -> HTTP 400.
    from ..middleware.template_extractor import get_template_extractor
    try:
        get_template_extractor().record(req.query)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


@app.post("/v1/admin/template_extractor/record_many")
def admin_template_extractor_record_many(
    req: TemplateRecordManyRequest,
    request: Request,
):
    # Record several queries and report record_many()'s return value as
    # "added". ValueError is mapped to HTTP 400 the same way the
    # single-query endpoint does.
    from ..middleware.template_extractor import get_template_extractor
    try:
        added = get_template_extractor().record_many(req.queries)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True, "added": added}


@app.post("/v1/admin/template_extractor/extract")
def admin_template_extractor_extract(req: TemplateExtractRequest,
                                     request: Request):
    # Run template extraction; ValueError -> HTTP 400.
    from ..middleware.template_extractor import get_template_extractor
    try:
        extracted = get_template_extractor().extract(
            min_frequency=req.min_frequency)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return extracted.to_dict()


@app.post("/v1/admin/template_extractor/normalize")
def admin_template_extractor_normalize(
    req: TemplateNormalizeRequest,
    request: Request,
):
    # Return normalize()'s result for the query as the response body.
    from ..middleware.template_extractor import get_template_extractor
    try:
        normalized = get_template_extractor().normalize(req.query)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return normalized
@app.post("/v1/admin/template_extractor/reset")
def admin_template_extractor_reset(request: Request):
    # Call reset() on the shared template extractor.
    from ..middleware.template_extractor import get_template_extractor
    extractor = get_template_extractor()
    extractor.reset()
    return {"ok": True}


# ---- v3.54 Slow query analyzer --------------------------------------
class SlowRecordRequest(BaseModel):
    # Body for POST /v1/admin/slow_query_analyzer/record.
    query: str
    total_latency_ms: float
    stages: Optional[Dict[str, float]] = None
    n_retrieved: int = 0
    metadata: Optional[Dict[str, Any]] = None


@app.get("/v1/admin/slow_query_analyzer")
def admin_slow_query_analyzer_stats():
    # Return the slow query analyzer's stats() payload unchanged.
    from ..middleware.slow_query_analyzer import get_slow_query_analyzer
    return get_slow_query_analyzer().stats()


@app.post("/v1/admin/slow_query_analyzer/record")
def admin_slow_query_analyzer_record(req: SlowRecordRequest,
                                     request: Request):
    # Record one latency observation; ValueError -> HTTP 400.
    from ..middleware.slow_query_analyzer import get_slow_query_analyzer
    try:
        get_slow_query_analyzer().record(
            query=req.query,
            total_latency_ms=req.total_latency_ms,
            stages=req.stages,
            n_retrieved=req.n_retrieved,
            metadata=req.metadata,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


@app.get("/v1/admin/slow_query_analyzer/analyze")
def admin_slow_query_analyzer_analyze():
    # Run analyze() and return its report as a dict.
    from ..middleware.slow_query_analyzer import get_slow_query_analyzer
    return get_slow_query_analyzer().analyze().to_dict()


@app.post("/v1/admin/slow_query_analyzer/reset")
def admin_slow_query_analyzer_reset(request: Request):
    # Call reset() on the shared slow query analyzer.
    from ..middleware.slow_query_analyzer import get_slow_query_analyzer
    analyzer = get_slow_query_analyzer()
    analyzer.reset()
    return {"ok": True}


# ---- v3.55 Confidence calibrator ------------------------------------
class CalibrationRecordRequest(BaseModel):
    # Body for POST /v1/admin/confidence_calibrator/record.
    predicted: float
    actual: bool
    metadata: Optional[Dict[str, Any]] = None


class CalibrationCalibrateRequest(BaseModel):
    # Body for POST /v1/admin/confidence_calibrator/calibrate.
    raw: float


@app.get("/v1/admin/confidence_calibrator")
def admin_confidence_calibrator_stats():
    # Return the confidence calibrator's stats() payload unchanged.
    from ..middleware.confidence_calibrator import get_confidence_calibrator
    return get_confidence_calibrator().stats()


@app.post("/v1/admin/confidence_calibrator/record")
def admin_confidence_calibrator_record(req: CalibrationRecordRequest,
                                       request: Request):
    # Record one (predicted, actual) observation; ValueError -> HTTP 400.
    from ..middleware.confidence_calibrator import get_confidence_calibrator
    try:
        get_confidence_calibrator().record(
            predicted=req.predicted,
            actual=req.actual,
            metadata=req.metadata,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"ok": True}


@app.get("/v1/admin/confidence_calibrator/analyze")
def admin_confidence_calibrator_analyze():
    # Run analyze() and return its report as a dict.
    from ..middleware.confidence_calibrator import get_confidence_calibrator
    analysis = get_confidence_calibrator().analyze()
    return analysis.to_dict()


@app.post("/v1/admin/confidence_calibrator/calibrate")
def admin_confidence_calibrator_calibrate(
    req: CalibrationCalibrateRequest,
    request: Request,
):
    # Return the raw confidence next to its calibrated counterpart.
    from ..middleware.confidence_calibrator import get_confidence_calibrator
    try:
        calibrated = get_confidence_calibrator().calibrated_confidence(
            req.raw)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"raw": req.raw, "calibrated": calibrated}


@app.get("/v1/admin/confidence_calibrator/curve")
def admin_confidence_calibrator_curve():
    # Return calibration_curve() under the "points" key.
    from ..middleware.confidence_calibrator import get_confidence_calibrator
    points = get_confidence_calibrator().calibration_curve()
    return {"points": points}


@app.post("/v1/admin/confidence_calibrator/reset")
def admin_confidence_calibrator_reset(request: Request):
    # Call reset() on the shared confidence calibrator.
    from ..middleware.confidence_calibrator import get_confidence_calibrator
    calibrator = get_confidence_calibrator()
    calibrator.reset()
    return {"ok": True}


# ---- v3.56 Preview generator ----------------------------------------
class PreviewGenerateRequest(BaseModel):
    # Body for POST /v1/admin/preview_generator/generate.
    doc_id: str
    text: str
    query: str
    max_length: Optional[int] = None
    window: Optional[int] = None
    n_snippets: Optional[int] = None
    highlight_mode: Optional[str] = None


class PreviewGenerateManyRequest(BaseModel):
    # Body for POST /v1/admin/preview_generator/generate_many.
    docs: List[Dict[str, Any]]
    query: str
    max_length: Optional[int] = None
    window: Optional[int] = None
    n_snippets: Optional[int] = None
    highlight_mode: Optional[str] = None


@app.get("/v1/admin/preview_generator")
def admin_preview_generator_stats():
    # Return the preview generator's stats() payload unchanged.
    from ..middleware.preview_generator import get_preview_generator
    return get_preview_generator().stats()


@app.post("/v1/admin/preview_generator/generate")
def admin_preview_generator_generate(req: PreviewGenerateRequest,
                                     request: Request):
    # Generate one preview. Every tuning field is forwarded as given,
    # including None.
    from ..middleware.preview_generator import get_preview_generator
    try:
        preview = get_preview_generator().generate(
            doc_id=req.doc_id,
            text=req.text,
            query=req.query,
            max_length=req.max_length,
            window=req.window,
            n_snippets=req.n_snippets,
            highlight_mode=req.highlight_mode,
        )
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return preview.to_dict()


@app.post("/v1/admin/preview_generator/generate_many")
def admin_preview_generator_generate_many(
    req: PreviewGenerateManyRequest,
    request: Request,
):
    # Generate previews for several docs. Only tuning fields that are not
    # None are forwarded as keyword arguments.
    from ..middleware.preview_generator import get_preview_generator
    tunables = ("max_length", "window", "n_snippets", "highlight_mode")
    supplied = ((field, getattr(req, field)) for field in tunables)
    overrides = {
        field: value for field, value in supplied if value is not None
    }
    try:
        previews = get_preview_generator().generate_many(
            req.docs, req.query, **overrides)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail={"error": str(exc)})
    return {"previews": [preview.to_dict() for preview in previews]}


@app.post("/v1/admin/preview_generator/reset")
def admin_preview_generator_reset(request: Request):
    # Call reset() on the shared preview generator.
    from ..middleware.preview_generator import get_preview_generator
    generator = get_preview_generator()
    generator.reset()
    return {"ok": True}


# =========================== v3.57 Retrieval Agreement Scorer ===
class AgreementScoreRequest(BaseModel):
    # Body for POST /v1/admin/retrieval_agreement/score.
    retriever_results: Dict[str, List[str]]


@app.get("/v1/admin/retrieval_agreement")
def admin_retrieval_agreement_stats():
    # Return the agreement scorer's stats() payload unchanged.
    from ..middleware.retrieval_agreement import (
        get_retrieval_agreement_scorer,
    )
    return get_retrieval_agreement_scorer().stats()


@app.post("/v1/admin/retrieval_agreement/score")
def admin_retrieval_agreement_score(
        req: AgreementScoreRequest, request: Request):
    # Score agreement between retrievers. On ValueError the 400 detail is
    # the plain message string.
    from ..middleware.retrieval_agreement import (
        get_retrieval_agreement_scorer,
    )
    try:
        scored = get_retrieval_agreement_scorer().score(
            req.retriever_results)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return scored.to_dict()


@app.post("/v1/admin/retrieval_agreement/reset")
def admin_retrieval_agreement_reset(request: Request):
    # Call reset() on the shared agreement scorer.
    from ..middleware.retrieval_agreement import (
        get_retrieval_agreement_scorer,
    )
    scorer = get_retrieval_agreement_scorer()
    scorer.reset()
    return {"ok": True}


# =========================== v3.58 Answer Source Balance ========
class SourceBalanceRequest(BaseModel):
    # Body for POST /v1/admin/answer_source_balance/analyze.
    citations: List[str]


@app.get("/v1/admin/answer_source_balance")
def admin_answer_source_balance_stats():
    # Return the source balancer's stats() payload unchanged.
    from ..middleware.answer_source_balance import (
        get_answer_source_balancer,
    )
    return get_answer_source_balancer().stats()


@app.post("/v1/admin/answer_source_balance/analyze")
def admin_answer_source_balance_analyze(
        req: SourceBalanceRequest, request: Request):
    # Analyze a citation list. On ValueError the 400 detail is the plain
    # message string.
    from ..middleware.answer_source_balance import (
        get_answer_source_balancer,
    )
    try:
        balance = get_answer_source_balancer().analyze(req.citations)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return balance.to_dict()


@app.post("/v1/admin/answer_source_balance/reset")
def admin_answer_source_balance_reset(request: Request):
    # Call reset() on the shared source balancer.
    from ..middleware.answer_source_balance import (
        get_answer_source_balancer,
    )
    balancer = get_answer_source_balancer()
    balancer.reset()
    return {"ok": True}


# =========================== v3.59 Doc Staleness Scorer =========
class StalenessScoreRequest(BaseModel):
    # Body for POST /v1/admin/doc_staleness/score.
    doc_id: str
    age_days: float
    citation_count: int = 0
    superseded: bool = False


class StalenessScoreManyRequest(BaseModel):
    # Body for POST /v1/admin/doc_staleness/score_many.
    docs: List[Dict[str, Any]]


@app.get("/v1/admin/doc_staleness")
def admin_doc_staleness_stats():
    # Return the staleness scorer's stats() payload unchanged.
    from ..middleware.doc_staleness import get_doc_staleness_scorer
    scorer = get_doc_staleness_scorer()
    return scorer.stats()
@app.post("/v1/admin/doc_staleness/score")
def admin_doc_staleness_score(
        req: StalenessScoreRequest, request: Request):
    """Score one document's staleness and return the result as a dict.

    The body's ``doc_id``, ``age_days``, ``citation_count`` and
    ``superseded`` fields are passed to the scorer's ``score_doc``.

    Raises:
        HTTPException: 400 when the scorer raises ``ValueError``.
    """
    from ..middleware.doc_staleness import get_doc_staleness_scorer
    try:
        score = get_doc_staleness_scorer().score_doc(
            doc_id=req.doc_id,
            age_days=req.age_days,
            citation_count=req.citation_count,
            superseded=req.superseded,
        )
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return score.to_dict()


@app.post("/v1/admin/doc_staleness/score_many")
def admin_doc_staleness_score_many(
        req: StalenessScoreManyRequest, request: Request):
    """Score ``req.docs`` in one call; return the scores and a summary.

    Raises:
        HTTPException: 400 when ``score_many`` raises ``ValueError``,
            the same mapping the single-document endpoint applies.
    """
    from ..middleware.doc_staleness import get_doc_staleness_scorer
    scorer = get_doc_staleness_scorer()
    try:
        scores = scorer.score_many(req.docs)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return {
        "scores": [s.to_dict() for s in scores],
        "summary": scorer.summarize(scores),
    }


@app.post("/v1/admin/doc_staleness/reset")
def admin_doc_staleness_reset(request: Request):
    """Call ``reset()`` on the staleness scorer; return ``{"ok": True}``."""
    from ..middleware.doc_staleness import get_doc_staleness_scorer
    get_doc_staleness_scorer().reset()
    return {"ok": True}


# =========================== v3.60 Retrieval Coverage ===========
class CoverageAnalyzeRequest(BaseModel):
    """Body for ``POST /v1/admin/retrieval_coverage/analyze``."""

    query: str
    docs: List[Dict[str, Any]]


@app.get("/v1/admin/retrieval_coverage")
def admin_retrieval_coverage_stats():
    """Return the retrieval-coverage analyzer's ``stats()`` output."""
    from ..middleware.retrieval_coverage import (
        get_retrieval_coverage_analyzer,
    )
    return get_retrieval_coverage_analyzer().stats()


@app.post("/v1/admin/retrieval_coverage/analyze")
def admin_retrieval_coverage_analyze(
        req: CoverageAnalyzeRequest, request: Request):
    """Analyze ``req.docs`` against ``req.query``; return the report dict.

    Raises:
        HTTPException: 400 when the analyzer raises ``ValueError``.
    """
    from ..middleware.retrieval_coverage import (
        get_retrieval_coverage_analyzer,
    )
    try:
        report = get_retrieval_coverage_analyzer().analyze(
            req.query, req.docs)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return report.to_dict()


@app.post("/v1/admin/retrieval_coverage/reset")
def admin_retrieval_coverage_reset(request: Request):
    """Call ``reset()`` on the coverage analyzer; return ``{"ok": True}``."""
    from ..middleware.retrieval_coverage import (
        get_retrieval_coverage_analyzer,
    )
    get_retrieval_coverage_analyzer().reset()
    return {"ok": True}


# =========================== v3.61 Answer Hedging ===============
class HedgingDetectRequest(BaseModel):
    """Body for ``POST /v1/admin/answer_hedging/detect``."""

    text: str


@app.get("/v1/admin/answer_hedging")
def admin_answer_hedging_stats():
    """Return the answer-hedging detector's ``stats()`` output."""
    from ..middleware.answer_hedging import (
        get_answer_hedging_detector,
    )
    return get_answer_hedging_detector().stats()


@app.post("/v1/admin/answer_hedging/detect")
def admin_answer_hedging_detect(
        req: HedgingDetectRequest, request: Request):
    """Run the hedging detector on ``req.text``; return the report dict.

    Raises:
        HTTPException: 400 when the detector raises ``ValueError``.
    """
    from ..middleware.answer_hedging import (
        get_answer_hedging_detector,
    )
    try:
        report = get_answer_hedging_detector().detect(req.text)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return report.to_dict()


@app.post("/v1/admin/answer_hedging/reset")
def admin_answer_hedging_reset(request: Request):
    """Call ``reset()`` on the hedging detector; return ``{"ok": True}``."""
    from ..middleware.answer_hedging import (
        get_answer_hedging_detector,
    )
    get_answer_hedging_detector().reset()
    return {"ok": True}


# =========================== v3.62 Query Routing Optimizer ======
class RouterRecordRequest(BaseModel):
    """Body for ``POST /v1/admin/query_routing_optimizer/record``."""

    query: str
    retriever: str
    outcome_score: float


class RouterRecommendRequest(BaseModel):
    """Body for ``POST /v1/admin/query_routing_optimizer/recommend``."""

    query: str


@app.get("/v1/admin/query_routing_optimizer")
def admin_query_routing_stats():
    """Return the query-routing optimizer's ``stats()`` output."""
    from ..middleware.query_routing_optimizer import (
        get_query_routing_optimizer,
    )
    return get_query_routing_optimizer().stats()


@app.post("/v1/admin/query_routing_optimizer/record")
def admin_query_routing_record(
        req: RouterRecordRequest, request: Request):
    """Pass ``query``, ``retriever``, ``outcome_score`` to ``record``.

    Returns ``{"ok": True}`` on success.

    Raises:
        HTTPException: 400 when the optimizer raises ``ValueError``.
    """
    from ..middleware.query_routing_optimizer import (
        get_query_routing_optimizer,
    )
    try:
        get_query_routing_optimizer().record(
            req.query, req.retriever, req.outcome_score)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return {"ok": True}


@app.post("/v1/admin/query_routing_optimizer/recommend")
def admin_query_routing_recommend(
        req: RouterRecommendRequest, request: Request):
    """Return the optimizer's recommendation for ``req.query`` as a dict.

    Raises:
        HTTPException: 400 when the optimizer raises ``ValueError``.
    """
    from ..middleware.query_routing_optimizer import (
        get_query_routing_optimizer,
    )
    try:
        rec = get_query_routing_optimizer().recommend(req.query)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return rec.to_dict()


@app.get("/v1/admin/query_routing_optimizer/buckets")
def admin_query_routing_buckets():
    """Return the query-routing optimizer's ``buckets()`` output."""
    from ..middleware.query_routing_optimizer import (
        get_query_routing_optimizer,
    )
    return get_query_routing_optimizer().buckets()


@app.post("/v1/admin/query_routing_optimizer/reset")
def admin_query_routing_reset(request: Request):
    """Call ``reset()`` on the routing optimizer; return ``{"ok": True}``."""
    from ..middleware.query_routing_optimizer import (
        get_query_routing_optimizer,
    )
    get_query_routing_optimizer().reset()
    return {"ok": True}


# =========================== v3.63 Answer Specificity ===========
class SpecificityScoreRequest(BaseModel):
    """Body for ``POST /v1/admin/answer_specificity/score``."""

    text: str


@app.get("/v1/admin/answer_specificity")
def admin_answer_specificity_stats():
    """Return the answer-specificity scorer's ``stats()`` output."""
    from ..middleware.answer_specificity import (
        get_answer_specificity_scorer,
    )
    return get_answer_specificity_scorer().stats()


@app.post("/v1/admin/answer_specificity/score")
def admin_answer_specificity_score(
        req: SpecificityScoreRequest, request: Request):
    """Score ``req.text`` with the specificity scorer; return the report.

    Raises:
        HTTPException: 400 when the scorer raises ``ValueError``.
    """
    from ..middleware.answer_specificity import (
        get_answer_specificity_scorer,
    )
    try:
        report = get_answer_specificity_scorer().score(req.text)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return report.to_dict()


@app.post("/v1/admin/answer_specificity/reset")
def admin_answer_specificity_reset(request: Request):
    """Call ``reset()`` on the specificity scorer; return ``{"ok": True}``."""
    from ..middleware.answer_specificity import (
        get_answer_specificity_scorer,
    )
    get_answer_specificity_scorer().reset()
    return {"ok": True}


# =========================== v3.64 Rank Stability ===============
class StabilityRecordRequest(BaseModel):
    """Body for ``POST /v1/admin/rank_stability/record``."""

    query: str
    top_k: List[str]


@app.get("/v1/admin/rank_stability")
def admin_rank_stability_stats():
    """Return the rank-stability tracker's ``stats()`` output."""
    from ..middleware.rank_stability import (
        get_rank_stability_tracker,
    )
    return get_rank_stability_tracker().stats()


@app.post("/v1/admin/rank_stability/record")
def admin_rank_stability_record(
        req: StabilityRecordRequest, request: Request):
    """Pass ``req.query`` and ``req.top_k`` to the tracker's ``record``.

    Returns ``{"ok": True}`` on success.

    Raises:
        HTTPException: 400 when the tracker raises ``ValueError``.
    """
    from ..middleware.rank_stability import (
        get_rank_stability_tracker,
    )
    try:
        get_rank_stability_tracker().record(
            req.query, req.top_k)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return {"ok": True}


@app.get("/v1/admin/rank_stability/analyze")
def admin_rank_stability_analyze(query: str):
    """Return the tracker's analysis of ``query`` as a dict.

    Raises:
        HTTPException: 400 when ``analyze`` raises ``ValueError``, the
            same mapping the ``record`` endpoint applies.
    """
    from ..middleware.rank_stability import (
        get_rank_stability_tracker,
    )
    try:
        report = get_rank_stability_tracker().analyze(query)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return report.to_dict()


@app.get("/v1/admin/rank_stability/flaky")
def admin_rank_stability_flaky():
    """Return the tracker's ``flaky_queries()`` under ``flaky_queries``."""
    from ..middleware.rank_stability import (
        get_rank_stability_tracker,
    )
    return {"flaky_queries": get_rank_stability_tracker().flaky_queries()}


@app.post("/v1/admin/rank_stability/reset")
def admin_rank_stability_reset(request: Request):
    """Call ``reset()`` on the stability tracker; return ``{"ok": True}``."""
    from ..middleware.rank_stability import (
        get_rank_stability_tracker,
    )
    get_rank_stability_tracker().reset()
    return {"ok": True}


# =========================== v3.65 Session Topic Tracker ========
class SessionRecordRequest(BaseModel):
    """Body for ``POST /v1/admin/session_topic_tracker/record``."""

    session_id: str
    query: str


@app.get("/v1/admin/session_topic_tracker")
def admin_session_topic_stats():
    """Return the session-topic tracker's ``stats()`` output."""
    from ..middleware.session_topic_tracker import (
        get_session_topic_tracker,
    )
    return get_session_topic_tracker().stats()


@app.post("/v1/admin/session_topic_tracker/record")
def admin_session_topic_record(
        req: SessionRecordRequest, request: Request):
    """Record ``req.query`` under ``req.session_id``.

    Returns the value produced by the tracker's ``record`` as a dict.

    Raises:
        HTTPException: 400 when the tracker raises ``ValueError``.
    """
    from ..middleware.session_topic_tracker import (
        get_session_topic_tracker,
    )
    try:
        turn = get_session_topic_tracker().record(
            req.session_id, req.query)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return turn.to_dict()


@app.get("/v1/admin/session_topic_tracker/analyze")
def admin_session_topic_analyze(session_id: str):
    """Return the tracker's ``analyze_session`` result as a dict.

    Raises:
        HTTPException: 400 when ``analyze_session`` raises
            ``ValueError``, the same mapping ``record`` applies.
    """
    from ..middleware.session_topic_tracker import (
        get_session_topic_tracker,
    )
    try:
        report = get_session_topic_tracker().analyze_session(session_id)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return report.to_dict()


@app.get("/v1/admin/session_topic_tracker/sessions")
def admin_session_topic_sessions():
    """Return the tracker's ``sessions()`` output under ``sessions``."""
    from ..middleware.session_topic_tracker import (
        get_session_topic_tracker,
    )
    return {"sessions": get_session_topic_tracker().sessions()}
@app.post("/v1/admin/session_topic_tracker/reset")
def admin_session_topic_reset(request: Request):
    """Call ``reset()`` on the session tracker; return ``{"ok": True}``."""
    from ..middleware.session_topic_tracker import (
        get_session_topic_tracker,
    )
    get_session_topic_tracker().reset()
    return {"ok": True}


# =========================== v3.66 Snippet Deduplicator =========
class SnippetDedupRequest(BaseModel):
    """Body for ``POST /v1/admin/snippet_dedup/dedupe``."""

    snippets: List[Dict[str, Any]]


@app.get("/v1/admin/snippet_dedup")
def admin_snippet_dedup_stats():
    """Return the snippet deduplicator's ``stats()`` output."""
    from ..middleware.snippet_dedup import (
        get_snippet_deduplicator,
    )
    return get_snippet_deduplicator().stats()


@app.post("/v1/admin/snippet_dedup/dedupe")
def admin_snippet_dedup_dedupe(
        req: SnippetDedupRequest, request: Request):
    """Run ``dedupe`` over ``req.snippets``; return the report dict.

    Raises:
        HTTPException: 400 when the deduplicator raises ``ValueError``.
    """
    from ..middleware.snippet_dedup import (
        get_snippet_deduplicator,
    )
    try:
        report = get_snippet_deduplicator().dedupe(req.snippets)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return report.to_dict()


@app.post("/v1/admin/snippet_dedup/reset")
def admin_snippet_dedup_reset(request: Request):
    """Call ``reset()`` on the deduplicator; return ``{"ok": True}``."""
    from ..middleware.snippet_dedup import (
        get_snippet_deduplicator,
    )
    get_snippet_deduplicator().reset()
    return {"ok": True}


# =========================== v3.67 Citation Normalizer ==========
class CitationNormalizeRequest(BaseModel):
    """Body for ``POST /v1/admin/citation_normalizer/normalize``."""

    text: str


@app.get("/v1/admin/citation_normalizer")
def admin_citation_normalizer_stats():
    """Return the citation-style normalizer's ``stats()`` output."""
    from ..middleware.citation_normalizer import (
        get_citation_style_normalizer,
    )
    return get_citation_style_normalizer().stats()


@app.post("/v1/admin/citation_normalizer/normalize")
def admin_citation_normalizer_normalize(
        req: CitationNormalizeRequest, request: Request):
    """Run ``normalize`` on ``req.text``; return the report dict.

    Raises:
        HTTPException: 400 when the normalizer raises ``ValueError``.
    """
    from ..middleware.citation_normalizer import (
        get_citation_style_normalizer,
    )
    try:
        report = get_citation_style_normalizer().normalize(
            req.text)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return report.to_dict()


@app.post("/v1/admin/citation_normalizer/reset")
def admin_citation_normalizer_reset(request: Request):
    """Call ``reset()`` on the citation normalizer; return ``{"ok": True}``."""
    from ..middleware.citation_normalizer import (
        get_citation_style_normalizer,
    )
    get_citation_style_normalizer().reset()
    return {"ok": True}


# =========================== v3.68 Query Precision ==============
class PrecisionClassifyRequest(BaseModel):
    """Body for ``POST /v1/admin/query_precision_classifier/classify``."""

    query: str


class PrecisionClassifyManyRequest(BaseModel):
    """Body for ``POST .../query_precision_classifier/classify_many``."""

    queries: List[str]


@app.get("/v1/admin/query_precision_classifier")
def admin_query_precision_stats():
    """Return the query-precision classifier's ``stats()`` output."""
    from ..middleware.query_precision_classifier import (
        get_query_precision_classifier,
    )
    return get_query_precision_classifier().stats()


@app.post("/v1/admin/query_precision_classifier/classify")
def admin_query_precision_classify(
        req: PrecisionClassifyRequest, request: Request):
    """Classify ``req.query``; return the report dict.

    Raises:
        HTTPException: 400 when the classifier raises ``ValueError``.
    """
    from ..middleware.query_precision_classifier import (
        get_query_precision_classifier,
    )
    try:
        report = get_query_precision_classifier().classify(
            req.query)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return report.to_dict()


@app.post("/v1/admin/query_precision_classifier/classify_many")
def admin_query_precision_classify_many(
        req: PrecisionClassifyManyRequest, request: Request):
    """Classify every query in ``req.queries``; return ``{"reports": [...]}``.

    Raises:
        HTTPException: 400 when ``classify_many`` raises ``ValueError``,
            the same mapping the single-query endpoint applies.
    """
    from ..middleware.query_precision_classifier import (
        get_query_precision_classifier,
    )
    try:
        reports = get_query_precision_classifier().classify_many(
            req.queries)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return {"reports": [r.to_dict() for r in reports]}


@app.post("/v1/admin/query_precision_classifier/reset")
def admin_query_precision_reset(request: Request):
    """Call ``reset()`` on the precision classifier; return ``{"ok": True}``."""
    from ..middleware.query_precision_classifier import (
        get_query_precision_classifier,
    )
    get_query_precision_classifier().reset()
    return {"ok": True}


# =========================== v3.69 Answer Format Validator ======
class FormatValidateRequest(BaseModel):
    """Body for ``POST /v1/admin/answer_format_validator/validate``."""

    text: str
    format_name: str


class FormatDetectRequest(BaseModel):
    """Body for ``POST /v1/admin/answer_format_validator/detect``."""

    text: str


@app.get("/v1/admin/answer_format_validator")
def admin_answer_format_stats():
    """Return the answer-format validator's ``stats()`` output."""
    from ..middleware.answer_format_validator import (
        get_answer_format_validator,
    )
    return get_answer_format_validator().stats()


@app.post("/v1/admin/answer_format_validator/validate")
def admin_answer_format_validate(
        req: FormatValidateRequest, request: Request):
    """Validate ``req.text`` against ``req.format_name``; return the report.

    Raises:
        HTTPException: 400 when the validator raises ``ValueError``.
    """
    from ..middleware.answer_format_validator import (
        get_answer_format_validator,
    )
    try:
        report = get_answer_format_validator().validate(
            req.text, req.format_name)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return report.to_dict()


@app.post("/v1/admin/answer_format_validator/detect")
def admin_answer_format_detect(
        req: FormatDetectRequest, request: Request):
    """Return ``detect_format(req.text)`` under ``detected_format``.

    Raises:
        HTTPException: 400 when the validator raises ``ValueError``.
    """
    from ..middleware.answer_format_validator import (
        get_answer_format_validator,
    )
    try:
        fmt = get_answer_format_validator().detect_format(
            req.text)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return {"detected_format": fmt}


@app.get("/v1/admin/answer_format_validator/formats")
def admin_answer_format_list():
    """Return the validator's ``available_formats()`` under ``formats``."""
    from ..middleware.answer_format_validator import (
        get_answer_format_validator,
    )
    return {"formats": get_answer_format_validator().available_formats()}


@app.post("/v1/admin/answer_format_validator/reset")
def admin_answer_format_reset(request: Request):
    """Call ``reset()`` on the format validator; return ``{"ok": True}``."""
    from ..middleware.answer_format_validator import (
        get_answer_format_validator,
    )
    get_answer_format_validator().reset()
    return {"ok": True}


# =========================== v3.70 Retrieval Delta ==============
class DeltaAnalyzeRequest(BaseModel):
    """Body for ``POST /v1/admin/retrieval_delta/analyze``."""

    run_a: List[str]
    run_b: List[str]


@app.get("/v1/admin/retrieval_delta")
def admin_retrieval_delta_stats():
    """Return the retrieval-delta analyzer's ``stats()`` output."""
    from ..middleware.retrieval_delta import (
        get_retrieval_delta_analyzer,
    )
    return get_retrieval_delta_analyzer().stats()


@app.post("/v1/admin/retrieval_delta/analyze")
def admin_retrieval_delta_analyze(
        req: DeltaAnalyzeRequest, request: Request):
    """Run ``analyze`` on ``req.run_a`` and ``req.run_b``; return the report.

    Raises:
        HTTPException: 400 when the analyzer raises ``ValueError``.
    """
    from ..middleware.retrieval_delta import (
        get_retrieval_delta_analyzer,
    )
    try:
        report = get_retrieval_delta_analyzer().analyze(
            req.run_a, req.run_b)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return report.to_dict()


@app.post("/v1/admin/retrieval_delta/reset")
def admin_retrieval_delta_reset(request: Request):
    """Call ``reset()`` on the delta analyzer; return ``{"ok": True}``."""
    from ..middleware.retrieval_delta import (
        get_retrieval_delta_analyzer,
    )
    get_retrieval_delta_analyzer().reset()
    return {"ok": True}


# =========================== v3.71 Query Paraphrase =============
class ParaphraseCompareRequest(BaseModel):
    """Body for ``POST /v1/admin/query_paraphrase/compare``."""

    query_a: str
    query_b: str


class ParaphraseFindRequest(BaseModel):
    """Body for ``POST /v1/admin/query_paraphrase/find``."""

    query: str
    candidates: List[str]


@app.get("/v1/admin/query_paraphrase")
def admin_query_paraphrase_stats():
    """Return the query-paraphrase detector's ``stats()`` output."""
    from ..middleware.query_paraphrase import (
        get_query_paraphrase_detector,
    )
    return get_query_paraphrase_detector().stats()


@app.post("/v1/admin/query_paraphrase/compare")
def admin_query_paraphrase_compare(
        req: ParaphraseCompareRequest, request: Request):
    """Compare ``req.query_a`` with ``req.query_b``; return the report.

    Raises:
        HTTPException: 400 when the detector raises ``ValueError``.
    """
    from ..middleware.query_paraphrase import (
        get_query_paraphrase_detector,
    )
    try:
        report = get_query_paraphrase_detector().compare(
            req.query_a, req.query_b)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return report.to_dict()


@app.post("/v1/admin/query_paraphrase/find")
def admin_query_paraphrase_find(
        req: ParaphraseFindRequest, request: Request):
    """Run ``find_paraphrases`` for ``req.query`` over ``req.candidates``.

    Returns ``{"reports": [...]}`` with one dict per returned report.

    Raises:
        HTTPException: 400 when the detector raises ``ValueError``.
    """
    from ..middleware.query_paraphrase import (
        get_query_paraphrase_detector,
    )
    try:
        reports = get_query_paraphrase_detector().find_paraphrases(
            req.query, req.candidates)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return {"reports": [r.to_dict() for r in reports]}


@app.post("/v1/admin/query_paraphrase/reset")
def admin_query_paraphrase_reset(request: Request):
    """Call ``reset()`` on the paraphrase detector; return ``{"ok": True}``."""
    from ..middleware.query_paraphrase import (
        get_query_paraphrase_detector,
    )
    get_query_paraphrase_detector().reset()
    return {"ok": True}


# =========================== v3.72 Doc Quality Scorer ===========
class QualityScoreRequest(BaseModel):
    """Body for ``POST /v1/admin/doc_quality/score``."""

    doc_id: str
    text: str


class QualityScoreManyRequest(BaseModel):
    """Body for ``POST /v1/admin/doc_quality/score_many``."""

    docs: List[Dict[str, Any]]


@app.get("/v1/admin/doc_quality")
def admin_doc_quality_stats():
    """Return the doc-quality scorer's ``stats()`` output."""
    from ..middleware.doc_quality import get_doc_quality_scorer
    return get_doc_quality_scorer().stats()


@app.post("/v1/admin/doc_quality/score")
def admin_doc_quality_score(
        req: QualityScoreRequest, request: Request):
    """Score ``req.text`` for ``req.doc_id``; return the report dict.

    Raises:
        HTTPException: 400 when the scorer raises ``ValueError``.
    """
    from ..middleware.doc_quality import get_doc_quality_scorer
    try:
        report = get_doc_quality_scorer().score(
            req.doc_id, req.text)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return report.to_dict()


@app.post("/v1/admin/doc_quality/score_many")
def admin_doc_quality_score_many(
        req: QualityScoreManyRequest, request: Request):
    """Score ``req.docs`` in one call; return the reports and a summary.

    Raises:
        HTTPException: 400 when ``score_many`` raises ``ValueError``,
            the same mapping the single-document endpoint applies.
    """
    from ..middleware.doc_quality import get_doc_quality_scorer
    scorer = get_doc_quality_scorer()
    try:
        reports = scorer.score_many(req.docs)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return {
        "reports": [r.to_dict() for r in reports],
        "summary": scorer.summarize(reports),
    }


@app.post("/v1/admin/doc_quality/reset")
def admin_doc_quality_reset(request: Request):
    """Call ``reset()`` on the doc-quality scorer; return ``{"ok": True}``."""
    from ..middleware.doc_quality import get_doc_quality_scorer
    get_doc_quality_scorer().reset()
    return {"ok": True}


# =========================== v3.73 Answer Coverage Gap ==========
class CoverageGapCheckRequest(BaseModel):
    """Body for ``POST /v1/admin/answer_coverage_gap/check``."""

    query: str
    answer: str


@app.get("/v1/admin/answer_coverage_gap")
def admin_answer_coverage_gap_stats():
    """Return the coverage-gap detector's ``stats()`` output."""
    from ..middleware.answer_coverage_gap import (
        get_answer_coverage_gap_detector,
    )
    return get_answer_coverage_gap_detector().stats()


@app.post("/v1/admin/answer_coverage_gap/check")
def admin_answer_coverage_gap_check(
        req: CoverageGapCheckRequest, request: Request):
    """Run ``check`` on ``req.query`` and ``req.answer``; return the report.

    Raises:
        HTTPException: 400 when the detector raises ``ValueError``.
    """
    from ..middleware.answer_coverage_gap import (
        get_answer_coverage_gap_detector,
    )
    try:
        report = get_answer_coverage_gap_detector().check(
            req.query, req.answer)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return report.to_dict()


@app.post("/v1/admin/answer_coverage_gap/reset")
def admin_answer_coverage_gap_reset(request: Request):
    """Call ``reset()`` on the gap detector; return ``{"ok": True}``."""
    from ..middleware.answer_coverage_gap import (
        get_answer_coverage_gap_detector,
    )
    get_answer_coverage_gap_detector().reset()
    return {"ok": True}


# =========================== v3.74 Query Throughput =============
class ThroughputRecordRequest(BaseModel):
    """Body for ``POST /v1/admin/query_throughput/record``."""

    latency_ms: float


@app.get("/v1/admin/query_throughput")
def admin_query_throughput_stats():
    """Return the query-throughput profiler's ``stats()`` output."""
    from ..middleware.query_throughput import (
        get_query_throughput_profiler,
    )
    return get_query_throughput_profiler().stats()


@app.post("/v1/admin/query_throughput/record")
def admin_query_throughput_record(
        req: ThroughputRecordRequest, request: Request):
    """Pass ``req.latency_ms`` to the profiler's ``record``.

    Returns ``{"ok": True}`` on success.

    Raises:
        HTTPException: 400 when the profiler raises ``ValueError``.
    """
    from ..middleware.query_throughput import (
        get_query_throughput_profiler,
    )
    try:
        get_query_throughput_profiler().record(req.latency_ms)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return {"ok": True}


@app.get("/v1/admin/query_throughput/analyze")
def admin_query_throughput_analyze():
    """Return the profiler's ``analyze()`` result as a dict."""
    from ..middleware.query_throughput import (
        get_query_throughput_profiler,
    )
    return get_query_throughput_profiler().analyze().to_dict()


@app.post("/v1/admin/query_throughput/reset")
def admin_query_throughput_reset(request: Request):
    """Call ``reset()`` on the throughput profiler; return ``{"ok": True}``."""
    from ..middleware.query_throughput import (
        get_query_throughput_profiler,
    )
    get_query_throughput_profiler().reset()
    return {"ok": True}


# =========================== v3.75 Query Normalizer =============
class QueryNormalizeRequest(BaseModel):
    """Body for ``POST /v1/admin/query_normalizer/normalize``."""

    text: str
    # Each flag left as None falls back to the normalizer's own default
    # in the normalize endpoint.
    strip_nikud: Optional[bool] = None
    lowercase: Optional[bool] = None
    strip_punctuation: Optional[bool] = None
    strip_question_marks: Optional[bool] = None


@app.get("/v1/admin/query_normalizer")
def admin_query_normalizer_stats():
    """Return the query normalizer's ``stats()`` output."""
    from ..middleware.query_normalizer import (
        get_query_normalizer,
    )
    return get_query_normalizer().stats()


@app.post("/v1/admin/query_normalizer/normalize")
def admin_query_normalizer_normalize(
        req: QueryNormalizeRequest, request: Request):
    """Normalize ``req.text`` with per-request option overrides.

    Options the caller leaves as ``None`` are taken from the
    ``"defaults"`` entry of the normalizer's ``stats()``.

    Raises:
        HTTPException: 400 when ``normalize`` raises ``ValueError``.
    """
    from ..middleware.query_normalizer import (
        get_query_normalizer,
        NormalizeOptions,
    )
    normalizer = get_query_normalizer()
    defaults = normalizer.stats()["defaults"]
    chosen = {}
    for option in ("strip_nikud", "lowercase", "strip_punctuation",
                   "strip_question_marks"):
        requested = getattr(req, option)
        # Only consult the defaults when the caller gave no value.
        chosen[option] = (
            requested if requested is not None else defaults[option]
        )
    opts = NormalizeOptions(**chosen)
    try:
        result = normalizer.normalize(req.text, opts)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return result.to_dict()


@app.post("/v1/admin/query_normalizer/reset")
def admin_query_normalizer_reset(request: Request):
    """Call ``reset()`` on the query normalizer; return ``{"ok": True}``."""
    from ..middleware.query_normalizer import (
        get_query_normalizer,
    )
    get_query_normalizer().reset()
    return {"ok": True}


# =========================== v3.76 Chunk Overlap Analyzer =======
class ChunkAnalyzeRequest(BaseModel):
    """Body for ``POST /v1/admin/chunk_overlap/analyze``."""

    chunks: List[str]
    source_text: Optional[str] = None


@app.get("/v1/admin/chunk_overlap")
def admin_chunk_overlap_stats():
    """Return the chunk-overlap analyzer's ``stats()`` output."""
    from ..middleware.chunk_overlap import (
        get_chunk_overlap_analyzer,
    )
    return get_chunk_overlap_analyzer().stats()


@app.post("/v1/admin/chunk_overlap/analyze")
def admin_chunk_overlap_analyze(
        req: ChunkAnalyzeRequest, request: Request):
    """Run ``analyze`` on ``req.chunks`` and ``req.source_text``.

    Returns the report as a dict.

    Raises:
        HTTPException: 400 when the analyzer raises ``ValueError``.
    """
    from ..middleware.chunk_overlap import (
        get_chunk_overlap_analyzer,
    )
    try:
        report = get_chunk_overlap_analyzer().analyze(
            req.chunks, req.source_text)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return report.to_dict()


@app.post("/v1/admin/chunk_overlap/reset")
def admin_chunk_overlap_reset(request: Request):
    """Call ``reset()`` on the overlap analyzer; return ``{"ok": True}``."""
    from ..middleware.chunk_overlap import (
        get_chunk_overlap_analyzer,
    )
    get_chunk_overlap_analyzer().reset()
    return {"ok": True}


# =========================== v3.77 Numeric Consistency ==========
class NumericCheckRequest(BaseModel):
    """Body for ``POST /v1/admin/answer_numeric_consistency/check``."""

    answer: str
    sources: List[str]


class NumericExtractRequest(BaseModel):
    """Body for ``POST /v1/admin/answer_numeric_consistency/extract``."""

    text: str


@app.get("/v1/admin/answer_numeric_consistency")
def admin_answer_numeric_stats():
    """Return the numeric-consistency checker's ``stats()`` output."""
    from ..middleware.answer_numeric_consistency import (
        get_answer_numeric_consistency_checker,
    )
    return get_answer_numeric_consistency_checker().stats()


@app.post("/v1/admin/answer_numeric_consistency/check")
def admin_answer_numeric_check(
        req: NumericCheckRequest, request: Request):
    """Run ``check`` on ``req.answer`` against ``req.sources``.

    Returns the report as a dict.

    Raises:
        HTTPException: 400 when the checker raises ``ValueError``.
    """
    from ..middleware.answer_numeric_consistency import (
        get_answer_numeric_consistency_checker,
    )
    try:
        report = get_answer_numeric_consistency_checker().check(
            req.answer, req.sources)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return report.to_dict()


@app.post("/v1/admin/answer_numeric_consistency/extract")
def admin_answer_numeric_extract(
        req: NumericExtractRequest, request: Request):
    """Run ``extract`` on ``req.text``; return ``{"facts": [...]}``.

    Raises:
        HTTPException: 400 when the checker raises ``ValueError``.
    """
    from ..middleware.answer_numeric_consistency import (
        get_answer_numeric_consistency_checker,
    )
    try:
        facts = get_answer_numeric_consistency_checker().extract(
            req.text)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return {"facts": [f.to_dict() for f in facts]}


@app.post("/v1/admin/answer_numeric_consistency/reset")
def admin_answer_numeric_reset(request: Request):
    """Call ``reset()`` on the numeric checker; return ``{"ok": True}``."""
    from ..middleware.answer_numeric_consistency import (
        get_answer_numeric_consistency_checker,
    )
    get_answer_numeric_consistency_checker().reset()
    return {"ok": True}


# =========================== v3.78 Error Budget Tracker =========
class ErrorBudgetRecordRequest(BaseModel):
    """Body for ``POST /v1/admin/error_budget/record``."""

    is_error: bool


@app.get("/v1/admin/error_budget")
def admin_error_budget_stats():
    """Return the error-budget tracker's ``stats()`` output."""
    from ..middleware.error_budget import (
        get_error_budget_tracker,
    )
    return get_error_budget_tracker().stats()


@app.post("/v1/admin/error_budget/record")
def admin_error_budget_record(
        req: ErrorBudgetRecordRequest, request: Request):
    """Pass ``req.is_error`` to the tracker's ``record``.

    Returns ``{"ok": True}``.
    """
    from ..middleware.error_budget import (
        get_error_budget_tracker,
    )
    get_error_budget_tracker().record(req.is_error)
    return {"ok": True}


@app.get("/v1/admin/error_budget/analyze")
def admin_error_budget_analyze():
    """Return the tracker's ``analyze()`` result as a dict."""
    from ..middleware.error_budget import (
        get_error_budget_tracker,
    )
    return get_error_budget_tracker().analyze().to_dict()


@app.post("/v1/admin/error_budget/reset")
def admin_error_budget_reset(request: Request):
    """Call ``reset()`` on the error-budget tracker; return ``{"ok": True}``."""
    from ..middleware.error_budget import (
        get_error_budget_tracker,
    )
    get_error_budget_tracker().reset()
    return {"ok": True}


# =========================== v3.79 Request Fingerprint ==========
class FingerprintComputeRequest(BaseModel):
    """Body for ``POST /v1/admin/request_fingerprint/compute``."""

    request: Dict[str, Any]


class FingerprintMatchRequest(BaseModel):
    """Body for ``POST /v1/admin/request_fingerprint/match``."""

    a: Dict[str, Any]
    b: Dict[str, Any]
    strict: bool = False


@app.get("/v1/admin/request_fingerprint")
def admin_request_fingerprint_stats():
    """Return the request fingerprinter's ``stats()`` output."""
    from ..middleware.request_fingerprint import (
        get_request_fingerprinter,
    )
    return get_request_fingerprinter().stats()


@app.post("/v1/admin/request_fingerprint/compute")
def admin_request_fingerprint_compute(
        req: FingerprintComputeRequest, request: Request):
    """Run ``compute`` on the ``request`` dict from the body.

    Note that ``req.request`` is the JSON payload to fingerprint, while
    the ``request`` parameter is the incoming HTTP request object.

    Raises:
        HTTPException: 400 when the fingerprinter raises ``ValueError``.
    """
    from ..middleware.request_fingerprint import (
        get_request_fingerprinter,
    )
    try:
        fp = get_request_fingerprinter().compute(req.request)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return fp.to_dict()


@app.post("/v1/admin/request_fingerprint/match")
def admin_request_fingerprint_match(
        req: FingerprintMatchRequest, request: Request):
    """Run ``matches`` on ``req.a`` and ``req.b``.

    Returns the result under ``"matches"`` and echoes ``req.strict``
    under ``"strict"``.

    Raises:
        HTTPException: 400 when the fingerprinter raises ``ValueError``.
    """
    from ..middleware.request_fingerprint import (
        get_request_fingerprinter,
    )
    try:
        ok = get_request_fingerprinter().matches(
            req.a, req.b, strict=req.strict)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return {"matches": ok, "strict": req.strict}


@app.post("/v1/admin/request_fingerprint/reset")
def admin_request_fingerprint_reset(request: Request):
    """Call ``reset()`` on the fingerprinter; return ``{"ok": True}``."""
    from ..middleware.request_fingerprint import (
        get_request_fingerprinter,
    )
    get_request_fingerprinter().reset()
    return {"ok": True}


# =========================== v3.80 Circuit Breaker ==============
class BreakerRecordRequest(BaseModel):
    """Body for ``POST /v1/admin/circuit_breaker/record``."""

    name: str
    success: bool


@app.get("/v1/admin/circuit_breaker")
def admin_circuit_breaker_stats():
    """Return the circuit-breaker registry's ``stats()`` output."""
    from ..middleware.circuit_breaker import (
        get_circuit_breaker_registry,
    )
    return get_circuit_breaker_registry().stats()


@app.post("/v1/admin/circuit_breaker/record")
def admin_circuit_breaker_record(
        req: BreakerRecordRequest, request: Request):
    """Record a success/failure for ``req.name`` and report its state.

    Returns ``{"ok": True, "state": ...}`` where ``state`` is read from
    the registry after the outcome has been recorded.

    Raises:
        HTTPException: 400 when ``record`` or the follow-up ``state``
            lookup raises ``ValueError``.
    """
    from ..middleware.circuit_breaker import (
        get_circuit_breaker_registry,
    )
    registry = get_circuit_breaker_registry()
    try:
        registry.record(req.name, req.success)
        # The /state endpoint treats state() as able to raise ValueError,
        # so the lookup lives inside the same guard here.
        state = registry.state(req.name)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return {"ok": True, "state": state}


@app.get("/v1/admin/circuit_breaker/state")
def admin_circuit_breaker_state(name: str):
    """Return ``{"name": name, "state": ...}`` for one breaker.

    Raises:
        HTTPException: 400 when the registry raises ``ValueError``.
    """
    from ..middleware.circuit_breaker import (
        get_circuit_breaker_registry,
    )
    try:
        state = get_circuit_breaker_registry().state(name)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    return {"name": name, "state": state}


@app.get("/v1/admin/circuit_breaker/all")
def admin_circuit_breaker_all():
    """Return the registry's ``all_stats()`` output under ``breakers``."""
    from ..middleware.circuit_breaker import (
        get_circuit_breaker_registry,
    )
    return {"breakers": get_circuit_breaker_registry().all_stats()}


@app.post("/v1/admin/circuit_breaker/reset")
def admin_circuit_breaker_reset(
        name: Optional[str] = None):
    """Call the registry's ``reset(name)``.

    ``name`` is forwarded unchanged (including ``None``). The response
    echoes it under ``"reset"``, or ``"all"`` when it is missing or empty.
    """
    from ..middleware.circuit_breaker import (
        get_circuit_breaker_registry,
    )
    get_circuit_breaker_registry().reset(name)
    return {"ok": True, "reset": name or "all"}


# ========================== v3.81–v3.95 admin routes (compact) ==========
# These power the static admin dashboard at /admin. Each one is a
# read-only stats/report endpoint plus the minimum write endpoint
# the dashboard needs.
@app.get("/v1/admin/corpus_ingestion/analyze") def admin_corpus_ingestion_analyze(window_sec: Optional[int] = None): from ..middleware.corpus_ingestion import \ get_corpus_ingestion_tracker return get_corpus_ingestion_tracker().analyze( window_sec=window_sec).to_dict() @app.get("/v1/admin/embedding_version") def admin_embedding_version_stats(): from ..middleware.embedding_version import \ get_embedding_version_manager return get_embedding_version_manager().stats() @app.get("/v1/admin/embedding_version/migration") def admin_embedding_version_migration(): from ..middleware.embedding_version import \ get_embedding_version_manager return get_embedding_version_manager().migration_report().to_dict() @app.get("/v1/admin/index_rebuild_scheduler") def admin_index_rebuild_stats(): from ..middleware.index_rebuild_scheduler import \ get_index_rebuild_scheduler return get_index_rebuild_scheduler().state().to_dict() @app.get("/v1/admin/dsr_handler/report") def admin_dsr_handler_report(): from ..middleware.dsr_handler import get_dsr_handler return get_dsr_handler().report().to_dict() @app.get("/v1/admin/lawful_basis") def admin_lawful_basis_stats(): from ..middleware.lawful_basis import \ get_lawful_basis_tracker return get_lawful_basis_tracker().stats() @app.get("/v1/admin/tenant_isolation") def admin_tenant_isolation_stats(): from ..middleware.tenant_isolation import \ get_tenant_isolation_enforcer return get_tenant_isolation_enforcer().stats() @app.get("/v1/admin/zero_result_tracker/report") def admin_zero_result_report(top_k: int = 20, min_occurrences: int = 1): from ..middleware.zero_result_tracker import \ get_zero_result_tracker return get_zero_result_tracker().report( top_k=top_k, min_occurrences=min_occurrences).to_dict() @app.get("/v1/admin/failed_generation/report") def admin_failed_generation_report(window_sec: Optional[int] = None): from ..middleware.failed_generation import \ get_failed_generation_tracker return get_failed_generation_tracker().report( 
window_sec=window_sec).to_dict() @app.get("/v1/admin/per_query_cost/total") def admin_per_query_cost_total(window_sec: Optional[int] = None): from ..middleware.per_query_cost import \ get_per_query_cost_attributor return {"total_usd": get_per_query_cost_attributor().total_usd( window_sec=window_sec)} @app.get("/v1/admin/per_query_cost/all_tenants") def admin_per_query_cost_all_tenants( window_sec: Optional[int] = None): from ..middleware.per_query_cost import \ get_per_query_cost_attributor sums = get_per_query_cost_attributor().all_tenants_summary( window_sec=window_sec) return {"tenants": [s.to_dict() for s in sums]} @app.get("/v1/admin/synthetic_probe/all") def admin_synthetic_probe_all(): from ..middleware.synthetic_probe import \ get_synthetic_probe_runner return {"probes": get_synthetic_probe_runner().all_probe_stats()} @app.get("/v1/admin/composite_health_score/aggregate") def admin_composite_health_aggregate(): from ..middleware.composite_health_score import \ get_composite_health_score_aggregator return get_composite_health_score_aggregator().aggregate().to_dict() @app.get("/v1/admin/answer_helpfulness/report") def admin_answer_helpfulness_report( window_sec: Optional[int] = None): from ..middleware.answer_helpfulness import \ get_answer_helpfulness_tracker return get_answer_helpfulness_tracker().report( window_sec=window_sec).to_dict() @app.get("/v1/admin/answer_helpfulness/negatives") def admin_answer_helpfulness_negatives(limit: int = 20): from ..middleware.answer_helpfulness import \ get_answer_helpfulness_tracker negs = get_answer_helpfulness_tracker().negative_samples( limit=limit) return {"negatives": [f.to_dict() for f in negs]} class HelpfulnessSubmitRequest(BaseModel): request_id: str vote: str reasons: Optional[List[str]] = None free_text: Optional[str] = "" tenant_id: Optional[str] = None user_id: Optional[str] = None @app.post("/v1/admin/answer_helpfulness/submit") def admin_answer_helpfulness_submit( req: HelpfulnessSubmitRequest, request: 
Request): from ..middleware.answer_helpfulness import \ get_answer_helpfulness_tracker try: f = get_answer_helpfulness_tracker().submit( request_id=req.request_id, vote=req.vote, reasons=req.reasons, free_text=req.free_text or "", tenant_id=req.tenant_id, user_id=req.user_id, ) except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) return f.to_dict() @app.get("/v1/admin/adversarial_prompt") def admin_adversarial_prompt_stats(): from ..middleware.adversarial_prompt import \ get_adversarial_prompt_detector return get_adversarial_prompt_detector().stats() class AdversarialDetectRequest(BaseModel): text: str @app.post("/v1/admin/adversarial_prompt/detect") def admin_adversarial_prompt_detect( req: AdversarialDetectRequest, request: Request): from ..middleware.adversarial_prompt import \ get_adversarial_prompt_detector try: report = get_adversarial_prompt_detector().detect(req.text) except ValueError as e: raise HTTPException(status_code=400, detail=str(e)) return report.to_dict() # ========================== /v1/query — minimal end-user endpoint ===== def query_endpoint(req: QueryRequest, request: Request): """End-user query endpoint. Runs adversarial check, session tracking, retrieval, generation, and zero-result tracking server-side so the frontend doesn't need admin scope.""" from ..middleware.zero_result_tracker import \ get_zero_result_tracker from ..middleware.session_topic_tracker import \ get_session_topic_tracker from ..middleware.adversarial_prompt import \ get_adversarial_prompt_detector # 1. Adversarial pre-check (server-side, always runs) adv = None try: adv = get_adversarial_prompt_detector().detect(req.query) if adv.recommendation == "block": return { "answer": ( "⚠ הקלט שלך זוהה כחשוד (" + ", ".join(m.category for m in adv.matches) + "). אם זו טעות, פנה למנהל המערכת."), "docs": [], "confidence": 0.0, "request_id": req.request_id, "adversarial": adv.to_dict(), "blocked": True, } except Exception: pass # 2. 
Session topic tracking (server-side, silent) if req.session_id: try: get_session_topic_tracker().record( req.session_id, req.query) except Exception: pass # 3. Try the real pipeline; fall back to empty if not wired. try: from ..pipeline import get_pipeline from ..core.types import Query pipe = get_pipeline() # v4.x — pass session_id through to the pipeline so the # session-uploads sidecar (in pipeline.py) can search the user's # own uploaded docs alongside the public corpus. rag_resp = pipe.run(Query( text=req.query, session_id=req.session_id, filters=dict(req.filters or {}), )) # RAGResponse has: answer, sources (list of doc_ids), retrieved retrieved = getattr(rag_resp, "retrieved", []) or [] docs = [] for r in retrieved[:req.top_k]: # Pipeline returns Retrieved(chunk=Chunk(doc_id, text, # metadata, ...), score, retriever, ...). Drill in. chunk = (getattr(r, "chunk", None) or getattr(r, "document", None) or r) doc_id = (getattr(chunk, "doc_id", None) or getattr(chunk, "id", None) or getattr(r, "id", "?")) text = (getattr(chunk, "text", None) or getattr(chunk, "content", "") or "") raw_meta = (getattr(chunk, "metadata", None) or getattr(r, "metadata", None) or {}) # Sometimes metadata is double-wrapped: meta = {metadata: {...}, source: ...} meta = raw_meta.get("metadata", raw_meta) \ if isinstance(raw_meta, dict) else {} title = (meta.get("title") if isinstance(meta, dict) else None) or \ (raw_meta.get("source") if isinstance(raw_meta, dict) else None) or \ str(doc_id) kind = (meta.get("kind") if isinstance(meta, dict) else None) chunk_idx = (meta.get("chunk") if isinstance(meta, dict) else None) # v2.x — expose rich metadata so the frontend side-panel can # render citation, judges, dates, law, topic, etc. exposed_meta = {} if isinstance(meta, dict): # Flatten commonly-used keys with a safe subset (avoid leaking # internal markers). Keep strings/numbers/lists only. 
for k in ("citation", "court", "judges", "verdict_dt", "verdict_ty", "inyan", "sides", "section", "law", "topic", "chapter", "language", "type", "verbatim", "title", "source"): v = meta.get(k) if v is not None and not isinstance(v, (bytes, bytearray)): exposed_meta[k] = v docs.append({ "doc_id": str(doc_id), "title": str(title), # v3.x — increased from 600 to 2500 chars so the frontend # can render FULL paragraphs (statute clauses are often # 800-1500 chars; caselaw excerpts can run 2000+). The # frontend's citation-card UI already truncates for display # via -webkit-line-clamp, so sending more text costs nothing # visually but gives us headroom for the source panel. "text": text[:2500], "score": getattr(r, "score", None), "kind": kind, "chunk_index": chunk_idx, "retriever": getattr(r, "retriever", None), "metadata": exposed_meta, }) if not docs: get_zero_result_tracker().record(req.query) signals = getattr(rag_resp, "signals", None) confidence = (getattr(signals, "omega", None) if signals else None) # v2.x — surface which generator actually ran (for UI badge). # Primary = anthropic/openai/local per preset; fallback = # extractive if primary errored. Read from pipe's stash set # in pipeline.py run() after the generate stage. gen_used = getattr(pipe, "_last_generator_used", getattr(pipe.generator, "name", "unknown")) gen_fallback = bool(getattr(pipe, "_last_generator_fallback", False)) answer = getattr(rag_resp, "answer", "(no answer generated)") # v3.x — training data collector (opt-in via TAU_RAG_COLLECT_TRAINING=1) # Auto-generate a request_id when the client didn't supply one, # so batch scripts and other callers don't silently lose traces. 
try: from ..middleware.training_collector import get_training_collector import uuid as _uuid _rid = (req.request_id or "").strip() if not _rid: _rid = "auto_" + _uuid.uuid4().hex[:16] get_training_collector().record( request_id=_rid, query=req.query, context=[{ "doc_id": d.get("doc_id"), "text": d.get("text", "")[:600], "metadata": d.get("metadata", {}), "score": d.get("score"), "retriever": d.get("retriever"), } for d in docs], answer=answer, language=getattr(getattr(rag_resp, "understanding", None), "language", "") or "", generator=gen_used, confidence=confidence, ) except Exception: pass # v4.x — Smart Reasoning: surface understanding + timings so # the frontend can render Query Decomposition + Reasoning Trace. understanding = getattr(rag_resp, "understanding", None) sub_queries = [] query_type = None domain = None if understanding is not None: sub_queries = list(getattr(understanding, "sub_queries", []) or []) # Drop the original query if it's accidentally in there sub_queries = [s for s in sub_queries if s and s != req.query] query_type = getattr(understanding, "query_type", None) domain = getattr(understanding, "domain", None) # RAGResponse has `timing_ms`, not `timings` timings_dict = getattr(rag_resp, "timing_ms", None) or \ getattr(rag_resp, "timings", None) or {} # v2 — Section-type post-filter (e.g. "search only in דיון # sections"). Recognized: header / background / procedural / # arguments_plaintiff / arguments_defendant / arguments_general / # discussion / ruling / operative. try: sec_type = (req.filters or {}).get("section_type") if sec_type: docs = _filter_docs_by_section_type(docs, str(sec_type)) except Exception: pass # v2 — Smart Answer Enrichment: for each source doc, attach its # precedent status (overruled / good law / unknown). Cheap because # the corpus-wide overruled cache is built once and reused. 
try: _enrich_docs_with_precedent_status(docs) except Exception: pass return { "answer": answer, "docs": docs, "confidence": confidence, "request_id": req.request_id, "adversarial": adv.to_dict() if adv else None, "generator": { "used": gen_used, "primary": pipe.config.generation.provider, "model": pipe.config.generation.model, "fallback": gen_fallback, }, # v4.x — Smart Reasoning trace "understanding": { "sub_queries": sub_queries, "query_type": query_type, "domain": domain, }, "timings": ( {k: round(float(v), 2) for k, v in timings_dict.items()} if isinstance(timings_dict, dict) else {} ), } except Exception as e: # Fallback if pipeline init failed get_zero_result_tracker().record(req.query) return { "answer": ( f"⚠ Pipeline error ({type(e).__name__}: {e}). " "Check server logs and confirm " "TAU_RAG_PRESET is set."), "docs": [], "confidence": 0.0, "request_id": req.request_id, "adversarial": adv.to_dict() if adv else None, } # v4.x — Streaming variant of /v1/query. # Emits SSE events: docs → token×N → signals → done. # The perceived-latency win comes from sending docs FIRST so the user # sees citation cards while the answer text streams in chunks. def query_stream_endpoint(req: QueryRequest, request: Request): from fastapi.responses import StreamingResponse import json as _sjson def _err_payload(msg, code="error"): return _sjson.dumps({"error": {"code": code, "message": str(msg)}}, ensure_ascii=False) def _sse(event, data): # SSE framing: event:\n data:\n\n return f"event: {event}\ndata: {data}\n\n" def gen(): # Reuse the SAME logic as the sync endpoint. We run it once, # then chunk the answer for the streaming effect. 
try: from ..middleware.zero_result_tracker import \ get_zero_result_tracker from ..middleware.session_topic_tracker import \ get_session_topic_tracker from ..middleware.adversarial_prompt import \ get_adversarial_prompt_detector # Adversarial pre-check (mirrors sync endpoint) adv = None try: adv = get_adversarial_prompt_detector().detect(req.query) if adv.recommendation == "block": yield _sse("error", _err_payload( "הקלט שלך זוהה כחשוד.", code="adversarial")) yield _sse("done", _sjson.dumps({"blocked": True}, ensure_ascii=False)) return except Exception: pass if req.session_id: try: get_session_topic_tracker().record( req.session_id, req.query) except Exception: pass from ..pipeline import get_pipeline from ..core.types import Query pipe = get_pipeline() rag_resp = pipe.run(Query( text=req.query, session_id=req.session_id, filters=dict(req.filters or {}), )) # Build the docs payload (identical shape to /v1/query) retrieved = getattr(rag_resp, "retrieved", []) or [] docs = [] for r in retrieved[:req.top_k]: chunk = (getattr(r, "chunk", None) or getattr(r, "document", None) or r) doc_id = (getattr(chunk, "doc_id", None) or getattr(chunk, "id", None) or getattr(r, "id", "?")) text = (getattr(chunk, "text", None) or getattr(chunk, "content", "") or "") raw_meta = (getattr(chunk, "metadata", None) or getattr(r, "metadata", None) or {}) meta = (raw_meta.get("metadata", raw_meta) if isinstance(raw_meta, dict) else {}) title = (meta.get("title") if isinstance(meta, dict) else None) or \ (raw_meta.get("source") if isinstance(raw_meta, dict) else None) or \ str(doc_id) exposed_meta = {} if isinstance(meta, dict): for k in ("citation", "court", "judges", "verdict_dt", "verdict_ty", "inyan", "sides", "section", "law", "topic", "chapter", "language", "type", "verbatim", "title", "source"): v = meta.get(k) if v is not None and not isinstance( v, (bytes, bytearray)): exposed_meta[k] = v docs.append({ "doc_id": str(doc_id), "title": str(title), "text": text[:2500], "score": 
getattr(r, "score", None), "kind": (meta.get("kind") if isinstance(meta, dict) else None), "chunk_index": (meta.get("chunk") if isinstance(meta, dict) else None), "retriever": getattr(r, "retriever", None), "metadata": exposed_meta, }) # 1) Emit docs FIRST so the UI can render citation cards # while the answer text streams in. Pre-filter by section # type (if requested), then enrich with precedent status # BEFORE emitting so the user sees overruled badges on the # very first paint. try: sec_type = (req.filters or {}).get("section_type") if sec_type: docs = _filter_docs_by_section_type(docs, str(sec_type)) except Exception: pass try: _enrich_docs_with_precedent_status(docs) except Exception: pass yield _sse("docs", _sjson.dumps({"docs": docs}, ensure_ascii=False)) answer = getattr(rag_resp, "answer", "(no answer generated)") # 2) Stream the answer in word-sized chunks. Hebrew tokens # are space-delimited like English here. We emit chunks # of ~6 words each so the typewriter effect is smooth # but doesn't flood the network with one-word events. 
import re as _sre pieces = _sre.split(r'(\s+)', answer) # keep whitespace buf = [] count = 0 for p in pieces: buf.append(p) if not p.isspace(): count += 1 if count >= 6: chunk = ''.join(buf) yield _sse("token", _sjson.dumps( {"chunk": chunk}, ensure_ascii=False)) buf = []; count = 0 if buf: yield _sse("token", _sjson.dumps( {"chunk": ''.join(buf)}, ensure_ascii=False)) # 3) Signals + meta signals = getattr(rag_resp, "signals", None) confidence = (getattr(signals, "omega", None) if signals else None) gen_used = getattr(pipe, "_last_generator_used", getattr(pipe.generator, "name", "unknown")) gen_fallback = bool(getattr(pipe, "_last_generator_fallback", False)) yield _sse("signals", _sjson.dumps({ "confidence": confidence, "generator": { "used": gen_used, "primary": pipe.config.generation.provider, "model": pipe.config.generation.model, "fallback": gen_fallback, }, "adversarial": adv.to_dict() if adv else None, }, ensure_ascii=False)) # 4) Done — full final payload (mirrors /v1/query response) yield _sse("done", _sjson.dumps({ "answer": answer, "docs": docs, "confidence": confidence, "request_id": req.request_id, "adversarial": adv.to_dict() if adv else None, "generator": { "used": gen_used, "primary": pipe.config.generation.provider, "model": pipe.config.generation.model, "fallback": gen_fallback, }, }, ensure_ascii=False)) except Exception as e: try: from ..middleware.zero_result_tracker import \ get_zero_result_tracker get_zero_result_tracker().record(req.query) except Exception: pass yield _sse("error", _err_payload( f"Pipeline error ({type(e).__name__}: {e})")) yield _sse("done", _sjson.dumps( {"answer": "", "docs": []}, ensure_ascii=False)) return StreamingResponse( gen(), media_type="text/event-stream", headers={ "Cache-Control": "no-cache, no-transform", "X-Accel-Buffering": "no", # tell nginx not to buffer "Connection": "keep-alive", }, ) # -------- /v1/data/load_jsonl — load a JSONL by server-side path ---- class LoadJsonlRequest(BaseModel): path: str 
batch_size: int = 1000 max_lines: Optional[int] = None skip_lines: int = 0 @app.post("/v1/data/load_jsonl") def data_load_jsonl(req: LoadJsonlRequest, request: Request): """Load a JSONL file from a server-side path into the pipeline. Use this when the JSONL is large (>100MB) — multipart upload chokes on big bodies. The file MUST be on the server filesystem. Each line: {"id": "...", "text": "...", "metadata": {...}}. Streams in batches; safe for multi-GB files. """ import os from pathlib import Path from ..middleware.corpus_ingestion import ( get_corpus_ingestion_tracker, STAGE_PARSE, STAGE_INDEX, ) from ..core.types import Document p = Path(req.path).expanduser().resolve() if not p.exists() or not p.is_file(): raise HTTPException( status_code=404, detail=f"file not found on server: {p}") if req.batch_size < 1: raise HTTPException( status_code=400, detail="batch_size must be >= 1") tracker = get_corpus_ingestion_tracker() pipe = None try: from ..pipeline import get_pipeline pipe = get_pipeline() except Exception: pipe = None n_total = 0 n_indexed = 0 n_bad_lines = 0 n_batches = 0 batch: List[Document] = [] source_name = p.name with open(p, "r", encoding="utf-8") as f: for i, line in enumerate(f): if i < req.skip_lines: continue line = line.strip() if not line: continue try: row = json.loads(line) except Exception: n_bad_lines += 1 continue text = row.get("text") or row.get("chunk") or "" if not text: n_bad_lines += 1 continue meta = row.get("metadata", {}) or {} if isinstance(meta, dict): meta = dict(meta) meta.setdefault("source", source_name) else: meta = {"source": source_name} doc = Document( id=str(row.get("id", f"{source_name}-{i}")), text=str(text), metadata=meta, ) batch.append(doc) n_total += 1 if len(batch) >= req.batch_size: # Flush batch for d in batch: tracker.record(d.id, source_name, STAGE_PARSE, True, 0) if pipe and hasattr(pipe, "add_documents"): try: pipe.add_documents(batch) for d in batch: tracker.record( d.id, source_name, STAGE_INDEX, True, 0) 
n_indexed += len(batch) except Exception: pass n_batches += 1 batch = [] if req.max_lines and n_total >= req.max_lines: break # Flush final partial batch if batch: for d in batch: tracker.record(d.id, source_name, STAGE_PARSE, True, 0) if pipe and hasattr(pipe, "add_documents"): try: pipe.add_documents(batch) for d in batch: tracker.record(d.id, source_name, STAGE_INDEX, True, 0) n_indexed += len(batch) except Exception: pass n_batches += 1 return { "ok": True, "path": str(p), "size_mb": round(p.stat().st_size / 1024 / 1024, 2), "n_docs_loaded": n_total, "n_indexed_in_pipeline": n_indexed, "n_bad_lines": n_bad_lines, "n_batches": n_batches, "pipeline_attached": pipe is not None, } # -------- /v1/data/load_local — stream a local JSONL into the index -- class LoadLocalRequest(BaseModel): path: str # server-side JSONL path batch_size: int = 1000 # how many docs to add at a time max_docs: Optional[int] = None # cap total (for testing) skip_first: int = 0 # resume from line N @app.post("/v1/data/load_local") def load_local_endpoint(req: LoadLocalRequest, request: Request): """Stream a local JSONL file into the live index. Use this when the file is too big to POST as multipart. Reads line-by-line, batches into the pipeline, reports progress. 
""" import os from pathlib import Path from ..middleware.corpus_ingestion import ( get_corpus_ingestion_tracker, STAGE_INDEX, ) from ..core.types import Document src = Path(req.path).expanduser().resolve() if not src.exists(): raise HTTPException(status_code=404, detail=f"file not found: {src}") if not src.is_file(): raise HTTPException(status_code=400, detail=f"not a file: {src}") pipe = None try: from ..pipeline import get_pipeline pipe = get_pipeline() except Exception: pipe = None tracker = get_corpus_ingestion_tracker() total_size = src.stat().st_size n_lines = 0 n_docs = 0 n_indexed = 0 n_errors = 0 batch: List[Document] = [] def flush_batch(): nonlocal n_indexed if not batch: return if pipe and hasattr(pipe, "add_documents"): try: pipe.add_documents(batch) for d in batch: tracker.record( d.id, d.metadata.get("source", "jsonl"), STAGE_INDEX, True, 0) n_indexed += len(batch) except Exception: pass batch.clear() with open(src, encoding="utf-8", errors="replace") as f: for line in f: n_lines += 1 if n_lines <= req.skip_first: continue if req.max_docs and n_docs >= req.max_docs: break line = line.strip() if not line: continue try: row = json.loads(line) except Exception: n_errors += 1 continue text = row.get("text", "") if not text: continue doc = Document( id=str(row.get("id", f"line-{n_lines}")), text=str(text), metadata=row.get("metadata", {}) or {}) batch.append(doc) n_docs += 1 if len(batch) >= req.batch_size: flush_batch() flush_batch() return { "ok": True, "file": str(src), "file_size_bytes": total_size, "n_lines": n_lines, "n_docs_parsed": n_docs, "n_docs_indexed": n_indexed, "n_errors": n_errors, "pipeline_attached": pipe is not None, } # -------- /v1/data/load_jsonl — server-side JSONL load (streaming) -- class LoadJsonlRequest(BaseModel): path: str batch_size: int = 500 @app.post("/v1/data/load_jsonl") def data_load_jsonl(req: LoadJsonlRequest, request: Request): """Load a server-side JSONL file into the live pipeline, streaming row-by-row in batches. 
Avoids multipart upload for large corpora (1M+ docs). Each JSONL line must be: {"id":..,"text":..,"metadata":..} """ import os from ..core.types import Document from ..middleware.corpus_ingestion import ( get_corpus_ingestion_tracker, STAGE_PARSE, STAGE_INDEX, ) path = os.path.expanduser(req.path) if not os.path.isfile(path): raise HTTPException( status_code=404, detail=f"file not found on server: {path}") if req.batch_size < 1 or req.batch_size > 10000: raise HTTPException( status_code=400, detail="batch_size must be in [1, 10000]") try: from ..pipeline import get_pipeline pipe = get_pipeline() if not hasattr(pipe, "add_documents"): raise HTTPException( status_code=500, detail="pipeline has no add_documents() method") except ImportError: raise HTTPException( status_code=500, detail="pipeline module missing") tracker = get_corpus_ingestion_tracker() source_name = os.path.basename(path) n_lines = 0 n_indexed = 0 n_skipped = 0 batch: List[Document] = [] errors: List[str] = [] def flush_batch(): nonlocal batch, n_indexed if not batch: return try: pipe.add_documents(batch) for d in batch: tracker.record(d.id, source_name, STAGE_INDEX, True, 0) n_indexed += len(batch) except Exception as e: errors.append( f"batch of {len(batch)} failed: " f"{type(e).__name__}: {e}") batch = [] try: with open(path, "r", encoding="utf-8") as f: for line in f: n_lines += 1 line = line.strip() if not line: continue try: row = json.loads(line) except Exception: n_skipped += 1 continue text = row.get("text") if not text: n_skipped += 1 continue doc_id = str(row.get( "id", f"{source_name}-{n_lines}")) meta = row.get("metadata") or {} meta["source"] = meta.get( "source", source_name) d = Document(id=doc_id, text=str(text), metadata=meta) tracker.record(d.id, source_name, STAGE_PARSE, True, 0) batch.append(d) if len(batch) >= req.batch_size: flush_batch() flush_batch() except Exception as e: errors.append( f"read error at line {n_lines}: " f"{type(e).__name__}: {e}") return { "ok": not errors, 
"path": path, "n_lines_read": n_lines, "n_docs_indexed": n_indexed, "n_skipped": n_skipped, "batch_size": req.batch_size, "errors": errors[:10], } # -------- /v1/data/upload — upload any document type as corpus -------- def _chunk_text(text: str, target_chars: int = 1500, overlap_chars: int = 200) -> List[str]: """Simple sentence-aware chunker. Avoids splitting mid-sentence when possible. Falls back to char-window if sentences are huge.""" import re as _re text = (text or "").strip() if not text: return [] if len(text) <= target_chars: return [text] # Split on sentence boundaries (HE + EN) sentences = _re.split(r'(?<=[.!?׃])\s+|\n{2,}', text) chunks: List[str] = [] cur = "" for s in sentences: s = s.strip() if not s: continue if len(cur) + len(s) + 1 <= target_chars: cur = (cur + " " + s).strip() if cur else s else: if cur: chunks.append(cur) # Sentence itself larger than budget — hard split if len(s) > target_chars: for i in range(0, len(s), target_chars - overlap_chars): chunks.append(s[i:i + target_chars]) cur = "" else: cur = s if cur: chunks.append(cur) # Apply overlap between consecutive chunks if overlap_chars > 0 and len(chunks) > 1: with_overlap = [chunks[0]] for i in range(1, len(chunks)): prev_tail = chunks[i - 1][-overlap_chars:] with_overlap.append(prev_tail + " " + chunks[i]) return with_overlap return chunks def _extract_pdf_text(path: str) -> str: try: from pypdf import PdfReader # type: ignore except ImportError: try: from PyPDF2 import PdfReader # type: ignore except ImportError: raise RuntimeError( "PDF support requires 'pypdf' or 'PyPDF2'. " "pip install pypdf") reader = PdfReader(path) out = [] for page in reader.pages: try: out.append(page.extract_text() or "") except Exception: continue return "\n\n".join(out) def _extract_docx_text(path: str) -> str: try: from docx import Document as _DocxDoc # type: ignore except ImportError: raise RuntimeError( "DOCX support requires 'python-docx'. 
" "pip install python-docx") d = _DocxDoc(path) return "\n\n".join(p.text for p in d.paragraphs if p.text.strip()) def _extract_pptx_text(path: str) -> str: try: from pptx import Presentation # type: ignore except ImportError: raise RuntimeError( "PPTX support requires 'python-pptx'. " "pip install python-pptx") prs = Presentation(path) parts = [] for slide_i, slide in enumerate(prs.slides, 1): slide_parts = [] for shape in slide.shapes: if hasattr(shape, "text") and shape.text: slide_parts.append(shape.text) if slide_parts: parts.append( f"## Slide {slide_i}\n" + "\n".join(slide_parts)) return "\n\n".join(parts) def _extract_xlsx_text(path: str) -> str: try: from openpyxl import load_workbook # type: ignore except ImportError: raise RuntimeError( "XLSX support requires 'openpyxl'. " "pip install openpyxl") wb = load_workbook(path, data_only=True, read_only=True) parts = [] for ws in wb.worksheets: sheet_parts = [f"## Sheet: {ws.title}"] for row in ws.iter_rows(values_only=True): cells = [str(c) for c in row if c is not None and str(c).strip()] if cells: sheet_parts.append(" | ".join(cells)) if len(sheet_parts) > 1: parts.append("\n".join(sheet_parts)) return "\n\n".join(parts) def _extract_html_text(html_bytes: bytes) -> str: """Simple HTML→text without external deps.""" import re as _re from html.parser import HTMLParser text = html_bytes.decode("utf-8", errors="replace") # Drop script/style content text = _re.sub(r'<(script|style)[^>]*>.*?', ' ', text, flags=_re.DOTALL | _re.IGNORECASE) class _Strip(HTMLParser): def __init__(self): super().__init__() self.parts: List[str] = [] self._skip = 0 def handle_starttag(self, tag, attrs): if tag.lower() in ("script", "style"): self._skip += 1 def handle_endtag(self, tag): if tag.lower() in ("script", "style") and self._skip: self._skip -= 1 def handle_data(self, data): if not self._skip and data.strip(): self.parts.append(data) p = _Strip() try: p.feed(text) except Exception: pass out = " ".join(p.parts) # Collapse 
whitespace out = _re.sub(r'\s+', ' ', out).strip() return out @app.post("/v1/data/upload") async def data_upload_endpoint( file: UploadFile = File(...), kind: str = Form("auto"), text_field: str = Form("text"), id_field: str = Form("id"), chunk_size: int = Form(1500), chunk_overlap: int = Form(200)): """Upload any document as corpus. Supported kinds (auto-detected from extension): - parquet / pq — pyarrow rows → Documents - csv / tsv — DictReader rows → Documents - jsonl / ndjson / json — one JSON per line → Documents - pkl / pickle — list/dict pickle → Documents - txt — plain text → chunked Documents - md / markdown — markdown → chunked Documents - pdf — pypdf extract → chunked Documents - docx — python-docx extract → chunked Documents - html / htm — strip tags → chunked Documents Form fields: - file (multipart, required) - kind: explicit override (default auto-detect) - text_field / id_field: only for tabular formats - chunk_size / chunk_overlap: only for free-text formats """ import os from ..middleware.corpus_ingestion import ( get_corpus_ingestion_tracker, STAGE_PARSE, STAGE_INDEX, ) from ..core.types import Document if file is None: raise HTTPException(status_code=400, detail="missing 'file' field") runtime_dir = os.environ.get( "TAU_RAG_RUNTIME_DIR", os.path.join(os.path.dirname( os.path.dirname(os.path.abspath(__file__))), "runtime")) upload_dir = os.path.join(runtime_dir, "uploads") os.makedirs(upload_dir, exist_ok=True) safe_name = os.path.basename(file.filename or "upload.dat") target = os.path.join(upload_dir, safe_name) ext = (safe_name.rsplit(".", 1)[-1] or "").lower() auto_kind = { "parquet": "parquet", "pq": "parquet", "csv": "csv", "tsv": "csv", "jsonl": "jsonl", "ndjson": "jsonl", "json": "jsonl", "pkl": "pkl", "pickle": "pkl", "txt": "txt", "text": "txt", "log": "txt", "md": "md", "markdown": "md", "yaml": "txt", "yml": "txt", "pdf": "pdf", "docx": "docx", "pptx": "pptx", "xlsx": "xlsx", "html": "html", "htm": "html", }.get(ext, "txt" if kind == 
"auto" else kind) inferred = auto_kind if kind == "auto" else kind contents = await file.read() with open(target, "wb") as f: f.write(contents) docs: List[Document] = [] parse_error: Optional[str] = None try: # ============================ TABULAR FORMATS =========== if inferred == "parquet": try: import pyarrow.parquet as pq # type: ignore table = pq.read_table(target) rows = [dict(zip(table.column_names, vals)) for vals in zip(*[c.to_pylist() for c in table.columns])] except ImportError: import pandas as pd # type: ignore rows = pd.read_parquet(target).to_dict("records") docs = _rows_to_docs(rows, safe_name, text_field, id_field) elif inferred == "csv": import csv as _csv delim = "\t" if ext == "tsv" else "," with open(target, encoding="utf-8") as f: rows = list(_csv.DictReader(f, delimiter=delim)) docs = _rows_to_docs(rows, safe_name, text_field, id_field) elif inferred == "jsonl": rows = [] for line in contents.decode( "utf-8", errors="replace").splitlines(): line = line.strip() if not line: continue try: rows.append(json.loads(line)) except Exception: continue docs = _rows_to_docs(rows, safe_name, text_field, id_field) elif inferred == "pkl": import pickle with open(target, "rb") as f: data = pickle.load(f) rows = (data if isinstance(data, list) else [data] if isinstance(data, dict) else []) docs = _rows_to_docs(rows, safe_name, text_field, id_field) # ============================ FREE-TEXT FORMATS ========= elif inferred in ("txt", "md"): text = contents.decode("utf-8", errors="replace") chunks = _chunk_text(text, chunk_size, chunk_overlap) for i, ch in enumerate(chunks): docs.append(Document( id=f"{safe_name}-{i}", text=ch, metadata={"source": safe_name, "chunk": i, "n_chunks": len(chunks), "kind": inferred})) elif inferred == "pdf": text = _extract_pdf_text(target) chunks = _chunk_text(text, chunk_size, chunk_overlap) for i, ch in enumerate(chunks): docs.append(Document( id=f"{safe_name}-{i}", text=ch, metadata={"source": safe_name, "chunk": i, "n_chunks": 
len(chunks), "kind": "pdf"})) elif inferred == "docx": text = _extract_docx_text(target) chunks = _chunk_text(text, chunk_size, chunk_overlap) for i, ch in enumerate(chunks): docs.append(Document( id=f"{safe_name}-{i}", text=ch, metadata={"source": safe_name, "chunk": i, "n_chunks": len(chunks), "kind": "docx"})) elif inferred == "pptx": text = _extract_pptx_text(target) chunks = _chunk_text(text, chunk_size, chunk_overlap) for i, ch in enumerate(chunks): docs.append(Document( id=f"{safe_name}-{i}", text=ch, metadata={"source": safe_name, "chunk": i, "n_chunks": len(chunks), "kind": "pptx"})) elif inferred == "xlsx": text = _extract_xlsx_text(target) chunks = _chunk_text(text, chunk_size, chunk_overlap) for i, ch in enumerate(chunks): docs.append(Document( id=f"{safe_name}-{i}", text=ch, metadata={"source": safe_name, "chunk": i, "n_chunks": len(chunks), "kind": "xlsx"})) elif inferred == "html": text = _extract_html_text(contents) chunks = _chunk_text(text, chunk_size, chunk_overlap) for i, ch in enumerate(chunks): docs.append(Document( id=f"{safe_name}-{i}", text=ch, metadata={"source": safe_name, "chunk": i, "n_chunks": len(chunks), "kind": "html"})) else: raise ValueError(f"unsupported kind: {inferred}") except Exception as e: parse_error = f"{type(e).__name__}: {e}" # Track ingestion (v3.81) tracker = get_corpus_ingestion_tracker() n_indexed = 0 if not parse_error: for d in docs: tracker.record(d.id, safe_name, STAGE_PARSE, True, 0) try: from ..pipeline import get_pipeline pipe = get_pipeline() if hasattr(pipe, "add_documents"): pipe.add_documents(docs) for d in docs: tracker.record(d.id, safe_name, STAGE_INDEX, True, 0) n_indexed = len(docs) except Exception: pass return { "ok": parse_error is None, "filename": safe_name, "kind": inferred, "saved_to": target, "n_rows_parsed": len(docs), "n_indexed_in_pipeline": n_indexed, "size_bytes": len(contents), "error": parse_error, "sample_doc_ids": [d.id for d in docs[:5]], "preview": docs[0].text[:200] if docs else 
def _rows_to_docs(rows, source_name: str, text_field: str, id_field: str):
    """Convert tabular rows into a list of ``Document`` objects.

    Accepted row shapes:

    1. Plain string: used as the text, with a positional id.
    2. Flat mapping: ``{id, text, ...other_fields}``.
    3. Nested mapping: ``{id, text, metadata: {...}}`` — common when
       re-ingesting an already-processed JSONL. The inner ``metadata``
       dict is flattened into the document metadata and wins over
       same-named top-level fields, so an original ``source`` is kept
       instead of being replaced by the upload filename.

    Rows of any other type, and rows without text, are skipped.

    Args:
        rows: Iterable of rows (strings or dicts).
        source_name: Name of the uploaded file; used for fallback ids and
            recorded as ``upload_source`` on every document.
        text_field: Preferred key for the text. Falls back to ``text``,
            ``chunk`` and ``content``, in that order.
        id_field: Preferred key for the id. Falls back to ``id``, then to
            ``"{source_name}-{row_index}"``.

    Returns:
        List of ``Document`` instances, in input order.
    """
    from ..core.types import Document

    text_keys = (text_field, "text", "chunk", "content")
    id_keys = (id_field, "id")
    reserved = frozenset(text_keys + id_keys + ("metadata",))

    out = []
    for i, row in enumerate(rows):
        fallback_id = f"{source_name}-{i}"
        if isinstance(row, str):
            out.append(Document(
                id=fallback_id, text=row,
                metadata={"source": source_name,
                          "upload_source": source_name}))
            continue
        if not isinstance(row, dict):
            continue

        text = next((row[k] for k in text_keys if row.get(k)), "")
        if not text:
            continue

        # Only None / "" count as "no id": a truthiness test would throw
        # away a legitimate id of 0 (e.g. an integer key column).
        doc_id = fallback_id
        for key in id_keys:
            candidate = row.get(key)
            if candidate is not None and candidate != "":
                doc_id = candidate
                break

        # Top-level fields first (minus id/text/metadata keys), then the
        # nested metadata dict so that inner values win.
        meta = {k: v for k, v in row.items() if k not in reserved}
        inner = row.get("metadata")
        if isinstance(inner, dict):
            meta.update(inner)
        # Record upload provenance without clobbering an original source.
        meta.setdefault("source", source_name)
        meta["upload_source"] = source_name

        out.append(Document(id=str(doc_id), text=str(text), metadata=meta))
    return out


# -------- /v1/data/load_corpus_from_path — stream JSONL → pipeline --
class LoadCorpusRequest(BaseModel):
    path: str                                # server-side JSONL path
    limit: Optional[int] = None              # cap N chunks (useful for testing)
    batch_size: int = 1000                   # docs per add_documents() call
    filter_kind: Optional[List[str]] = None  # e.g. ["pdf","txt"]
    # Accepted for compatibility; load_corpus_from_path does not read it.
    skip_existing: bool = False


@app.post("/v1/data/load_corpus_from_path")
def load_corpus_from_path(req: LoadCorpusRequest, request: Request):
    """Stream a JSONL file from the server's disk into the live pipeline.

    Each non-blank line should be a JSON object with ``id``, ``text`` and
    ``metadata``. Documents are handed to ``pipeline.add_documents`` in
    batches of ``req.batch_size`` so the whole file is never held in
    memory at once.

    Args:
        req: Path, optional ``limit`` (maximum number of documents to add),
            batch size and optional ``filter_kind`` whitelist that is
            matched against ``metadata["kind"]``.
        request: Incoming request (unused here).

    Returns:
        Summary dict with line/document counters and timing.
        ``n_skipped_filter`` counts both ``filter_kind`` misses and rows
        whose text is empty or not a string. ``n_bad_lines`` counts lines
        that are not valid JSON objects.

    Raises:
        HTTPException: 404 if the path does not exist, 400 if it is not a
            regular file, 503 if no pipeline with ``add_documents`` is
            available, 500 if ``add_documents`` itself fails.
    """
    import os
    import time as _time
    from ..core.types import Document

    # NOTE(review): req.path is client-supplied and opened as-is — confirm
    # this route is only reachable by trusted callers.
    path = os.path.expanduser(req.path)
    if not os.path.exists(path):
        raise HTTPException(status_code=404,
                            detail=f"path not found: {path}")
    if not os.path.isfile(path):
        raise HTTPException(status_code=400,
                            detail=f"not a regular file: {path}")

    # Attach pipeline
    pipe = None
    try:
        from ..pipeline import get_pipeline
        pipe = get_pipeline()
        if not hasattr(pipe, "add_documents"):
            pipe = None
    except Exception:
        pipe = None
    if pipe is None:
        raise HTTPException(
            status_code=503,
            detail="pipeline has no add_documents() — "
                   "check preset config")

    kinds = set(req.filter_kind) if req.filter_kind else None
    limit = req.limit if req.limit and req.limit > 0 else None
    # A batch size below 1 would flush after every document anyway.
    batch_size = max(1, req.batch_size)

    total = 0
    added = 0
    skipped = 0
    bad_lines = 0
    batch: List[Document] = []
    t0 = _time.time()

    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            total += 1
            try:
                row = json.loads(line)
            except (ValueError, RecursionError):
                bad_lines += 1
                continue
            if not isinstance(row, dict):
                # Valid JSON but not an object (list, string, number…).
                bad_lines += 1
                continue

            text = row.get("text", "")
            doc_id = str(row.get("id") or f"doc-{total}")
            meta = row.get("metadata")
            if not isinstance(meta, dict):
                meta = {}
            if kinds and meta.get("kind") not in kinds:
                skipped += 1
                continue
            if not text or not isinstance(text, str):
                skipped += 1
                continue

            batch.append(Document(id=doc_id, text=text, metadata=dict(meta)))
            if len(batch) >= batch_size:
                try:
                    pipe.add_documents(batch)
                except Exception as e:
                    raise HTTPException(
                        status_code=500,
                        detail=f"pipeline.add_documents "
                               f"failed at n={added}: "
                               f"{type(e).__name__}: {e}") from e
                added += len(batch)
                batch = []
            # Count queued docs too, so the cap holds even when
            # limit < batch_size (no overshoot up to a whole batch).
            if limit is not None and added + len(batch) >= limit:
                break

    # Flush remainder
    if batch:
        try:
            pipe.add_documents(batch)
        except Exception as e:
            raise HTTPException(
                status_code=500,
                detail=f"pipeline.add_documents failed "
                       f"at final flush: {e}") from e
        added += len(batch)

    elapsed = _time.time() - t0
    return {
        "ok": True,
        "path": path,
        "n_lines_total": total,
        "n_docs_added": added,
        "n_skipped_filter": skipped,
        "n_bad_lines": bad_lines,
        "elapsed_sec": round(elapsed, 2),
        # Elapsed time is clamped to at least one second for the rate.
        "docs_per_sec": round(added / max(1, elapsed), 1),
    }
# -------- /v1/data/ingest_dir — recursively ingest a host directory --
class IngestDirRequest(BaseModel):
    path: str
    recursive: bool = True
    chunk_size: int = 1500
    chunk_overlap: int = 200
    max_files: Optional[int] = None
    extensions: Optional[List[str]] = None  # whitelist; default = all
    dry_run: bool = False                   # scan only, don't ingest


@app.post("/v1/data/ingest_dir")
def data_ingest_dir(req: IngestDirRequest, request: Request):
    """Recursively ingest a server-side directory.

    Walks ``req.path`` (on the SERVER filesystem, not the client's), maps
    each file to a parser by extension, parses and chunks it, and adds the
    resulting documents to the live pipeline when one is available.
    Hidden files and files under hidden directories (any path part below
    the root starting with ``.``) are skipped.

    Args:
        req: Directory, recursion flag, chunking parameters, optional
            ``max_files`` cap, optional extension whitelist, and a
            ``dry_run`` switch that only lists what would be ingested.
        request: Incoming request (unused here).

    Returns:
        For a dry run: the files found (first 200) and a count per kind.
        Otherwise: totals plus a per-file record (first 500) with
        ``ok`` / ``n_docs`` / ``n_indexed`` / ``error``.

    Raises:
        HTTPException: 404 if the path does not exist, 400 if it is not a
            directory.
    """
    # Imported locally: Counter is needed by the dry-run summary.
    from collections import Counter
    from pathlib import Path
    from ..middleware.corpus_ingestion import (
        get_corpus_ingestion_tracker, STAGE_PARSE, STAGE_INDEX,
    )

    # NOTE(review): req.path is client-supplied — confirm this route is
    # only reachable by trusted callers.
    root = Path(req.path).expanduser().resolve()
    if not root.exists():
        raise HTTPException(
            status_code=404,
            detail=f"path does not exist on server: {root}")
    if not root.is_dir():
        raise HTTPException(
            status_code=400, detail=f"not a directory: {root}")

    # Extension → parser kind.
    ext_to_kind = {
        ".parquet": "parquet", ".pq": "parquet",
        ".csv": "csv", ".tsv": "csv",
        ".jsonl": "jsonl", ".ndjson": "jsonl", ".json": "jsonl",
        ".pkl": "pkl", ".pickle": "pkl",
        ".txt": "txt", ".text": "txt", ".log": "txt",
        ".md": "md", ".markdown": "md",
        ".yaml": "txt", ".yml": "txt",
        ".pdf": "pdf", ".docx": "docx",
        ".pptx": "pptx", ".xlsx": "xlsx",
        ".html": "html", ".htm": "html",
    }
    # Normalize the whitelist to lower-case, dot-prefixed suffixes; an
    # empty/missing whitelist means "every known extension".
    allowed = (set(e.lower() if e.startswith(".") else "." + e.lower()
                   for e in (req.extensions or []))
               or set(ext_to_kind.keys()))

    # Walk
    files: List[Path] = []
    iter_paths = root.rglob("*") if req.recursive else root.iterdir()
    n_root_parts = len(root.parts)
    for p in iter_paths:
        if not p.is_file():
            continue
        if p.suffix.lower() not in allowed:
            continue
        if any(part.startswith(".") for part in p.parts[n_root_parts:]):
            continue  # skip dot-dirs / dotfiles
        files.append(p)
        if req.max_files and len(files) >= req.max_files:
            break
    files.sort()

    if req.dry_run:
        return {
            "ok": True,
            "dry_run": True,
            "root": str(root),
            "n_files_found": len(files),
            "by_kind": _count_by(
                Counter([ext_to_kind.get(p.suffix.lower(), "other")
                         for p in files])),
            "files": [str(p.relative_to(root)) for p in files[:200]],
            "truncated": len(files) > 200,
        }

    tracker = get_corpus_ingestion_tracker()
    per_file: List[Dict[str, Any]] = []
    n_total_docs = 0
    n_total_indexed = 0

    pipe = None
    try:
        from ..pipeline import get_pipeline
        pipe = get_pipeline()
    except Exception:
        pipe = None

    for p in files:
        rel = str(p.relative_to(root))
        # Whitelisted extensions without a dedicated parser are read as
        # plain text.
        kind = ext_to_kind.get(p.suffix.lower(), "txt")
        rec = {"file": rel, "kind": kind,
               "ok": False, "n_docs": 0, "n_indexed": 0, "error": None}
        try:
            with open(p, "rb") as f:
                contents = f.read()
            docs = _parse_to_docs(
                contents=contents, target_path=str(p),
                source_name=rel,
                kind=kind,
                text_field="text", id_field="id",
                chunk_size=req.chunk_size,
                chunk_overlap=req.chunk_overlap,
            )
            rec["n_docs"] = len(docs)
            for d in docs:
                tracker.record(d.id, rel, STAGE_PARSE, True, 0)
            if pipe and hasattr(pipe, "add_documents") and docs:
                pipe.add_documents(docs)
                for d in docs:
                    tracker.record(d.id, rel, STAGE_INDEX, True, 0)
                rec["n_indexed"] = len(docs)
                n_total_indexed += len(docs)
            n_total_docs += len(docs)
            rec["ok"] = True
        except Exception as e:
            # One bad file must not abort the walk; report it per file.
            rec["error"] = f"{type(e).__name__}: {e}"
            tracker.record(f"{rel}-fail", rel, STAGE_PARSE, False, 0)
        per_file.append(rec)

    return {
        "ok": True,
        "root": str(root),
        "n_files_found": len(files),
        "n_files_ok": sum(1 for r in per_file if r["ok"]),
        "n_files_failed": sum(1 for r in per_file if not r["ok"]),
        "n_total_docs": n_total_docs,
        "n_total_indexed": n_total_indexed,
        "pipeline_attached": pipe is not None,
        "per_file": per_file[:500],
        "truncated": len(per_file) > 500,
    }


def _count_by(counter):
    """Return a plain ``dict`` copy of a counter/mapping."""
    return dict(counter)


def _parse_to_docs(contents: bytes, target_path: str, source_name: str,
                   kind: str, text_field: str, id_field: str,
                   chunk_size: int, chunk_overlap: int):
    """Parse one file into a list of ``Document`` objects, by ``kind``.

    Tabular kinds (``parquet``, ``csv``, ``jsonl``, ``pkl``) are turned into
    rows and passed to ``_rows_to_docs``. Free-text kinds (``txt``, ``md``,
    ``pdf``, ``docx``, ``pptx``, ``xlsx``, ``html``) are extracted to one
    string, chunked with ``_chunk_text`` and emitted as one document per
    chunk with id ``"{source_name}-{chunk_index}"``.

    Args:
        contents: Raw file bytes (used for jsonl / txt / md / html).
        target_path: Path of the file on disk (used by parsers that read
            the file themselves).
        source_name: Name recorded as the documents' source.
        kind: Parser kind, as listed above.
        text_field: Preferred text key for tabular rows.
        id_field: Preferred id key for tabular rows.
        chunk_size: Chunk size passed to ``_chunk_text``.
        chunk_overlap: Chunk overlap passed to ``_chunk_text``.

    Returns:
        List of documents; empty for an unrecognized ``kind``.
    """
    if kind == "parquet":
        try:
            import pyarrow.parquet as pq  # type: ignore
            table = pq.read_table(target_path)
            rows = [dict(zip(table.column_names, vals))
                    for vals in zip(*[c.to_pylist()
                                      for c in table.columns])]
        except ImportError:
            import pandas as pd  # type: ignore
            rows = pd.read_parquet(target_path).to_dict("records")
        return _rows_to_docs(rows, source_name, text_field, id_field)

    if kind == "csv":
        import csv as _csv
        # Callers match extensions case-insensitively, so do the same here
        # or "DATA.TSV" would be split on commas.
        delim = "\t" if target_path.lower().endswith(".tsv") else ","
        # newline="" lets the csv module handle newlines embedded in
        # quoted fields itself.
        with open(target_path, encoding="utf-8", newline="") as f:
            rows = list(_csv.DictReader(f, delimiter=delim))
        return _rows_to_docs(rows, source_name, text_field, id_field)

    if kind == "jsonl":
        rows = []
        for line in contents.decode("utf-8", errors="replace").splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                rows.append(json.loads(line))
            except (ValueError, RecursionError):
                continue  # skip malformed lines, keep the rest
        return _rows_to_docs(rows, source_name, text_field, id_field)

    if kind == "pkl":
        import pickle
        # SECURITY: unpickling executes arbitrary code embedded in the
        # file. Only safe when every ingested .pkl comes from a trusted
        # source — review before exposing this to untrusted input.
        with open(target_path, "rb") as f:
            data = pickle.load(f)
        rows = (data if isinstance(data, list)
                else [data] if isinstance(data, dict) else [])
        return _rows_to_docs(rows, source_name, text_field, id_field)

    if kind in ("txt", "md"):
        text = contents.decode("utf-8", errors="replace")
    elif kind == "pdf":
        text = _extract_pdf_text(target_path)
    elif kind == "docx":
        text = _extract_docx_text(target_path)
    elif kind == "pptx":
        text = _extract_pptx_text(target_path)
    elif kind == "xlsx":
        text = _extract_xlsx_text(target_path)
    elif kind == "html":
        text = _extract_html_text(contents)
    else:
        return []

    from ..core.types import Document
    chunks = _chunk_text(text, chunk_size, chunk_overlap)
    n_chunks = len(chunks)
    return [
        Document(
            id=f"{source_name}-{i}", text=ch,
            metadata={"source": source_name, "chunk": i,
                      "n_chunks": n_chunks, "kind": kind})
        for i, ch in enumerate(chunks)
    ]
# -------- /v1/data/load_jsonl — stream a server-side JSONL into pipe -
class LoadJsonlRequest(BaseModel):
    path: str                       # server-side absolute path
    batch_size: int = 500           # docs per pipe.add_documents call
    max_docs: Optional[int] = None
    # Skips ids already seen earlier in the SAME file; the endpoint does
    # not consult the live index.
    skip_existing: bool = True


@app.post("/v1/data/load_jsonl")
def data_load_jsonl_endpoint(req: LoadJsonlRequest, request: Request):
    """Stream a JSONL corpus from the server filesystem into the pipeline.

    Each line should be a JSON object with ``id``, ``text`` (or ``chunk``)
    and ``metadata``. Only ``batch_size`` documents are held in memory at
    a time. Indexing is best-effort: a batch that ``add_documents``
    rejects is counted in ``n_docs_index_failed`` and the load continues.

    Args:
        req: File path, batch size, optional ``max_docs`` cap and the
            ``skip_existing`` switch (de-duplicates ids within this file).
        request: Incoming request (unused here).

    Returns:
        Summary dict with line, parse, duplicate and indexing counters.

    Raises:
        HTTPException: 404 if the path does not exist, 400 if it is not a
            file or ``batch_size`` is below 1.
    """
    from pathlib import Path
    from ..middleware.corpus_ingestion import (
        get_corpus_ingestion_tracker, STAGE_PARSE, STAGE_INDEX,
    )
    from ..core.types import Document

    # NOTE(review): req.path is client-supplied — confirm this route is
    # only reachable by trusted callers.
    p = Path(req.path).expanduser().resolve()
    if not p.exists():
        raise HTTPException(
            status_code=404, detail=f"path does not exist: {p}")
    if not p.is_file():
        raise HTTPException(
            status_code=400, detail=f"not a file: {p}")
    if req.batch_size < 1:
        raise HTTPException(
            status_code=400, detail="batch_size must be >= 1")

    pipe = None
    try:
        from ..pipeline import get_pipeline
        pipe = get_pipeline()
        if not hasattr(pipe, "add_documents"):
            pipe = None
    except Exception:
        pipe = None

    tracker = get_corpus_ingestion_tracker()
    n_lines = 0
    n_parsed = 0
    n_indexed = 0
    n_index_failed = 0
    n_skipped_dupe = 0
    parse_errors = 0
    batch: List[Document] = []
    seen_ids: set = set()

    def flush(b):
        """Index one batch; failures are counted, never raised."""
        nonlocal n_indexed, n_index_failed
        if not b or pipe is None:
            return
        try:
            pipe.add_documents(b)
        except Exception:
            n_index_failed += len(b)
            return
        n_indexed += len(b)
        try:
            for d in b:
                tracker.record(d.id, d.metadata.get("source", "?"),
                               STAGE_INDEX, True, 0)
        except Exception:
            pass  # tracking is best-effort; the docs are already indexed

    with open(p, encoding="utf-8") as f:
        for line in f:
            n_lines += 1
            line = line.strip()
            if not line:
                continue
            try:
                row = json.loads(line)
            except (ValueError, RecursionError):
                parse_errors += 1
                continue
            if not isinstance(row, dict):
                continue
            text = row.get("text") or row.get("chunk") or ""
            if not text:
                continue
            # An explicit `"id": null` must fall back like a missing id,
            # not become the literal id "None".
            raw_id = row.get("id")
            doc_id = str(raw_id if raw_id is not None
                         else f"jsonl-{n_lines}")
            if req.skip_existing and doc_id in seen_ids:
                n_skipped_dupe += 1
                continue
            seen_ids.add(doc_id)
            # `"metadata": null` (or any non-object) is treated as empty
            # instead of crashing dict().
            meta = row.get("metadata")
            d = Document(
                id=doc_id, text=str(text),
                metadata=dict(meta) if isinstance(meta, dict) else {})
            tracker.record(d.id, d.metadata.get("source", "?"),
                           STAGE_PARSE, True, 0)
            batch.append(d)
            n_parsed += 1
            if len(batch) >= req.batch_size:
                flush(batch)
                batch = []
            if req.max_docs and n_parsed >= req.max_docs:
                break
    flush(batch)

    return {
        "ok": True,
        "path": str(p),
        "size_bytes": p.stat().st_size,
        "n_lines_read": n_lines,
        "n_parse_errors": parse_errors,
        "n_skipped_dupes": n_skipped_dupe,
        "n_docs_parsed": n_parsed,
        "n_docs_indexed": n_indexed,
        "n_docs_index_failed": n_index_failed,
        "pipeline_attached": pipe is not None,
        "batch_size": req.batch_size,
    }


# -------- /v1/stats — read-only stats, no admin scope required -------
def public_corpus_stats():
    """Public corpus stats: ingestion counts plus size of the live index.

    Returns:
        The ingestion tracker's analysis as a dict, with
        ``live_index_size`` added (``None`` when the pipeline is
        unavailable or exposes neither ``doc_count()`` nor ``_docs``).
    """
    from ..middleware.corpus_ingestion import \
        get_corpus_ingestion_tracker
    r = get_corpus_ingestion_tracker().analyze().to_dict()
    # Add pipeline-side size if available
    try:
        from ..pipeline import get_pipeline
        pipe = get_pipeline()
        if hasattr(pipe, "doc_count"):
            r["live_index_size"] = pipe.doc_count()
        elif hasattr(pipe, "_docs"):
            r["live_index_size"] = len(pipe._docs)
        else:
            r["live_index_size"] = None
    except Exception:
        r["live_index_size"] = None
    return r


def public_quality_stats():
    """Public quality stats: the answer-helpfulness tracker's report."""
    from ..middleware.answer_helpfulness import \
        get_answer_helpfulness_tracker
    return get_answer_helpfulness_tracker().report().to_dict()


def public_gaps_stats(top_k: int = 10):
    """Public top content gaps, from the zero-result tracker's report.

    Args:
        top_k: Number of entries requested from the tracker.
    """
    from ..middleware.zero_result_tracker import \
        get_zero_result_tracker
    return get_zero_result_tracker().report(top_k=top_k).to_dict()


def public_latency_profile(query_text: str = "תום לב במשא ומתן",
                           n_iterations: int = 5,
                           top_k: int = 5) -> Dict[str, Any]:
    """Profile one query end-to-end over several runs.

    Runs ``pipe.run`` ``n_iterations`` times (at least once), clearing
    ``pipe.cache`` before each run when the pipeline has one, and
    aggregates the per-stage timings reported on the response
    (``timing_ms`` or ``timings``) together with a wall-clock
    ``total_ms``.

    Args:
        query_text: Query to execute.
        n_iterations: Number of runs; values below 1 are treated as 1.
        top_k: Used for both ``k`` and ``rerank_k`` of the query.

    Returns:
        Dict with per-stage ``min``/``p50``/``p95``/``max``/``n``, the
        stage with the largest p50 (excluding ``total_ms``), per-retriever
        statistics when the retrievers expose
        ``_last_per_retriever_ms``, and the raw iterations. On failure a
        dict with an ``error`` key is returned instead.
    """
    import statistics
    import time as _time
    try:
        from ..pipeline import get_pipeline
        from ..core.types import Query
    except Exception as e:
        return {"error": f"imports failed: {e}"}

    pipe = get_pipeline()

    iterations: List[Dict[str, Any]] = []
    per_retriever_iters: List[Dict[str, float]] = []
    for _ in range(max(1, n_iterations)):
        # Drop the cache so each run actually re-executes.
        if hasattr(pipe, "cache"):
            try:
                pipe.cache.clear()
            except Exception:
                pass
        t_start = _time.time()
        try:
            rag_resp = pipe.run(Query(
                text=query_text, k=top_k, rerank_k=top_k,
            ))
        except Exception as e:
            iterations.append({"error": str(e),
                               "total_ms": (_time.time() - t_start) * 1000})
            continue
        raw_timing = (getattr(rag_resp, "timing_ms", None)
                      or getattr(rag_resp, "timings", None))
        # Copy: adding total_ms must not write into the response's own
        # timing mapping.
        timing = dict(raw_timing) if isinstance(raw_timing, dict) else {}
        timing["total_ms"] = (_time.time() - t_start) * 1000
        iterations.append(timing)
        # Pull per-retriever timing if the retrievers expose it.
        try:
            per_r = getattr(pipe.retrievers,
                            "_last_per_retriever_ms", None)
            if per_r:
                per_retriever_iters.append(dict(per_r))
        except Exception:
            pass

    # Aggregate stats per stage
    valid = [it for it in iterations if "error" not in it]
    if not valid:
        return {"error": "all iterations failed", "iterations": iterations}

    all_stages = set()
    for it in valid:
        all_stages.update(it.keys())

    stats = {}
    for stage in sorted(all_stages):
        values = [it[stage] for it in valid
                  if stage in it and isinstance(it[stage], (int, float))]
        if not values:
            continue
        ordered = sorted(values)
        p95_index = int(len(ordered) * 0.95) if len(ordered) > 1 else 0
        stats[stage] = {
            "min": round(ordered[0], 1),
            "p50": round(statistics.median(values), 1),
            "p95": round(ordered[p95_index], 1),
            "max": round(ordered[-1], 1),
            "n": len(values),
        }

    # Identify the bottleneck (largest p50)
    non_total = {k: v for k, v in stats.items() if k != "total_ms"}
    bottleneck = (max(non_total.items(), key=lambda x: x[1]["p50"])
                  if non_total else None)

    # Aggregate per-retriever stats (when available)
    per_retriever_stats: Dict[str, Dict[str, float]] = {}
    if per_retriever_iters:
        all_retrievers = set()
        for it in per_retriever_iters:
            all_retrievers.update(it.keys())
        for ret in all_retrievers:
            vals = [it[ret] for it in per_retriever_iters if ret in it]
            if vals:
                per_retriever_stats[ret] = {
                    "min": round(min(vals), 1),
                    "p50": round(statistics.median(vals), 1),
                    "max": round(max(vals), 1),
                    "n": len(vals),
                }

    # Identify the slowest individual retriever
    slowest_retriever = None
    if per_retriever_stats:
        slowest_retriever = max(per_retriever_stats.items(),
                                key=lambda x: x[1]["p50"])

    return {
        "query": query_text,
        "n_iterations": len(valid),
        "stages": stats,
        "bottleneck_stage": bottleneck[0] if bottleneck else None,
        "bottleneck_p50_ms": (bottleneck[1]["p50"]
                              if bottleneck else None),
        "per_retriever": per_retriever_stats,
        "slowest_retriever": (slowest_retriever[0]
                              if slowest_retriever else None),
        "slowest_retriever_p50_ms": (slowest_retriever[1]["p50"]
                                     if slowest_retriever else None),
        "raw_iterations": iterations,
    }
def public_system_status() -> Dict[str, Any]:
    """One-stop status report aggregating the state of every subsystem.

    Each subsystem is probed inside its own ``try`` so one failing probe
    only replaces that entry with ``{"error": ...}``. The exception is the
    pipeline itself: if it cannot be obtained, only the ``corpus`` error
    is reported and the function returns early, because every later probe
    needs the pipeline.

    Returns:
        ``{"version": ..., "subsystems": {...}}`` with entries for corpus,
        text store, cache store, domain classifier, citation network,
        precedent analyzer, outcome stats, judge stats, Hilbert retriever,
        query cache, build progress and process memory.
    """
    out: Dict[str, Any] = {
        "version": "v2-2026-04-26",
        "subsystems": {},
    }
    subsystems = out["subsystems"]

    # Pipeline / corpus
    try:
        from ..pipeline import get_pipeline
        from ..storage import (get_default_text_store, get_cache_store,
                               fingerprint_corpus)
        pipe = get_pipeline()
        docs = (getattr(pipe, "_indexed_docs", None)
                or getattr(pipe, "_docs", None) or [])
        n_docs = len(docs)
        n_lazy = sum(1 for d in docs
                     if (getattr(d, "metadata", None) or {})
                     .get("_lazy_text"))
        subsystems["corpus"] = {
            "n_docs": n_docs,
            "n_lazy_text": n_lazy,
            "fingerprint": fingerprint_corpus(pipe),
        }
        # Text store
        try:
            ts_stats = get_default_text_store().stats()
            subsystems["text_store"] = {
                "n_docs": ts_stats["n_docs"],
                "total_chars": ts_stats["total_chars"],
                "compressed_bytes": ts_stats["compressed_bytes"],
                "db_bytes": ts_stats["db_bytes"],
                "compression_pct": round(
                    100 * (1 - ts_stats["compression_ratio"]), 1),
            }
        except Exception as e:
            subsystems["text_store"] = {"error": str(e)}
        # Persistent cache
        try:
            subsystems["cache_store"] = get_cache_store().stats()
        except Exception as e:
            subsystems["cache_store"] = {"error": str(e)}
    except Exception as e:
        # Without a pipeline none of the probes below can run.
        subsystems["corpus"] = {"error": str(e)}
        return out

    # Domain coverage
    try:
        from collections import Counter
        domain_counts = Counter()
        for d in docs:
            md = (getattr(d, "metadata", None) or {})
            # Some docs nest their real metadata under a "metadata" key.
            inner = (md.get("metadata", md)
                     if isinstance(md.get("metadata"), dict) else md)
            dom = inner.get("domain") if isinstance(inner, dict) else None
            domain_counts[dom or "uncategorized"] += 1
        classified = sum(v for k, v in domain_counts.items()
                         if k != "uncategorized")
        subsystems["domain_classifier"] = {
            "n_classified": classified,
            "n_uncategorized": domain_counts.get("uncategorized", 0),
            "coverage_pct": round(100 * classified / max(n_docs, 1), 1),
            "n_distinct_domains": len([k for k in domain_counts
                                       if k != "uncategorized"]),
        }
    except Exception as e:
        subsystems["domain_classifier"] = {"error": str(e)}

    # Citation network
    try:
        cn = getattr(pipe, "_citation_network_cache", None)
        if cn is not None:
            subsystems["citation_network"] = {
                "n_docs": cn.n_docs,
                "n_edges": cn.n_edges,
                "n_unique_citations": len(cn.cited_by),
                "n_resolved": len(cn.doc_for_citation),
                "warm": True,
            }
        else:
            subsystems["citation_network"] = {"warm": False}
    except Exception as e:
        subsystems["citation_network"] = {"error": str(e)}

    # Precedent analyzer cache
    try:
        overruled = getattr(pipe, "_overruled_cache", None)
        subsystems["precedent_analyzer"] = {
            "warm": overruled is not None,
            "building": getattr(pipe, "_overruled_cache_building", False),
            "n_detections": len(overruled) if overruled else 0,
        }
    except Exception as e:
        subsystems["precedent_analyzer"] = {"error": str(e)}

    # Outcome stats cache
    try:
        outcome = getattr(pipe, "_outcome_stats_cache", None)
        subsystems["outcome_stats"] = {
            "warm": outcome is not None,
            "n_processed": (outcome.get("n_processed", 0)
                            if outcome else 0),
        }
    except Exception as e:
        subsystems["outcome_stats"] = {"error": str(e)}

    # Judge stats cache
    try:
        js = getattr(pipe, "_judge_stats_cache", None)
        subsystems["judge_stats"] = {
            "warm": js is not None,
            "n_judges": len(js) if js else 0,
        }
    except Exception as e:
        subsystems["judge_stats"] = {"error": str(e)}

    # Hilbert retriever — float16?
    try:
        retrs = getattr(pipe, "retrievers", None)
        hilbert_info = {"present": False, "float16": False, "n_vecs": 0}
        if retrs is not None:
            members = getattr(retrs, "_retrievers", []) or \
                getattr(retrs, "retrievers", [])
            for r in members:
                if getattr(r, "name", "") != "hilbert":
                    continue
                # Read the attribute once, with a default: a Hilbert
                # retriever without float16 vectors is "present, not
                # float16", not an error.
                vecs = getattr(r, "_vecs_f16", None)
                hilbert_info["present"] = True
                hilbert_info["float16"] = vecs is not None
                if vecs is not None:
                    hilbert_info["n_vecs"] = int(vecs.shape[0])
                    hilbert_info["bytes"] = int(vecs.nbytes)
                break
        subsystems["hilbert_retriever"] = hilbert_info
    except Exception as e:
        subsystems["hilbert_retriever"] = {"error": str(e)}

    # Query result cache
    try:
        retrs = getattr(pipe, "retrievers", None)
        if retrs is not None and hasattr(retrs, "cache_stats"):
            subsystems["query_cache"] = retrs.cache_stats()
        else:
            subsystems["query_cache"] = {"present": False}
    except Exception as e:
        subsystems["query_cache"] = {"error": str(e)}

    # Build progress (during long rebuild)
    try:
        subsystems["build_progress"] = public_build_progress()
    except Exception as e:
        subsystems["build_progress"] = {"error": str(e)}

    # Memory — process RSS + system pressure (psutil if available)
    try:
        import psutil as _ps_mem
        proc = _ps_mem.Process()
        meminfo = proc.memory_info()
        vmem = _ps_mem.virtual_memory()
        rss = meminfo.rss
        gib = 1024 / 1024 / 1024
        subsystems["memory"] = {
            "rss_bytes": int(rss),
            "rss_mb": round(rss / 1024 / 1024, 1),
            "rss_gb": round(rss / 1024 / 1024 / 1024, 2),
            "system_total_gb": round(vmem.total / 1024 / 1024 / 1024, 1),
            "system_available_gb": round(
                vmem.available / 1024 / 1024 / 1024, 1),
            "system_used_pct": round(vmem.percent, 1),
            "process_share_pct": round(100 * rss / vmem.total, 1),
        }
        del gib
    except ImportError:
        subsystems["memory"] = {"error": "psutil not installed"}
    except Exception as e:
        subsystems["memory"] = {"error": str(e)}

    return out
def _trigger_precedent_cache_async(pipe) -> None:
    """Start the corpus-wide overruled-detection cache build in a thread.

    The build runs in a daemon thread so user queries do not block on it.
    It first tries the persistent cache store (keyed by the corpus
    fingerprint); on a miss it runs ``detect_overrulings`` over the
    indexed documents and tries to persist the result.

    ``pipe._overruled_cache_building`` is used as an in-progress flag:
    when it is already set the call returns immediately. On any failure
    the cache is set to an empty list and the flag is cleared.

    Args:
        pipe: The live pipeline object; results are stored on it as
            ``_overruled_cache``.
    """
    if getattr(pipe, "_overruled_cache_building", False):
        return
    pipe._overruled_cache_building = True

    def _build():
        try:
            from ..storage import get_cache_store, fingerprint_corpus
            cache_store = get_cache_store()
            fp = fingerprint_corpus(pipe)
            # Try persistent cache first
            cached = cache_store.get("overruled_v1", fp)
            if cached is not None:
                pipe._overruled_cache = cached
                print(f"[tau-rag] overruled cache loaded from disk: "
                      f"{len(cached)} detections (fingerprint match)")
                return
            # Cold build
            import time as _t
            t0 = _t.time()
            from ..precedent_analyzer import detect_overrulings
            indexed = (getattr(pipe, "_indexed_docs", None)
                       or getattr(pipe, "_docs", None) or [])
            dets = detect_overrulings(indexed)
            pipe._overruled_cache = dets
            # Persist for next restart; a failure here is ignored because
            # the in-memory cache is already usable.
            try:
                cache_store.set("overruled_v1", fp, dets)
            except Exception:
                pass
            print(f"[tau-rag] overruled cache built async: "
                  f"{len(dets)} detections in {_t.time()-t0:.1f}s "
                  f"(persisted)")
        except Exception as e:
            pipe._overruled_cache = []
            print(f"[tau-rag] overruled cache async build failed: {e}")
        finally:
            pipe._overruled_cache_building = False

    import threading
    threading.Thread(target=_build, daemon=True,
                     name="overruled-cache-build").start()


def _filter_docs_by_section_type(docs: list, section_type: str) -> list:
    """Keep only docs whose chunk text lies in a given judgment section.

    Used as a post-retrieval filter when the caller passes
    ``filters.section_type`` (for example ``discussion``). For each doc
    the parent judgment is looked up among the pipeline's indexed
    documents, structured with ``structure_judgment`` (results are cached
    on the pipeline per parent id), and the doc is kept when the first
    200 characters of its text occur inside a section whose ``id`` equals
    ``section_type``. Kept docs get ``_section_match`` set.

    Special cases:
      * empty ``section_type`` or ``docs``, or failing imports: ``docs``
        is returned unchanged;
      * chunks shorter than 30 characters are kept without checking;
      * non-dict docs, docs without ``doc_id`` and docs whose parent is
        unknown or has no text are dropped;
      * after a 1.5 s budget the remaining, not-yet-examined docs are
        passed through unfiltered.

    Args:
        docs: Retrieved docs as dicts with ``doc_id`` and ``text``.
        section_type: Section id to keep.

    Returns:
        The filtered list (or ``docs`` itself in the pass-through cases).
    """
    if not section_type or not docs:
        return docs
    try:
        from ..pipeline import get_pipeline
        from ..judgment_structurer import structure_judgment
    except Exception:
        return docs

    pipe = get_pipeline()
    if not hasattr(pipe, "_struct_for_filter_cache"):
        pipe._struct_for_filter_cache = {}
    cache = pipe._struct_for_filter_cache

    # Parent lookup table, built at most once per call and only when a
    # parent actually has to be fetched. setdefault keeps the first doc
    # for a given id.
    parents_by_id = None

    def _find_parent(parent_id):
        nonlocal parents_by_id
        if parents_by_id is None:
            parents_by_id = {}
            for x in (getattr(pipe, "_indexed_docs", None) or []):
                parents_by_id.setdefault(getattr(x, "id", None), x)
        return parents_by_id.get(parent_id)

    # Performance budget: user queries must never block for long on cold
    # caches with many parent docs to structure.
    import time as _t
    deadline = _t.time() + 1.5  # seconds, total per query

    out = []
    for idx, d in enumerate(docs):
        if _t.time() > deadline:
            # Time's up — pass the remaining docs through unchanged.
            # Slice by loop position: slicing by len(out) would re-admit
            # rejected docs and duplicate kept ones.
            out.extend(docs[idx:])
            break
        if not isinstance(d, dict):
            continue
        chunk_text = (d.get("text") or "")[:200]
        if len(chunk_text) < 30:
            out.append(d)  # too short to filter — keep
            continue
        doc_id = d.get("doc_id")
        if not doc_id:
            continue

        # Fetch parent sections (lazy-text aware), cached per parent id.
        if doc_id in cache:
            sections = cache[doc_id]
        else:
            try:
                parent = _find_parent(doc_id)
                if parent is None:
                    continue
                text = (getattr(parent, "text", None) or "")
                if not text and (getattr(parent, "metadata", None)
                                 or {}).get("_lazy_text"):
                    try:
                        text = pipe.get_text(doc_id) or ""
                    except Exception:
                        text = ""
                if not text:
                    cache[doc_id] = []
                    continue
                struct = structure_judgment(text)
                sections = struct.get("sections", [])
                cache[doc_id] = sections
            except Exception:
                cache[doc_id] = []
                continue

        # Keep the doc if its chunk occurs in a section of that type.
        for s in sections:
            if s.get("id") != section_type:
                continue
            if chunk_text in s.get("text", ""):
                d["_section_match"] = section_type
                out.append(d)
                break
    return out
def public_export_answer_docx(body: Dict[str, Any]) -> bytes:
    """Generate a right-to-left Hebrew legal memo (.docx) from an answer.

    The memo contains a title (the question), a metadata line (date,
    optional domain and confidence), the answer split into paragraphs on
    blank lines, up to ten sources with court / date / overruled marker
    and a short excerpt, and a disclaimer footer.

    Args:
        body: ``{"question": str, "payload": {answer, docs, confidence,
            understanding: {domain}, ...}}``. Entries of ``docs`` that are
            not dicts are rendered as a source titled ``"?"``.

    Returns:
        Raw bytes of the generated .docx file.

    Raises:
        RuntimeError: If ``python-docx`` is not installed.
    """
    import datetime as _dt
    import re as _re_x
    from io import BytesIO
    try:
        from docx import Document as DocxDocument
        from docx.shared import Pt, Inches, RGBColor
        from docx.enum.text import WD_ALIGN_PARAGRAPH
        from docx.oxml.ns import qn
        from docx.oxml import OxmlElement
    except ImportError as exc:
        raise RuntimeError(
            "python-docx not installed; pip install python-docx") from exc

    question = (body.get("question") or "").strip()
    payload = body.get("payload") or {}
    answer = (payload.get("answer") or "").strip()
    docs = payload.get("docs") or []
    confidence = payload.get("confidence")
    domain = (payload.get("understanding") or {}).get("domain")

    # Strips bracketed prefixes such as "[...] " from excerpts.
    bracket_tag = _re_x.compile(r"\[[^\]]+\]\s*")

    doc = DocxDocument()
    # Set default font, including the complex-script (Hebrew) font slot.
    style = doc.styles['Normal']
    style.font.name = 'David'
    style.font.size = Pt(11)
    rPr = style.element.get_or_add_rPr()
    rFonts = rPr.find(qn('w:rFonts'))
    if rFonts is None:
        rFonts = OxmlElement('w:rFonts')
        rPr.append(rFonts)
    rFonts.set(qn('w:cs'), 'David')
    rFonts.set(qn('w:hAnsi'), 'David')

    def _rtl_para(p):
        """Mark paragraph as RTL (Hebrew)."""
        pPr = p._p.get_or_add_pPr()
        bidi = OxmlElement('w:bidi')
        bidi.set(qn('w:val'), '1')
        pPr.append(bidi)

    # Title
    title = doc.add_heading(question or "תשובה משפטית", level=1)
    _rtl_para(title)
    title.alignment = WD_ALIGN_PARAGRAPH.RIGHT
    for run in title.runs:
        run.font.color.rgb = RGBColor(0x4F, 0x46, 0xE5)

    # Subtitle / metadata
    sub = doc.add_paragraph()
    _rtl_para(sub)
    sub.alignment = WD_ALIGN_PARAGRAPH.RIGHT
    r = sub.add_run(
        f"נוצר על-ידי tau-rag · {_dt.date.today().strftime('%d/%m/%Y')}")
    r.italic = True
    r.font.size = Pt(9)
    r.font.color.rgb = RGBColor(0x94, 0xA3, 0xB8)
    if domain:
        sub.add_run(f" · תחום: {domain}").font.size = Pt(9)
    if confidence is not None:
        sub.add_run(
            f" · ביטחון: {round(float(confidence)*100)}%").font.size = Pt(9)

    # Answer body
    h = doc.add_heading("תשובה", level=2)
    _rtl_para(h)
    h.alignment = WD_ALIGN_PARAGRAPH.RIGHT
    for para in (answer.split("\n\n") if answer else [""]):
        if not para.strip():
            continue
        p = doc.add_paragraph(para.strip())
        _rtl_para(p)
        p.alignment = WD_ALIGN_PARAGRAPH.RIGHT
        p.paragraph_format.line_spacing = 1.5

    # Sources block
    if docs:
        h2 = doc.add_heading(f"מקורות ({len(docs)})", level=2)
        _rtl_para(h2)
        h2.alignment = WD_ALIGN_PARAGRAPH.RIGHT
        for d in docs[:10]:
            # Normalize once so every lookup below is safe: a non-dict
            # entry (or non-dict metadata / status) is treated as empty.
            if not isinstance(d, dict):
                d = {}
            md = d.get("metadata")
            if not isinstance(md, dict):
                md = {}
            ps = d.get("precedent_status")
            if not isinstance(ps, dict):
                ps = {}

            title_text = (md.get("citation") or md.get("title")
                          or d.get("doc_id", "?"))
            p = doc.add_paragraph(style="List Number")
            _rtl_para(p)
            p.alignment = WD_ALIGN_PARAGRAPH.RIGHT
            r1 = p.add_run(str(title_text))
            r1.bold = True

            extras = []
            if md.get("court"):
                extras.append(str(md["court"]))
            if md.get("verdict_dt"):
                extras.append(str(md["verdict_dt"])[:10])
            if ps.get("is_overruled"):
                extras.append("⚠ הלכה שבוטלה")
            if extras:
                r2 = p.add_run(" · " + " · ".join(extras))
                r2.font.size = Pt(9)
                r2.font.color.rgb = RGBColor(0x47, 0x55, 0x69)

            # Excerpt
            text = d.get("text") or ""
            excerpt = bracket_tag.sub("", text).strip()[:240]
            if excerpt:
                ep = doc.add_paragraph()
                _rtl_para(ep)
                ep.alignment = WD_ALIGN_PARAGRAPH.RIGHT
                ep.paragraph_format.left_indent = Inches(0.25)
                er = ep.add_run("« " + excerpt + " »")
                er.italic = True
                er.font.size = Pt(9)
                er.font.color.rgb = RGBColor(0x47, 0x55, 0x69)

    # Footer note
    fp = doc.add_paragraph()
    _rtl_para(fp)
    fp.alignment = WD_ALIGN_PARAGRAPH.RIGHT
    fr = fp.add_run("\n— מסמך זה נוצר אוטומטית על ידי tau-rag לצרכי מחקר משפטי. "
                    "אין בו ייעוץ משפטי. אנא אמת מקורות מול גרסה רשמית של פסקי הדין.")
    fr.italic = True
    fr.font.size = Pt(8)
    fr.font.color.rgb = RGBColor(0x94, 0xA3, 0xB8)

    buf = BytesIO()
    doc.save(buf)
    return buf.getvalue()
def _enrich_docs_with_precedent_status(docs: list) -> None:
    """Attach a ``precedent_status`` dict to every dict in ``docs``, in place.

    Status shape:
      {is_overruled: bool, kind: str|None, overruling_doc_id: str|None,
       overruling_title: str|None, confidence: float}
    Matched docs additionally carry a ``snippet`` (at most 200 chars).

    NON-BLOCKING: when the pipeline has no ``_overruled_cache`` yet, a
    background build is requested through
    ``_trigger_precedent_cache_async`` and every doc receives the neutral
    status right away. An empty cache (no detections) also yields the
    neutral status for every doc. Any exception is swallowed, leaving the
    docs as they are at that point.
    """
    if not docs:
        return

    def _neutral_status():
        # A fresh dict per doc so entries never share state.
        return {
            "is_overruled": False,
            "kind": None,
            "overruling_doc_id": None,
            "overruling_title": None,
            "confidence": 0.0,
        }

    try:
        from ..pipeline import get_pipeline
        from ..precedent_analyzer import is_overruled

        pipeline = get_pipeline()
        detections = getattr(pipeline, "_overruled_cache", None)

        if not detections:
            if detections is None:
                # Cache cold — start the background build, don't wait.
                _trigger_precedent_cache_async(pipeline)
            for entry in docs:
                if isinstance(entry, dict):
                    entry["precedent_status"] = _neutral_status()
            return

        for entry in docs:
            if not isinstance(entry, dict):
                continue
            metadata = entry.get("metadata") or {}
            citation = (metadata.get("citation") or entry.get("title")
                        or entry.get("doc_id", ""))
            hit = is_overruled(citation, detections)
            if not hit:
                entry["precedent_status"] = _neutral_status()
                continue
            hit_kind = hit.get("kind")
            entry["precedent_status"] = {
                "is_overruled": hit_kind in ("explicit_overrule",
                                             "depart_from"),
                "kind": hit_kind,
                "overruling_doc_id": hit.get("overruling_doc_id"),
                "overruling_title": hit.get("overruling_title"),
                "snippet": hit.get("snippet", "")[:200],
                "confidence": float(hit.get("confidence", 0.0)),
            }
    except Exception:
        pass
outcome distribution, top-domains. Walks the live pipeline once: for each judgment-like doc, extracts the judges (using legal_entities) + outcome (using judgment_structurer) and aggregates. Cached on the pipeline; ~5-10s build for ~5000 docs, instant after. """ try: from ..pipeline import get_pipeline from ..legal_entities import extract_judges from ..judgment_structurer import structure_judgment from ..storage import get_cache_store, fingerprint_corpus pipe = get_pipeline() # In-memory cache (fastest) cached = getattr(pipe, "_judge_stats_cache", None) if cached is not None: return {"top_k": top_k, "judges": cached[:top_k], "n_total": len(cached), "cached": True} # Persistent cache (instant warm-up after restart) try: fp = fingerprint_corpus(pipe) cs = get_cache_store() persisted = cs.get("judge_stats_v1", fp) if persisted is not None: pipe._judge_stats_cache = persisted return {"top_k": top_k, "judges": persisted[:top_k], "n_total": len(persisted), "cached": True} except Exception: pass docs = (getattr(pipe, "_indexed_docs", None) or getattr(pipe, "_docs", None) or []) outcomes_init = ("accepted", "rejected", "partial", "remanded", "unknown") # judge_normalized → stats dict agg: Dict[str, Dict[str, Any]] = {} for d in docs: doc_id = getattr(d, "id", None) if not doc_id: continue text = getattr(d, "text", "") or "" md = getattr(d, "metadata", None) or {} if not text and md.get("_lazy_text"): try: text = pipe.get_text(doc_id) or "" except Exception: continue if len(text) < 200: continue judges = extract_judges(text) if not judges: continue try: struct = structure_judgment(text) outcome = struct.get("outcome", "unknown") except Exception: outcome = "unknown" domain = md.get("domain") for j in judges: key = j["normalized"] or j["name"] if key not in agg: agg[key] = { "judge": key, "name": j["name"], "title": j["title"], "surname": j.get("surname"), "n_cases": 0, "outcomes": {o: 0 for o in outcomes_init}, "domains": {}, "doc_ids": [], } row = agg[key] row["n_cases"] += 
def public_judgment_entities(doc_id: str):
    """Run entity extraction over a single indexed judgment.

    The doc is located in the pipeline's ``_indexed_docs`` (falling back to
    ``_docs``) by exact id or by id suffix. When the doc carries no inline
    text and its metadata has ``_lazy_text`` set, the text is fetched with
    ``pipe.get_text``. The text is passed to ``legal_entities.extract_entities``
    and the requested ``doc_id`` is added to whatever mapping that returns.

    Returns the entity mapping on success, or an ``(error_dict, status)``
    tuple: 404 for an empty ``doc_id`` or an unknown doc, 500 for any
    exception raised along the way.
    """
    if not doc_id:
        return {"error": "missing doc_id"}, 404
    try:
        from ..pipeline import get_pipeline
        from ..legal_entities import extract_entities

        pipeline = get_pipeline()
        corpus = (getattr(pipeline, "_indexed_docs", None)
                  or getattr(pipeline, "_docs", None)
                  or [])

        found = None
        for candidate in corpus:
            cid = getattr(candidate, "id", None)
            # Exact id first, then suffix match on the same id.
            if cid == doc_id or (cid or "").endswith(doc_id):
                found = candidate
                break
        if found is None:
            return {"error": "doc not found", "doc_id": doc_id}, 404

        body = getattr(found, "text", "") or ""
        if not body:
            meta = getattr(found, "metadata", None) or {}
            if meta.get("_lazy_text"):
                body = pipeline.get_text(found.id) or ""

        entities = extract_entities(body)
        entities["doc_id"] = doc_id
        return entities
    except Exception as exc:
        return {"error": str(exc), "doc_id": doc_id}, 500
def public_doctrine_evolution(keyword: str, top_k: int = 50):
    """List corpus docs that mention a doctrine keyword.

    Delegates the search (and whatever ordering it applies) to
    ``precedent_analyzer.find_doctrine_evolution`` over the pipeline's
    ``_indexed_docs`` / ``_docs`` list, then truncates the hits to ``top_k``.

    Returns ``{"keyword", "n_total", "hits"}`` where ``n_total`` counts all
    hits before truncation, or an ``(error_dict, status)`` tuple: 400 when
    ``keyword`` is empty, 500 on any exception.
    """
    if not keyword:
        return {"error": "missing keyword"}, 400
    try:
        from ..pipeline import get_pipeline
        from ..precedent_analyzer import find_doctrine_evolution

        pipeline = get_pipeline()
        corpus = getattr(pipeline, "_indexed_docs", None)
        if not corpus:
            corpus = getattr(pipeline, "_docs", None) or []

        matches = find_doctrine_evolution(corpus, keyword)
        total = len(matches)
        page = matches[:top_k]
        return {"keyword": keyword, "n_total": total, "hits": page}
    except Exception as exc:
        return {"error": str(exc)}, 500
def public_popular_citations(top_k: int = 25, kind: Optional[str] = None):
    """Return the ranking produced by ``citation_network.popular_citations``.

    The citation index is obtained with ``get_or_build`` for the live
    pipeline; ``top_k`` and ``kind`` are forwarded unchanged to
    ``popular_citations``.

    Returns ``{"top_k", "kind", "rows", "n_total_citations"}`` where
    ``n_total_citations`` is ``len(cn.cited_by)``, or ``({"error": ...}, 500)``
    when anything raises.
    """
    try:
        from ..pipeline import get_pipeline
        from ..citation_network import get_or_build, popular_citations

        network = get_or_build(get_pipeline())
        ranked = popular_citations(network, top_k=top_k, kind=kind)
        payload = {
            "top_k": top_k,
            "kind": kind,
            "rows": ranked,
            "n_total_citations": len(network.cited_by),
        }
        return payload
    except Exception as exc:
        return {"error": str(exc)}, 500
""" try: from ..pipeline import get_pipeline from ..judgment_structurer import structure_judgment from ..domain_classifier import all_domains_meta pipe = get_pipeline() # Cache the heavy walk: in-memory first, then persistent cached = getattr(pipe, "_outcome_stats_cache", None) if cached is not None: return cached try: from ..storage import get_cache_store, fingerprint_corpus cs = get_cache_store() fp = fingerprint_corpus(pipe) persisted = cs.get("outcome_stats_v1", fp) if persisted is not None: pipe._outcome_stats_cache = persisted return persisted except Exception: pass docs = (getattr(pipe, "_indexed_docs", None) or getattr(pipe, "_docs", None) or []) # Initialize per-domain bucket per_domain: Dict[str, Dict[str, int]] = {} outcomes = ("accepted", "rejected", "partial", "remanded", "unknown") domain_meta = {d["id"]: d for d in all_domains_meta()} for did in domain_meta.keys(): per_domain[did] = {o: 0 for o in outcomes} per_domain["__uncategorized__"] = {o: 0 for o in outcomes} totals: Dict[str, int] = {o: 0 for o in outcomes} n_processed = 0 seen_ids: set = set() for d in docs: doc_id = getattr(d, "id", None) if not doc_id or doc_id in seen_ids: continue seen_ids.add(doc_id) text = getattr(d, "text", "") or "" md = getattr(d, "metadata", None) or {} # Lazy-text: fetch from store on demand. Skipping the lookup # for already-processed cached results below. if not text and md.get("_lazy_text"): try: text = pipe.get_text(doc_id) or "" except Exception: text = "" # Skip really short docs — they're usually summaries/snippets # without an operative outcome. 
def public_judgment_structured(doc_id: str):
    """Return the structured-judgment view for one corpus doc.

    The doc is looked up in the pipeline's ``_indexed_docs`` / ``_docs`` by
    exact id or id suffix (``id`` attribute first, ``doc_id`` attribute as
    fallback). Its text — fetched with ``pipe.get_text`` when the metadata
    has ``_lazy_text`` set and no inline text is present — is passed to
    ``judgment_structurer.structure_judgment``. The result is then extended
    with:

    * ``coverage``   – section characters / full-text characters (3 dp);
    * ``full_text``  – the source text, as a fallback for the frontend;
    * ``doc_id`` and a ``metadata`` summary taken from the corpus record;
    * header fields (``court``, ``case_number``, ``date``) filled from
      corpus metadata only where the structurer left them empty;
    * ``catalog``    – the output of ``section_catalog()``.

    Results are memoised in the module-level ``_JUDGMENT_STRUCT_CACHE``
    keyed by the requested ``doc_id`` plus the length and MD5 of the full
    text, so any edit to the text yields a fresh entry. The cache is
    trimmed by 50 oldest-inserted entries once it exceeds 500.

    Returns the payload dict, or an ``(error_dict, status)`` tuple: 404 for
    an empty ``doc_id`` or an unknown doc, 500 on any exception.
    """
    if not doc_id:
        return {"error": "missing doc_id"}, 404

    # Created on first use and kept as a module global afterwards.
    cache = globals().setdefault("_JUDGMENT_STRUCT_CACHE", {})

    try:
        import hashlib as _hl
        from ..pipeline import get_pipeline
        from ..judgment_structurer import structure_judgment, section_catalog

        pipe = get_pipeline()
        docs = (getattr(pipe, "_indexed_docs", None)
                or getattr(pipe, "_docs", None) or [])

        # Exact match or suffix match; the suffix test also covers the
        # "<prefix>/<doc_id>" form.
        target = None
        target_id = None
        for d in docs:
            did = getattr(d, "id", None) or getattr(d, "doc_id", None)
            if did == doc_id or (did and did.endswith(doc_id)):
                target, target_id = d, did
                break
        if target is None:
            return {"error": "doc not found", "doc_id": doc_id}, 404

        text = getattr(target, "text", "") or ""
        meta = getattr(target, "metadata", None) or {}
        if not text and meta.get("_lazy_text"):
            try:
                # Use the id the doc was matched by: a record that only
                # exposes `doc_id` has no `.id` attribute to read.
                text = pipe.get_text(target_id) or ""
            except Exception:
                text = ""

        # Hash the whole text (not a prefix) so edits anywhere invalidate
        # the cached payload. MD5 is used as a fingerprint, not for security.
        digest = _hl.md5(text.encode("utf-8")).hexdigest()[:8]
        key = f"{doc_id}:{len(text)}:{digest}"
        cached = cache.get(key)
        if cached is not None:
            return cached

        result = structure_judgment(text)

        # Coverage ratio lets the UI warn when sections don't reconstruct
        # the source; the full text is always attached as a safety net.
        section_chars = sum(
            len((s or {}).get("text") or "")
            for s in (result.get("sections") or [])
        )
        result["coverage"] = round(section_chars / max(len(text), 1), 3)
        result["full_text"] = text

        result["doc_id"] = doc_id
        result["metadata"] = {
            "title": meta.get("title") or meta.get("citation"),
            "court": meta.get("court"),
            "verdict_dt": meta.get("verdict_dt"),
            "domain": meta.get("domain"),
            "type": meta.get("type"),
            "source": meta.get("source"),
            "kind": meta.get("kind"),
        }

        # Fill header gaps from corpus metadata; values the structurer
        # already extracted are kept.
        header = result.get("header") or {}
        result["header"] = header
        if meta.get("court") and not header.get("court"):
            header["court"] = meta["court"]
        if meta.get("citation") and not header.get("case_number"):
            header["case_number"] = meta["citation"]
        if meta.get("verdict_dt") and not header.get("date"):
            header["date"] = str(meta["verdict_dt"])[:10]

        result["catalog"] = section_catalog()

        cache[key] = result
        # Bound the cache: drop the 50 oldest insertions past 500 entries.
        if len(cache) > 500:
            for stale in list(cache)[:50]:
                cache.pop(stale, None)
        return result
    except Exception as e:
        return {"error": str(e), "doc_id": doc_id}, 500
def public_domain_stats():
    """Count indexed docs per legal domain.

    Starts from the catalog returned by ``domain_classifier.all_domains_meta``
    and walks the pipeline's ``_indexed_docs`` / ``_docs`` once, counting
    each distinct doc id a single time. A doc's ``domain`` and kind
    (``type`` or ``kind``, lower-cased) are read from its metadata, or from
    a nested ``metadata`` dict when one is present. Docs whose domain is
    missing or not in the catalog are tallied as uncategorized.

    Returns ``{"total", "uncategorized", "domains"}`` where each domain row
    is the catalog entry plus ``count`` / ``caselaw`` / ``statute``, sorted
    by descending count and then by ``label_he``. If walking the pipeline
    raises, the same shape is returned with zero counts and an ``error``
    string.
    """
    from ..domain_classifier import all_domains_meta as _meta

    catalog = _meta()
    zero = {"count": 0, "caselaw": 0, "statute": 0}
    tally = {entry["id"]: dict(zero) for entry in catalog}
    caselaw_kinds = ("caselaw", "psak", "פסיקה")
    statute_kinds = ("statute", "law", "חוק", "חקיקה")
    n_total = 0
    n_uncategorized = 0

    try:
        from ..pipeline import get_pipeline

        pipeline = get_pipeline()
        corpus = (getattr(pipeline, "_indexed_docs", None)
                  or getattr(pipeline, "_docs", None)
                  or [])
        counted = set()
        for record in corpus:
            rid = getattr(record, "id", None) or getattr(record, "doc_id", None)
            if not rid or rid in counted:
                continue
            counted.add(rid)
            n_total += 1

            domain, kind = None, ""
            raw_meta = getattr(record, "metadata", None) or {}
            if isinstance(raw_meta, dict):
                # Prefer a nested "metadata" dict when the record has one.
                nested = raw_meta.get("metadata")
                inner = nested if isinstance(nested, dict) else raw_meta
                domain = inner.get("domain")
                kind = (inner.get("type") or inner.get("kind") or "").lower()

            if not (domain and domain in tally):
                n_uncategorized += 1
                continue

            bucket = tally[domain]
            bucket["count"] += 1
            if kind in caselaw_kinds:
                bucket["caselaw"] += 1
            elif kind in statute_kinds:
                bucket["statute"] += 1
    except Exception as _e:
        return {
            "total": 0,
            "uncategorized": 0,
            "domains": [{**entry, **zero} for entry in catalog],
            "error": str(_e),
        }

    # Non-zero counts first (descending), ties broken by label.
    domains_out = sorted(
        ({**entry, **tally[entry["id"]]} for entry in catalog),
        key=lambda row: (-row["count"], row["label_he"]),
    )
    return {
        "total": n_total,
        "uncategorized": n_uncategorized,
        "domains": domains_out,
    }
def platform_changelog(
    format: str = "json",
    category: Optional[str] = None,
    version_prefix: Optional[str] = None,
    ported_only: bool = False,
):
    """Render the changelog in the requested ``format``.

    * ``json``     – ``render_json()``; when any of ``category``,
      ``version_prefix`` or ``ported_only`` is set, returns
      ``{"entries": generator.list(...)}`` with those filters instead;
    * ``markdown`` – ``render_markdown()`` wrapped in ``PlainTextResponse``;
    * ``text``     – ``render_text()`` wrapped in ``PlainTextResponse``;
    * ``compact``  – ``render_compact_summary()``.

    The filters are only consulted for ``json``.

    Raises:
        HTTPException: 400 for any other ``format`` value.
    """
    from ..middleware.changelog_gen import get_changelog_generator

    generator = get_changelog_generator()

    if format == "compact":
        return generator.render_compact_summary()

    plain_text_renderers = {"markdown": "render_markdown",
                            "text": "render_text"}
    if format in plain_text_renderers:
        render = getattr(generator, plain_text_renderers[format])
        return PlainTextResponse(render())

    if format != "json":
        raise HTTPException(status_code=400,
                            detail={"error": "invalid format",
                                    "valid": ["json", "markdown", "text",
                                              "compact"]})

    wants_filter = category or version_prefix or ported_only
    if not wants_filter:
        return generator.render_json()
    return {"entries": generator.list(category=category,
                                      version_prefix=version_prefix,
                                      ported_only=ported_only)}
def sessions_list(details: int = 0, min_turns: int = 0):
    """List sessions held by the memory store.

    Args:
        details: when truthy, return the rows from ``store.summaries()``;
            otherwise return the bare ids from ``store.list_ids()``.
        min_turns: when > 0, keep only sessions with at least this many
            turns (``n_turns`` of a summary row, or ``len(session.turns)``
            of the stored session).

    Returns:
        ``{"count": <number of sessions kept>, "sessions": <rows or ids>}``.
    """
    from ..memory import get_store

    store = get_store()

    if details:
        rows = store.summaries()
        if min_turns > 0:
            # `or 0` tolerates a row whose n_turns is present but None.
            rows = [row for row in rows
                    if int(row.get("n_turns", 0) or 0) >= min_turns]
        return {"count": len(rows), "sessions": rows}

    def _has_enough_turns(sid) -> bool:
        # One lookup per id: a second `store.get` could return None if the
        # session is dropped or collected between the two calls.
        session = store.get(sid)
        if not session:
            return False
        return len(session.turns) >= min_turns

    ids = store.list_ids()
    if min_turns > 0:
        ids = [sid for sid in ids if _has_enough_turns(sid)]
    return {"count": len(ids), "sessions": ids}
Event order: event: followup data: {"is_followup": bool, "expanded_query": "..."} event: retrieved data: {"doc_ids": [...], "count": N} event: answer data: {"chunk": "word "} (repeated) event: done data: {session_id, n_turns, answer, sources, omega, passed, verification} event: error data: {"code","message"} (on failure) """ try: validate_query_text(req.query) except OverflowError as e: raise HTTPException(status_code=413, detail=str(e)) except ValueError as e: raise HTTPException(status_code=422, detail=str(e)) from fastapi.responses import StreamingResponse import json as _json def _sse(event: str, data: Any) -> str: return f"event: {event}\ndata: {_json.dumps(data, ensure_ascii=False)}\n\n" def _event_gen(): try: # Detect follow-up + expand (same logic as run_conversation) from ..memory import expand_followup, get_store, is_followup store = get_store() session = store.get_or_create(req.session_id) followup = bool(is_followup(req.query, lang=req.lang)) and bool(session.turns) expanded = expand_followup(req.query, session) if followup else req.query yield _sse("followup", { "is_followup": followup, "expanded_query": expanded if followup else None, }) # Run the pipeline (extractive — ~ms) resp = _pipeline.run_conversation(req.query, req.session_id, lang=req.lang) # Stage 1: retrieval results retrieved = [] seen = set() for c in getattr(resp, "retrieved", []) or []: did = getattr(getattr(c, "chunk", None), "doc_id", None) if did and did not in seen: retrieved.append(did) seen.add(did) yield _sse("retrieved", {"doc_ids": retrieved, "count": len(retrieved)}) # Stage 2: answer streamed word-by-word answer = resp.answer or "" for w in answer.split(" "): if not w: continue yield _sse("answer", {"chunk": w + " "}) # Stage 3: final envelope — includes session_id + n_turns for client try: omega = float(resp.signals.omega) if resp.signals else None except Exception: omega = None verif = getattr(resp, "verification", None) # Re-read session — run_conversation added a new turn 
@app.post("/v1/upload/session")
async def upload_session_doc(
    file: UploadFile = File(...),
    session_id: str = Form(...),
    uploader: str = Form("anonymous"),
):
    """Parse an uploaded document and store it against one session.

    The file is parsed to text, chunked (1200 characters, 200 overlap),
    has metadata extracted, and is saved with
    ``upload.store.save_session_upload`` under ``session_id``. Nothing here
    adds the document to the shared corpus.
    NOTE(review): the 24h expiry mentioned for session uploads is not
    enforced in this handler — presumably the store applies it; confirm.

    Returns:
        ``{"doc_id", "session_id", "char_count", "chunk_count", "metadata"}``.

    Raises:
        HTTPException: 413 when the file exceeds 8 MB; 400 when parsing
        fails or the extracted text is shorter than 50 characters.
    """
    from ..upload import parser as _up_parser
    from ..upload import store as _up_store

    max_bytes = 8 * 1024 * 1024
    # Read at most one byte past the limit: enough to detect an oversized
    # upload without pulling an arbitrarily large file into memory.
    content = await file.read(max_bytes + 1)
    if len(content) > max_bytes:
        raise _HTTPExc(413, "File too large (max 8MB)")

    filename = file.filename or ""
    try:
        text = _up_parser.parse_text(content, filename)
    except Exception as e:
        raise _HTTPExc(400, f"Parse error: {e}") from e
    if len(text) < 50:
        raise _HTTPExc(400, "Extracted text too short")

    chunks = _up_parser.chunk_text(text, chunk_chars=1200, overlap=200)
    metadata = _up_parser.extract_metadata(text)
    metadata["filename"] = filename
    doc_id = _up_store.save_session_upload(
        session_id=session_id,
        filename=filename,
        text=text,
        metadata=metadata,
        chunks=chunks,
        uploader=uploader,
    )
    return {
        "doc_id": doc_id,
        "session_id": session_id,
        "char_count": len(text),
        "chunk_count": len(chunks),
        "metadata": metadata,
    }
@app.get("/v1/moderation/submission/{sub_id}")
async def get_submission_detail(sub_id: str):
    """Return one queued submission, with long text reduced to a preview.

    When the submission's ``text`` is longer than 4000 characters the
    response carries ``text_preview`` (first 4000 characters plus an
    ellipsis) and ``text_full_length`` instead of ``text``. Shorter texts
    are returned unchanged.

    Raises:
        HTTPException: 404 when the store has no such submission.
    """
    from ..upload import store as _up_store

    sub = _up_store.get_submission(sub_id)
    if not sub:
        raise _HTTPExc(404, "submission not found")

    preview_limit = 4000
    # Work on a shallow copy so the truncation never alters the record the
    # store handed back (it may be a shared or cached object).
    out = dict(sub)
    text = out.get("text") or ""  # tolerate a stored NULL text
    if len(text) > preview_limit:
        out["text_preview"] = text[:preview_limit] + "…"
        out["text_full_length"] = len(text)
        del out["text"]
    return out
@app.get("/v1/moderation/log")
async def get_mod_log(sub_id: Optional[str] = None, limit: int = 50):
    """Return moderation log entries from the upload store.

    ``sub_id`` and ``limit`` are forwarded unchanged to
    ``upload.store.get_moderation_log``; the result is wrapped as
    ``{"log": ...}``.
    """
    from ..upload import store as _up_store

    entries = _up_store.get_moderation_log(sub_id=sub_id, limit=limit)
    return {"log": entries}
@app.post("/v1/moderation/replay_corpus")
async def moderation_replay_corpus():
    """Replay the community corpus into the running pipeline.

    Hands the shared pipeline (``pipeline.get_pipeline()``) to
    ``upload.promote.replay_community_corpus`` and returns its result
    unchanged.
    NOTE(review): the file replayed (runtime/community_corpus.jsonl) and
    the "no re-stamping of promoted_ts" guarantee are properties of
    ``replay_community_corpus`` — confirm there.
    """
    from ..upload import promote as _up_promote
    from ..pipeline import get_pipeline as _get_pipe

    live_pipeline = _get_pipe()
    outcome = _up_promote.replay_community_corpus(live_pipeline)
    return outcome
def _wa_verify_hmac(
    body_bytes: bytes,
    ts_header: str,
    nonce_header: str,
    sig_header: str,
) -> Tuple[bool, str]:
    """Verify timestamp window, nonce uniqueness and HMAC of an ingest request.

    The signed message is the UTF-8 encoding of ``"{ts}.{nonce}."``
    followed by the raw request body bytes, keyed with a token from
    ``_whatsapp_live_tokens()`` and hashed with HMAC-SHA256 (hex digest).

    Args:
        body_bytes: Raw request body exactly as received.
        ts_header: Unix-seconds timestamp header value.
        nonce_header: Per-request nonce header value.
        sig_header: Hex HMAC signature header value.

    Returns:
        ``(ok, reason)``. ``reason`` is ``"ok"`` on success, otherwise a
        short machine-readable failure code.
    """
    import hashlib as _hl
    import hmac as _hm
    # Local import: this file's own comments note that `time` is not
    # imported at module level, so do not rely on a global name.
    import time as _time

    secrets = _whatsapp_live_tokens()
    if not secrets:
        return False, "ingest_disabled — set TAU_RAG_WA_TOKEN"
    if not (ts_header and nonce_header and sig_header):
        return False, "missing_auth_headers"

    # 1. Timestamp window check
    try:
        ts = int(ts_header)
    except (TypeError, ValueError):
        return False, "bad_timestamp"
    now = int(_time.time())
    if abs(now - ts) > _WA_CLOCK_SKEW_S:
        return False, f"timestamp_skew_{now - ts}s"

    # 2. Replay check — (ts, nonce) must be unique
    replay_cache, _ = _wa_caches()
    key = f"{ts}.{nonce_header}"
    if key in replay_cache:
        return False, "replay_detected"

    # 3. HMAC check — accept either active token.
    # compare_digest() raises TypeError when given a str containing
    # non-ASCII characters, and the signature header is attacker
    # controlled. Comparing bytes makes a junk header an ordinary
    # mismatch instead of an unhandled exception.
    msg = f"{ts}.{nonce_header}.".encode("utf-8") + body_bytes
    presented = sig_header.encode("utf-8", errors="replace")
    matched = False
    for secret in secrets:
        expected = _hm.new(
            secret.encode("utf-8"), msg, _hl.sha256
        ).hexdigest().encode("ascii")
        if _hm.compare_digest(expected, presented):
            matched = True
            # No break: every configured token is checked on every
            # request so timing does not reveal which one matched.
    if not matched:
        return False, "hmac_mismatch"

    # All checks passed — record the nonce so it can't be replayed
    replay_cache[key] = now
    _wa_cache_evict(replay_cache)
    return True, "ok"
def _wa_disk_persist(kind: str, key: str, ts: int) -> None:
    """Append one replay/idempotency record to the on-disk JSONL.

    Best-effort: any failure is logged and swallowed, because the
    in-memory caches remain authoritative for the running process.

    Args:
        kind: Record type; ``_wa_disk_load`` recognises ``"replay"`` and
            ``"idem"``.
        key: Cache key (``"{ts}.{nonce}"`` for replay, the payload id
            for idempotency). May contain arbitrary characters.
        ts: Unix-seconds timestamp stored with the record.
    """
    import json as _json

    try:
        # Serialise with json.dumps rather than string formatting: the
        # key comes from request data, and an unescaped quote or
        # backslash would produce a line that the loader silently skips,
        # dropping that entry from the replay cache after a restart.
        # Compact separators keep the byte format of ordinary records
        # unchanged.
        line = _json.dumps(
            {"kind": kind, "k": key, "ts": ts},
            ensure_ascii=False,
            separators=(",", ":"),
        )
        _WA_REPLAY_DISK.parent.mkdir(parents=True, exist_ok=True)
        with _WA_REPLAY_DISK.open("a", encoding="utf-8") as f:
            f.write(line + "\n")
        _WA_REPLAY_WRITE_COUNTER["n"] += 1
        # Check size every 1000 writes — file ops aren't free
        if _WA_REPLAY_WRITE_COUNTER["n"] % 1000 == 0:
            _wa_rotate_replay_if_needed()
    except Exception as e:
        # Disk write is best-effort — the in-memory cache still works.
        print(f"[tau-rag] wa_replay persist failed: {type(e).__name__}: {e}")
def _wa_rate_limit_key(request) -> str:  # type: ignore
    """Return the rate-limit bucket key for a request: the client address.

    The key is the first entry of ``X-Forwarded-For`` when that header
    is present, otherwise ``request.client.host``, otherwise
    ``"unknown"``.

    The signature header is deliberately NOT part of the key. The
    signature covers the timestamp and nonce, so it differs on every
    request; keying on it gave each request its own full bucket, which
    meant the limit never triggered and the bucket table grew with
    every call.

    NOTE(review): ``X-Forwarded-For`` is client-supplied unless a
    trusted proxy overwrites it — confirm the deployment's proxy setup.
    """
    forwarded = request.headers.get("x-forwarded-for") or ""
    first_hop = forwarded.split(",")[0].strip()
    if first_hop:
        return first_hop
    client = getattr(request, "client", None)
    host = getattr(client, "host", None) if client else None
    return host or "unknown"
rl_ok, rl_reason = _wa_check_rate_limit(request) if not rl_ok: _WA_STATS["rejected_auth"] += 1 # bucketed with auth rejects return {"ok": False, "error": "rate_limited", "reason": rl_reason} # Read raw body so we HMAC the exact bytes the sidecar signed raw = await request.body() ok, reason = _wa_verify_hmac( raw, request.headers.get("x-tau-timestamp", ""), request.headers.get("x-tau-nonce", ""), request.headers.get("x-tau-signature", ""), ) if not ok: _WA_STATS["rejected_auth"] += 1 return {"ok": False, "error": "unauthorized", "reason": reason} # Persist the accepted nonce so a restart can't replay it try: ts_h = int(request.headers.get("x-tau-timestamp", "0")) nonce_h = request.headers.get("x-tau-nonce", "") if ts_h and nonce_h: _wa_disk_persist("replay", f"{ts_h}.{nonce_h}", ts_h) except Exception: pass # Idempotency — the same payload.id within the same process lifetime # is a no-op. The sidecar retries on transient errors and we don't # want double-injection. _, idem_cache = _wa_caches() if payload.id in idem_cache: _WA_STATS["duplicates"] += 1 return {"ok": True, "duplicate": True, "id": payload.id, "first_seen_ts": idem_cache[payload.id]} now_ts = int(time.time()) idem_cache[payload.id] = now_ts _wa_cache_evict(idem_cache) _wa_disk_persist("idem", payload.id, now_ts) # Validate Hebrew quality — same gate the file-based scraper uses try: from ..scrapers.core import hebrew_quality_score q = hebrew_quality_score(payload.text or "") if not q.get("ok"): _WA_STATS["rejected_quality"] += 1 return {"ok": False, "error": "quality_gate_failed", "diagnose": q} except Exception: pass # fail-open: scraper module not loadable shouldn't break ingest # PII redaction — strip Israeli ID numbers, phones, emails, IBANs, # credit cards, bank accounts, vehicle plates from the text BEFORE # we persist anything. Counts are added to metadata + global stats # so we can monitor what the redactor is catching. 
clean_text = payload.text or "" pii_counts: Dict[str, int] = {} try: from ..scrapers.pii_redactor import redact_pii clean_text, pii_counts = redact_pii(clean_text) if pii_counts: total = sum(pii_counts.values()) _WA_STATS["redacted_pii_count"] += total except Exception: pass # fail-open: redactor unavailable shouldn't drop the message # Re-validate quality AFTER redaction — if the redactor stripped so # much the remaining text is mostly tokens, drop it. try: from ..scrapers.core import hebrew_quality_score as _hq2 q2 = _hq2(clean_text) if not q2.get("ok"): _WA_STATS["rejected_pii"] += 1 return {"ok": False, "error": "post_redaction_quality_failed", "diagnose": q2, "pii": pii_counts} except Exception: pass # Append to the same JSONL the file-based scraper writes — autoload # on next restart picks it up; live inject below makes it usable now. import json as _wj from pathlib import Path as _PWA from ..core.types import Document as _DocW from ..pipeline import get_pipeline as _pipe_w here = _PWA(__file__).resolve().parent.parent out_path = here / "runtime" / "scraped" / "scraped_corpus.jsonl" out_path.parent.mkdir(parents=True, exist_ok=True) rec = { "id": payload.id, "text": clean_text, "metadata": dict(payload.metadata or {}), } rec["metadata"]["source"] = rec["metadata"].get("source", "whatsapp_live") rec["metadata"]["ingested_at"] = int(time.time()) # type: ignore if pii_counts: rec["metadata"]["pii_redacted"] = pii_counts try: with out_path.open("a", encoding="utf-8") as f: f.write(_wj.dumps(rec, ensure_ascii=False) + "\n") except Exception as e: return {"ok": False, "error": f"write_failed: {type(e).__name__}: {e}"} # Live inject — so the thread is searchable immediately. Runs in a # threadpool so chunking + indexing don't block the event loop. # If the inject fails, the JSONL persistence still ensures the # record is loaded on the next server restart. 
def _corpus_inventory_snapshot() -> Dict[str, Any]:
    """Return the corpus inventory, recomputing at most once per 60 seconds.

    Counts the pipeline's ``_indexed_docs`` by ``case_type`` / ``source``
    / ``court`` metadata (Tier A), then adds the ``n_docs`` figure from
    each shard's ``retriever_state/manifest.json`` (Tier B). The result
    is memoised on the function object together with its timestamp.
    Shard problems are collected into ``tier_b_errors`` instead of
    raising.
    """
    import time as _ti

    memo = getattr(_corpus_inventory_snapshot, "_cache", None)
    if memo and _ti.time() - memo["ts"] < 60:
        return memo["data"]

    from ..pipeline import get_pipeline

    pipe = get_pipeline()
    indexed = list(getattr(pipe, "_indexed_docs", []) or [])

    def _ranked(counts: Dict[str, int]) -> Dict[str, int]:
        # Highest count first; ties keep first-seen order (stable sort).
        return dict(sorted(counts.items(), key=lambda kv: -kv[1]))

    # case_type values that map straight onto a dedicated bucket.
    dedicated = {
        "חוק": "laws",
        "הודעת דוברות": "dover",
        "שאלת ייעוץ": "whatsapp",
    }
    tally = {"judgments": 0, "laws": 0, "dover": 0, "whatsapp": 0, "other": 0}
    courts: Dict[str, int] = {}
    sources: Dict[str, int] = {}
    for doc in indexed:
        meta = getattr(doc, "metadata", {}) or {}
        case_type = (meta.get("case_type") or "").strip()
        source = (meta.get("source") or "").strip()
        court_name = (meta.get("court") or "").strip()

        bucket = dedicated.get(case_type)
        if bucket is None:
            # Fallback: explicit judgment types, the court_verdicts
            # source, or no source at all are counted as judgments.
            looks_like_judgment = (
                case_type in ("פסק דין", "החלטה")
                or source in ("court_verdicts",)
                or not source
            )
            bucket = "judgments" if looks_like_judgment else "other"
        tally[bucket] += 1

        if court_name:
            courts[court_name] = courts.get(court_name, 0) + 1
        if source:
            sources[source] = sources.get(source, 0) + 1

    # Tier B: per-shard document counts read from each shard manifest.
    shard_total = 0
    shard_counts: Dict[str, int] = {}
    shard_errors: list = []
    try:
        import json as _json
        from ..retrieve.shard_router import get_shard_router
        from pathlib import Path as _IP

        router = get_shard_router()
        for shard_name in (router.available_shards or []):
            try:
                root = router._shard_dir.get(shard_name)
                if root is None:
                    root = router.shards_dir
                # Cast defensively: the stored root may be a plain str.
                manifest_path = (_IP(root) / shard_name
                                 / "retriever_state" / "manifest.json")
                if manifest_path.exists():
                    manifest = _json.loads(
                        manifest_path.read_text(encoding="utf-8"))
                    shard_docs = int(manifest.get("n_docs", 0))
                    shard_counts[shard_name] = shard_docs
                    shard_total += shard_docs
                else:
                    shard_errors.append(
                        f"{shard_name}: manifest missing at {manifest_path}")
            except Exception as ie:
                shard_errors.append(
                    f"{shard_name}: {type(ie).__name__}: {ie}")
    except Exception as e:
        shard_errors.append(f"router init: {type(e).__name__}: {e}")

    if shard_errors:
        print(f"[corpus_inventory] tier_b errors: {shard_errors[:3]}")

    n_tier_a = len(indexed)
    data = {
        # `total` is Tier A + Tier B combined.
        "total": n_tier_a + shard_total,
        "tier_a": n_tier_a,
        "tier_b": shard_total,
        "tier_b_breakdown": _ranked(shard_counts),
        "tier_b_errors": shard_errors[:5],
        # Every shard document is counted as a judgment.
        "judgments": tally["judgments"] + shard_total,
        "laws": tally["laws"],
        "dover": tally["dover"],
        "whatsapp": tally["whatsapp"],
        "other": tally["other"],
        "by_court": _ranked(courts),
        "by_source": _ranked(sources),
        "fetched_at": int(_ti.time()),
    }
    _corpus_inventory_snapshot._cache = {"ts": _ti.time(), "data": data}
    return data
# Module-level cache for the /v1/corpus/exists trigram index.
_EXISTS_INDEX: Dict[str, Any] = {
    "fingerprint": None,  # number of docs the index was built from
    "trigrams": {},       # trigram -> set of doc positions
    "haystack": [],       # one search record per doc, same order as docs
}


def _trigrams(s: str) -> set:
    """Return the set of lower-cased 3-character windows of ``s``.

    Empty or ``None`` input yields an empty set; a 1-2 character string
    yields a set containing just that (lower-cased) string.
    """
    text = (s or "").lower()
    if not text:
        return set()
    if len(text) < 3:
        return {text}
    return {a + b + c for a, b, c in zip(text, text[1:], text[2:])}


def _build_exists_index(docs):
    """Build the trigram postings and the per-doc search records.

    Every doc gets exactly one haystack record at its own position; the
    searchable surface is ``"<id> <title>"`` lower-cased. Returns
    ``(trigram -> set of positions, list of records)``.
    """
    postings: Dict[str, set] = {}
    records: List[Dict[str, Any]] = []
    for pos, doc in enumerate(docs):
        meta = getattr(doc, "metadata", {}) or {}
        doc_id = str(getattr(doc, "id", ""))
        doc_title = str(meta.get("title") or "")
        record = {
            "idx": pos,
            "id": doc_id,
            "title": doc_title or doc_id,
            "case_type": str(meta.get("case_type") or ""),
            "url": meta.get("url"),
            "surface": f"{doc_id} {doc_title}".lower(),
        }
        records.append(record)
        for gram in _trigrams(record["surface"]):
            if gram not in postings:
                postings[gram] = set()
            postings[gram].add(pos)
    return postings, records


def _exists_index_get(docs):
    """Return ``(trigrams, haystack)``, rebuilding when the doc count changes.

    The cache key is only ``len(docs)``, so a corpus swapped for another
    of the same size keeps serving the previously built index.
    """
    n_docs = len(docs)
    if _EXISTS_INDEX["fingerprint"] == n_docs:
        return _EXISTS_INDEX["trigrams"], _EXISTS_INDEX["haystack"]
    postings, records = _build_exists_index(docs)
    _EXISTS_INDEX["fingerprint"] = n_docs
    _EXISTS_INDEX["trigrams"] = postings
    _EXISTS_INDEX["haystack"] = records
    return _EXISTS_INDEX["trigrams"], _EXISTS_INDEX["haystack"]
Returns: {"found": bool, "matches": [{"id", "title", "kind", "score"} ...]} """ if not q or not q.strip(): return {"found": False, "matches": [], "error": "missing query"} from difflib import SequenceMatcher from ..pipeline import get_pipeline needle = q.strip() needle_lc = needle.lower() pipe = get_pipeline() docs = list(getattr(pipe, "_indexed_docs", []) or []) if not docs: return {"found": False, "matches": [], "n_total_searched": 0} tri, haystack = _exists_index_get(docs) # Trigram intersection → candidate set. Substring-match (exact) is # always a candidate via direct loop, so a 9-char doc_id query that # happens to share trigrams with hundreds of docs still resolves. q_grams = _trigrams(needle_lc) candidates: set = set() if q_grams: # Sum membership: a doc is a candidate if it shares at least # one trigram. Cheap and good-recall. for tg in q_grams: candidates.update(tri.get(tg, set())) # Always add direct substring hits — covers ID lookups too short for # trigrams ("/22" alone has one trigram and may match too much). 
if len(candidates) < 1000: # don't double-scan if we already have plenty for h in haystack: if needle_lc in h["surface"]: candidates.add(h["idx"]) matches: List[Dict[str, Any]] = [] for idx in candidates: h = haystack[idx] title = h["title"] ct = h["case_type"] did = h["id"] surface = h["surface"] score = 0.0 match_kind = None # Direct substring → very high confidence if needle_lc in surface: score = 0.95 match_kind = "case" if needle_lc in did.lower() else "law" # Fuzzy match on title for laws (kind=law/any) if kind in ("law", "any") and ct == "חוק" and title: fuzzy = SequenceMatcher(None, needle_lc, title.lower()).ratio() if fuzzy > score: score = fuzzy match_kind = "law" if score >= 0.6 and match_kind: if kind != "any" and match_kind != kind: continue matches.append({ "id": did, "title": title or did, "kind": match_kind, "score": round(score, 3), "url": h["url"], }) matches.sort(key=lambda m: -m["score"]) return { "found": bool(matches), "n_total_searched": len(docs), "n_candidates": len(candidates), "matches": matches[:max(1, limit)], } # ────────────────────────────────────────────────────────────────────── # v5 — Lawyer Q&A endpoint with relevance gate # Wraps the existing strategy synthesizer with a guard: if the question # isn't in our covered legal domains OR retrieval confidence is too low, # we return a clear "this is outside our corpus" response instead of a # misleading low-quality answer. Saves users from acting on bad info. # ────────────────────────────────────────────────────────────────────── class _LawyerQARequest(BaseModel): # type: ignore # BaseModel imported earlier in file question: str side: Optional[str] = "plaintiff" top_k: int = 10 # Three-tier thresholds (was a single `min_relevance` cliff). Below # `out_of_scope_below` we refuse outright; between that and # `borderline_below` we answer WITH a clear caveat; above that we # answer confidently. 
def _lawyer_qa_db():
    """Return the process-wide SQLite connection for the permalink store.

    On first call this creates the database file's parent directory,
    opens the connection (``check_same_thread=False``), ensures the
    ``lawyer_qa`` table and its indexes exist, upgrades databases that
    predate the ``view_count`` / ``last_viewed_at`` columns, and caches
    the connection on the function object. Later calls return the
    cached connection.

    Raises:
        sqlite3.Error / OSError: if the database cannot be opened or
            initialised. The half-open connection is closed first and
            nothing is cached, so a later call retries from scratch.
    """
    import sqlite3

    cached = getattr(_lawyer_qa_db, "_conn", None)
    if cached is not None:
        return cached

    _LAWYER_QA_DB.parent.mkdir(parents=True, exist_ok=True)
    c = sqlite3.connect(str(_LAWYER_QA_DB), check_same_thread=False)
    try:
        c.execute("""
            CREATE TABLE IF NOT EXISTS lawyer_qa(
                id TEXT PRIMARY KEY,
                question TEXT NOT NULL,
                side TEXT,
                response TEXT NOT NULL,
                confidence TEXT,
                relevance REAL,
                created_at INTEGER NOT NULL,
                ip_hash TEXT,
                view_count INTEGER DEFAULT 0,
                last_viewed_at INTEGER
            )
        """)
        c.execute("""
            CREATE INDEX IF NOT EXISTS idx_lawyer_qa_created
            ON lawyer_qa(created_at)
        """)

        # In-place migration for DBs created before view_count existed.
        # This MUST run before the popularity index below: on an old
        # table, CREATE INDEX on view_count fails with "no such column"
        # and would abort initialisation before the columns are added.
        existing_cols = {
            row[1] for row in c.execute("PRAGMA table_info(lawyer_qa)")
        }
        for col, ddl in (
            ("view_count",
             "ALTER TABLE lawyer_qa ADD COLUMN view_count INTEGER DEFAULT 0"),
            ("last_viewed_at",
             "ALTER TABLE lawyer_qa ADD COLUMN last_viewed_at INTEGER"),
        ):
            if col in existing_cols:
                continue
            try:
                c.execute(ddl)
            except sqlite3.OperationalError:
                # ADD COLUMN raises "duplicate column name" if another
                # initialiser added it between the check and this call.
                pass

        # Index for popularity ranking — safe now that the columns exist.
        c.execute("""
            CREATE INDEX IF NOT EXISTS idx_lawyer_qa_popular
            ON lawyer_qa(view_count DESC, last_viewed_at DESC)
        """)
        c.commit()
    except Exception:
        c.close()
        raise

    _lawyer_qa_db._conn = c
    return _lawyer_qa_db._conn
@app.get("/v1/lawyer/answer/{qa_id}")
def lawyer_answer_lookup(qa_id: str):
    """Retrieve a previously saved Q&A by permalink ID.

    No auth — the ID itself is the secret. Failures are reported in the
    JSON body as ``{"ok": False, "error": ...}`` with one of
    ``bad_id``, ``not_found``, ``expired`` (older than 90 days) or an
    exception summary; no HTTP error status is raised here.

    Side effect: bumps ``view_count`` and ``last_viewed_at`` so the
    popular endpoint can rank answers users return to. The bump is
    best-effort; if it fails, the response reports the stored counters
    rather than values that were never written.
    """
    import json as _ej
    import sqlite3 as _sq
    # Local import: this file's own comments note that `time` is not
    # imported at module level, so do not rely on a global name.
    import time as _time

    if not qa_id or not qa_id.replace("-", "").replace("_", "").isalnum():
        return {"ok": False, "error": "bad_id"}
    try:
        conn = _lawyer_qa_db()
        row = conn.execute(
            "SELECT question, side, response, created_at, "
            "view_count, last_viewed_at FROM lawyer_qa "
            "WHERE id = ?",
            (qa_id,),
        ).fetchone()
        if not row:
            return {"ok": False, "error": "not_found"}
        q, side, response_json, created_at, view_count, last_viewed_at = row

        now_ts = int(_time.time())
        if created_at < now_ts - 90 * 86400:
            return {"ok": False, "error": "expired"}

        views = view_count or 0
        try:
            conn.execute(
                "UPDATE lawyer_qa SET view_count = COALESCE(view_count,0)+1, "
                "last_viewed_at = ? WHERE id = ?",
                (now_ts, qa_id),
            )
            conn.commit()
        except _sq.Error:
            # Counter bump is best-effort; keep the stored values.
            pass
        else:
            views += 1
            last_viewed_at = now_ts

        return {
            "ok": True,
            "id": qa_id,
            "question": q,
            "side": side,
            "created_at": created_at,
            "view_count": views,
            "last_viewed_at": last_viewed_at,
            "response": _ej.loads(response_json),
        }
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}
@app.get("/v1/clusters/{cluster_id}")
def get_doctrine_cluster(cluster_id: str, max_members: int = 100):
    """Detail view for a single cluster — anchor, applications, origins.

    Each member list is returned twice for backward compatibility: as a
    bare ID list (``applications``, ``origins``) and as the output of
    ``resolve_cluster_members`` (``applications_meta``,
    ``origins_meta``). Both are truncated to ``max_members`` entries.

    A negative ``max_members`` is treated as 0. Passed straight to a
    slice it would count from the end and silently drop trailing
    members instead of limiting the list.

    Returns ``{"ok": True, "cluster": ...}``, ``{"ok": False, "error":
    "not_found"}`` when no cluster has this ID, or an exception summary.
    """
    try:
        from ..pipeline import get_pipeline
        from ..clustering import (get_or_build_clusters, cluster_summary,
                                  resolve_cluster_members)
        pipe = get_pipeline()
        clusters = get_or_build_clusters(pipe)
        match = next((c for c in clusters if c.cluster_id == cluster_id), None)
        if not match:
            return {"ok": False, "error": "not_found"}

        cap = max(0, max_members)
        s = cluster_summary(match)
        apps = match.applications[:cap]
        ors = match.origins[:cap]
        s["applications"] = apps
        s["origins"] = ors

        # Prefer the indexed docs; fall back to `_docs` if that attribute
        # is missing or empty.
        docs = (getattr(pipe, "_indexed_docs", None)
                or getattr(pipe, "_docs", None) or [])
        co_cite_added = (match.extra or {}).get("co_cite_added") or []
        s["applications_meta"] = resolve_cluster_members(
            docs, apps, co_cite_added=co_cite_added)
        s["origins_meta"] = resolve_cluster_members(
            docs, ors, co_cite_added=co_cite_added)
        return {"ok": True, "cluster": s}
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}
# ──────────────────────────────────────────────────────────────────────

@app.get("/v1/hgraph/stats")
def hgraph_stats():
    """Per-level counts: docs, clusters, citations, domain breakdown.

    Returns:
        ``{"ok": True, ...}`` merged with whatever the hierarchical
        graph's ``stats()`` returns, or
        ``{"ok": False, "error": "<ExceptionType>: <message>"}``.
    """
    try:
        from ..pipeline import get_pipeline
        from ..hierarchical_graph import get_or_build_hgraph
        return {"ok": True, **get_or_build_hgraph(get_pipeline()).stats()}
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}


@app.post("/v1/hgraph/retrieve")
def hgraph_retrieve(body: Dict[str, Any]):
    """Multi-resolution retrieval.

    Body: {"query": str, "level": str|int, "k": int, "parent_id": str?}

    `level` is one of: paragraph, section, document, cluster, domain.
    Defaults are ``level="document"`` and ``k=10``.

    Returns:
        ``{"ok": True, "level": ..., "n": int, "items": [...]}`` on
        success, ``{"ok": False, "error": "empty_query"}`` for a blank
        query, or ``{"ok": False, "error": "<ExceptionType>: <message>"}``
        if anything raises (including a non-integer ``k``).
    """
    try:
        q = (body.get("query") or "").strip()
        if not q:
            return {"ok": False, "error": "empty_query"}
        level = body.get("level", "document")
        k = int(body.get("k", 10))
        parent_id = body.get("parent_id")
        from ..pipeline import get_pipeline
        from ..hierarchical_graph import get_or_build_hgraph
        hg = get_or_build_hgraph(get_pipeline())
        items = hg.retrieve(q, level=level, k=k, parent_id=parent_id)
        return {"ok": True, "level": level, "n": len(items), "items": items}
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}


@app.get("/v1/hgraph/path")
def hgraph_path(node_id: str, level: str = "document"):
    """Walk from a node up to the system root. Returns the full path.

    Returns:
        ``{"ok": True, "path": [{"level_index", "level", "node_id"}, ...]}``
        with one entry per ``(level_index, node_id)`` pair produced by the
        graph, or ``{"ok": False, "error": "<ExceptionType>: <message>"}``.
    """
    try:
        from ..pipeline import get_pipeline
        from ..hierarchical_graph import (get_or_build_hgraph, LEVELS)
        hg = get_or_build_hgraph(get_pipeline())
        path = hg.path(node_id, level=level)
        return {
            "ok": True,
            "path": [
                {"level_index": lvl, "level": LEVELS[lvl], "node_id": nid}
                for lvl, nid in path
            ],
        }
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}


@app.post("/v1/hgraph/argument")
def hgraph_build_argument(body: Dict[str, Any]):
    """End-to-end argument assembly.

    Body: {"user_facts": str, "side": "plaintiff"|"defendant",
           "retrieval_k": int}

    ``question`` is accepted as a fallback key for ``user_facts``;
    ``side`` defaults to "plaintiff" and ``retrieval_k`` to 20.

    Returns an ArgumentBundle dict — domain → cluster → anchor → quote
    → applications + origins → coverage. Never invokes any generative
    model; everything is verbatim from the corpus.
    """
    try:
        facts = (body.get("user_facts") or body.get("question") or "").strip()
        if not facts:
            return {"ok": False, "error": "empty_facts"}
        side = body.get("side") or "plaintiff"
        rk = int(body.get("retrieval_k", 20))
        from ..pipeline import get_pipeline
        from ..hierarchical_graph import get_or_build_hgraph
        hg = get_or_build_hgraph(get_pipeline())
        bundle = hg.build_argument(facts, side=side, retrieval_k=rk)
        return {"ok": True, "bundle": bundle.to_dict()}
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}


@app.post("/v1/clusters/route")
def route_query_to_cluster(body: Dict[str, Any]):
    """Given a query, return the top-K doctrine clusters it belongs to.

    Body: {"question": str, "top_k": int = 3, "retrieval_k": int = 20}

    Pipeline:
      1. Run retrieval on the question.
      2. Take top-N retrieved doc_ids.
      3. Score each cluster by overlap.
      4. Return top-K clusters with scores + role of each retrieved doc.

    Returns:
        ``{"ok": True, "n_retrieved": int, "results": [...]}`` on success,
        ``{"ok": False, "error": "empty_question"}`` for a blank question,
        or ``{"ok": False, "error": "<ExceptionType>: <message>"}``.
    """
    try:
        q = (body.get("question") or "").strip()
        if not q:
            return {"ok": False, "error": "empty_question"}
        top_k = int(body.get("top_k", 3))
        retrieval_k = int(body.get("retrieval_k", 20))
        from ..pipeline import get_pipeline
        from ..core.types import Query as _Q
        from ..clustering import (get_or_build_clusters, cluster_for_query,
                                  cluster_summary, classify_role)
        pipe = get_pipeline()
        retrieved = pipe.retrievers.search(_Q(text=q), k=retrieval_k)
        # De-duplicate doc IDs while keeping retrieval order.
        retrieved_ids = []
        seen: set = set()
        for r in retrieved:
            chunk = getattr(r, "chunk", None) or r
            did = (getattr(chunk, "doc_id", None)
                   or getattr(r, "doc_id", None))
            if did and did not in seen:
                seen.add(did)
                retrieved_ids.append(did)
        clusters = get_or_build_clusters(pipe)
        ranked = cluster_for_query(clusters, retrieved_ids, top_k=top_k)
        results = []
        for c, score in ranked:
            # Build the membership set once per cluster instead of once
            # per retrieved document.
            member_ids = set(c.members())
            results.append({
                **cluster_summary(c),
                "score": round(score, 4),
                "retrieved_in_cluster": [
                    {"doc_id": did, "role": classify_role(c, did)}
                    for did in retrieved_ids
                    if did in member_ids
                ],
            })
        return {
            "ok": True,
            "n_retrieved": len(retrieved_ids),
            "results": results,
        }
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}


# ──────────────────────────────────────────────────────────────────────
# Aggregated system dashboard — single endpoint pulls signals from
# every sub-system (corpus, sidecar, Q&A store, pipeline) so the
# frontend can render a one-pane operational view without firing 6
# separate requests.
#
# Each subsystem block is fault-isolated: if `/v1/scrapers/whatsapp/stats`
# would error, only that block returns {"error": ...}; the others still
# populate. Lets the dashboard render partial rather than spinning.
# ──────────────────────────────────────────────────────────────────────

@app.get("/v1/system/dashboard")
def system_dashboard():
    """One-shot health snapshot across all subsystems.

    Each subsystem is collected in its own try/except, so a failure in
    one block only replaces that block with ``{"error": ...}``.

    NOTE(review): the original note claimed ~10ms total because every
    piece is cached or trivial; the scrape counter below re-reads the
    whole JSONL file on every call, so cost grows with that file.

    Returns:
        A dict with ``ts`` plus the blocks ``corpus``, ``whatsapp``,
        ``lawyer_qa``, ``pipeline``, ``build_progress`` and ``scrapes``,
        and an overall ``status``: "degraded" if any block carries an
        ``error`` key, else "ok" when the pipeline reports ready, else
        "starting".
    """
    import time as _time_dash
    out: Dict[str, Any] = {"ts": int(_time_dash.time())}

    # Corpus inventory — reuses the cached snapshot
    try:
        out["corpus"] = _corpus_inventory_snapshot()
    except Exception as e:
        out["corpus"] = {"error": f"{type(e).__name__}: {e}"}

    # WhatsApp sidecar stats (server side — what we received)
    try:
        out["whatsapp"] = {
            "uptime_s": int(_time_dash.time()) - _WA_STATS["started_ts"],
            "received": _WA_STATS["received"],
            "kept": _WA_STATS["kept"],
            "duplicates": _WA_STATS["duplicates"],
            "rejected_auth": _WA_STATS["rejected_auth"],
            "rejected_quality": _WA_STATS["rejected_quality"],
            "rejected_pii": _WA_STATS["rejected_pii"],
            "redacted_pii_count": _WA_STATS["redacted_pii_count"],
            "live_inject_failed": _WA_STATS["live_inject_failed"],
            "ingest_configured": bool(_whatsapp_live_token()),
        }
    except Exception as e:
        out["whatsapp"] = {"error": f"{type(e).__name__}: {e}"}

    # Lawyer Q&A store — counts by confidence + total views
    try:
        conn = _lawyer_qa_db()
        cur = conn.execute("""
            SELECT
              COUNT(*) AS total,
              SUM(CASE WHEN confidence='high' THEN 1 ELSE 0 END) AS n_high,
              SUM(CASE WHEN confidence='borderline' THEN 1 ELSE 0 END) AS n_border,
              SUM(COALESCE(view_count, 0)) AS total_views,
              COUNT(DISTINCT ip_hash) AS distinct_ips,
              MAX(created_at) AS last_saved_at
            FROM lawyer_qa
        """)
        row = cur.fetchone()
        # The SUM aggregates are NULL on an empty table, hence the `or 0`.
        out["lawyer_qa"] = {
            "total": row[0] or 0,
            "n_high": row[1] or 0,
            "n_borderline": row[2] or 0,
            "total_views": row[3] or 0,
            "distinct_ips": row[4] or 0,
            "last_saved_at": row[5],
        }
    except Exception as e:
        out["lawyer_qa"] = {"error": f"{type(e).__name__}: {e}"}

    # Pipeline / retriever health — minimal indicators
    try:
        from ..pipeline import get_pipeline
        pipe = get_pipeline()
        n_docs = len(getattr(pipe, "_indexed_docs", []) or [])
        retrievers = getattr(pipe.retrievers, "_retrievers", None)
        n_retrievers = (len(retrievers) if isinstance(retrievers, dict)
                        else (1 if pipe.retrievers else 0))
        out["pipeline"] = {
            "indexed_docs": n_docs,
            "n_retrievers": n_retrievers,
            "ready": n_docs > 0,
        }
    except Exception as e:
        out["pipeline"] = {"error": f"{type(e).__name__}: {e}"}

    # Build progress (set during corpus autoload — user can see "still
    # indexing" if they hit /v1/system/dashboard mid-startup)
    try:
        out["build_progress"] = public_build_progress()
    except Exception:
        out["build_progress"] = {"phase": "unknown"}

    # Recent scrapes — count records in scraped_corpus.jsonl by source
    try:
        from pathlib import Path as _PSC
        import json as _scj
        path = (_PSC(__file__).resolve().parent.parent
                / "runtime" / "scraped" / "scraped_corpus.jsonl")
        if path.exists():
            counts: Dict[str, int] = {}
            n_total = 0
            with path.open("r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        rec = _scj.loads(line)
                    except Exception:
                        continue
                    # A line that parses to something other than a JSON
                    # object is skipped like an unparseable one, so a
                    # single odd record cannot flip the whole dashboard
                    # to "degraded".
                    if not isinstance(rec, dict):
                        continue
                    meta = rec.get("metadata")
                    src = ((meta.get("source") if isinstance(meta, dict)
                            else None) or "unknown")
                    counts[src] = counts.get(src, 0) + 1
                    n_total += 1
            out["scrapes"] = {
                "total": n_total,
                "by_source": counts,
                "file_bytes": path.stat().st_size,
            }
        else:
            out["scrapes"] = {"total": 0, "by_source": {}, "file_bytes": 0}
    except Exception as e:
        out["scrapes"] = {"error": f"{type(e).__name__}: {e}"}

    # Overall health: degraded if ANY subsystem reports an error
    statuses = [v for v in out.values() if isinstance(v, dict)]
    has_error = any("error" in s for s in statuses)
    pipeline_ready = bool((out.get("pipeline") or {}).get("ready"))
    out["status"] = ("degraded" if has_error
                     else "ok" if pipeline_ready
                     else "starting")
    return out


# ──────────────────────────────────────────────────────────────────────
# Multi-Q&A docx export — pack multiple saved answers into one memo.
#
# Use case: lawyer prepared 5 Q&As during research, wants to send a
# combined memo to the client. Frontend selects N qa_ids and POSTs
# them; backend assembles a single .docx with a TOC and per-Q&A
# sections.
# ──────────────────────────────────────────────────────────────────────

class _LawyerQAExportRequest(BaseModel):  # type: ignore
    # Request body for POST /v1/lawyer/export-pack.
    qa_ids: List[str]  # IDs of the saved Q&As to pack, in output order
    title: Optional[str] = None  # cover page title; defaults to date


@app.post("/v1/lawyer/export-pack")
def lawyer_export_pack(body: _LawyerQAExportRequest):
    """Return a Hebrew RTL .docx packing multiple Q&As into one memo.

    Args:
        body: ``qa_ids`` (1-50 saved Q&A IDs) and an optional cover
            ``title``. IDs that are not alphanumeric (ignoring ``-`` and
            ``_``) or that are not found in the store are silently
            skipped.

    Returns:
        A ``Response`` carrying the .docx as an attachment on success.
        On failure a JSON dict ``{"ok": False, "error": ...}`` with one
        of: ``no_ids``, ``too_many``, ``python-docx_not_installed``,
        ``db: <ExceptionType>: <message>``, ``none_found``.
    """
    from io import BytesIO
    from fastapi.responses import Response
    import datetime as _dt
    if not body.qa_ids:
        return {"ok": False, "error": "no_ids"}
    # Cap — assembling more than 50 Q&As into a docx is rarely useful and
    # ties up a worker thread (python-docx is synchronous, and FastAPI
    # runs plain `def` endpoints in its threadpool).
    if len(body.qa_ids) > 50:
        return {"ok": False, "error": "too_many", "max": 50}
    try:
        from docx import Document as DocxDocument
        from docx.shared import Pt, RGBColor
        from docx.enum.text import WD_ALIGN_PARAGRAPH
        from docx.oxml.ns import qn
        from docx.oxml import OxmlElement
    except ImportError:
        return {"ok": False, "error": "python-docx_not_installed"}

    # Fetch all requested Q&As
    import json as _ej
    fetched: List[Dict[str, Any]] = []
    try:
        conn = _lawyer_qa_db()
        for qa_id in body.qa_ids:
            if not qa_id.replace("-", "").replace("_", "").isalnum():
                continue
            row = conn.execute(
                "SELECT id, question, side, response, created_at "
                "FROM lawyer_qa WHERE id = ?",
                (qa_id,),
            ).fetchone()
            if not row:
                continue
            fetched.append({
                "id": row[0], "question": row[1], "side": row[2],
                "response": _ej.loads(row[3]),
                "created_at": row[4],
            })
    except Exception as e:
        return {"ok": False, "error": f"db: {type(e).__name__}: {e}"}
    if not fetched:
        return {"ok": False, "error": "none_found"}

    # ── Build the docx ──────────────────────────────────────────
    doc = DocxDocument()
    style = doc.styles['Normal']
    style.font.name = 'David'
    style.font.size = Pt(11)
    # Also set the complex-script and high-ANSI font slots on the Normal
    # style, creating the w:rFonts element if the style lacks one.
    rPr = style.element.get_or_add_rPr()
    rFonts = rPr.find(qn('w:rFonts'))
    if rFonts is None:
        rFonts = OxmlElement('w:rFonts')
        rPr.append(rFonts)
    rFonts.set(qn('w:cs'), 'David')
    rFonts.set(qn('w:hAnsi'), 'David')

    def _rtl(p):
        # Mark a paragraph as bidirectional (w:bidi) and right-align it.
        pPr = p._p.get_or_add_pPr()
        bidi = OxmlElement('w:bidi')
        bidi.set(qn('w:val'), '1')
        pPr.append(bidi)
        p.alignment = WD_ALIGN_PARAGRAPH.RIGHT

    # Cover page
    title_text = body.title or f"חוברת תשובות משפטיות · {_dt.date.today().strftime('%d/%m/%Y')}"
    title = doc.add_heading(title_text, level=0)
    _rtl(title)
    for run in title.runs:
        run.font.color.rgb = RGBColor(0x4F, 0x46, 0xE5)
    sub = doc.add_paragraph()
    _rtl(sub)
    r = sub.add_run(f"מכיל {len(fetched)} תשובות · נוצר על-ידי tau-rag")
    r.italic = True
    r.font.size = Pt(10)
    r.font.color.rgb = RGBColor(0x94, 0xA3, 0xB8)

    # Table of contents
    toc_h = doc.add_heading("תוכן עניינים", level=2)
    _rtl(toc_h)
    for i, qa in enumerate(fetched, 1):
        q = (qa["question"] or "")[:80]
        p = doc.add_paragraph()
        _rtl(p)
        p.add_run(f"{i}. {q}").font.size = Pt(11)
    doc.add_page_break()

    # Per-Q&A section
    for i, qa in enumerate(fetched, 1):
        # A stored response that is valid JSON but not an object (list,
        # string, number) is rendered as an empty answer instead of
        # crashing the whole export on `.get`.
        resp = qa["response"] if isinstance(qa["response"], dict) else {}
        # Q heading
        q_h = doc.add_heading(f"{i}. {qa['question'] or 'שאלה ללא כותרת'}", level=1)
        _rtl(q_h)
        # Metadata line
        md_p = doc.add_paragraph()
        _rtl(md_p)
        meta_parts = []
        if qa.get("created_at"):
            meta_parts.append(_dt.date.fromtimestamp(qa["created_at"]).strftime("%d/%m/%Y"))
        if qa.get("side"):
            meta_parts.append(f"צד: {qa['side']}")
        if resp.get("confidence"):
            tier_he = {"high": "ביטחון גבוה", "borderline": "ביטחון בינוני",
                       "out_of_scope": "מחוץ לקורפוס"}.get(resp["confidence"], "")
            if tier_he:
                meta_parts.append(tier_he)
        if resp.get("domain"):
            meta_parts.append(f"תחום: {resp['domain']}")
        r = md_p.add_run(" · ".join(meta_parts))
        r.italic = True
        r.font.size = Pt(9)
        r.font.color.rgb = RGBColor(0x94, 0xA3, 0xB8)
        # Disclaimer
        if resp.get("disclaimer_he"):
            disc_p = doc.add_paragraph()
            _rtl(disc_p)
            disc_p.paragraph_format.left_indent = Pt(12)
            disc_p.paragraph_format.right_indent = Pt(12)
            r = disc_p.add_run(resp["disclaimer_he"])
            r.italic = True
            r.font.size = Pt(10)
        # Arguments
        args = resp.get("arguments") or []
        if args:
            ah = doc.add_heading("טיעונים", level=3)
            _rtl(ah)
            for j, a in enumerate(args[:5], 1):
                p = doc.add_paragraph()
                _rtl(p)
                p.paragraph_format.line_spacing = 1.5
                p.add_run(f"({j}) ").bold = True
                p.add_run(a.get("argument", ""))
        # Citations
        cits = resp.get("citations") or []
        if cits:
            ch = doc.add_heading("ציטוטים", level=3)
            _rtl(ch)
            for c in cits[:10]:
                p = doc.add_paragraph(style="List Bullet")
                _rtl(p)
                r1 = p.add_run(c.get("case_id", ""))
                r1.bold = True
                if c.get("title") and c["title"] != c.get("case_id"):
                    p.add_run(f" · {c['title']}")
                if c.get("outcome"):
                    p.add_run(f" [{c['outcome']}]").font.size = Pt(9)
        # Statutes
        stats = resp.get("statutes") or []
        if stats:
            sh = doc.add_heading("חוקים מצוטטים", level=3)
            _rtl(sh)
            p = doc.add_paragraph()
            _rtl(p)
            p.add_run(" · ".join(
                str(s) if isinstance(s, str) else s.get("name", "")
                for s in stats[:10]
            ))
        # Page break between Q&As (skip last)
        if i < len(fetched):
            doc.add_page_break()

    # Serialize
    buf = BytesIO()
    doc.save(buf)
    fname = f"tau-rag-qa-pack-{_dt.date.today().strftime('%Y%m%d')}.docx"
    return Response(
        content=buf.getvalue(),
        media_type=("application/vnd.openxmlformats-officedocument."
                    "wordprocessingml.document"),
        headers={"Content-Disposition": f'attachment; filename="{fname}"'},
    )


@app.delete("/v1/lawyer/answer/{qa_id}")
def lawyer_answer_delete(qa_id: str, request: Request = None):  # type: ignore
    """Hard-delete a saved Q&A. Required for GDPR right-to-erasure.

    Auth model: a single admin token in TAU_RAG_ADMIN_TOKEN. Without
    it the endpoint refuses (returns "not_configured" so a casual
    visitor can't enumerate). When configured, requires
    X-TAU-Admin-Token header to match. This is intentional minimum
    auth — the GDPR delete flow is rare; full RBAC isn't justified here.

    Returns:
        ``{"ok": True, "deleted": <rowcount>, "id": qa_id}`` on success,
        otherwise ``{"ok": False, "error": ...}`` with ``not_configured``,
        ``unauthorized``, ``bad_id`` or ``<ExceptionType>: <message>``.
    """
    import hmac
    admin_token = _os.environ.get("TAU_RAG_ADMIN_TOKEN", "")
    if not admin_token:
        return {"ok": False, "error": "not_configured"}
    got = (request.headers.get("x-tau-admin-token") or "") if request else ""
    # Constant-time comparison so response timing does not reveal how
    # much of the token matched. Compared as bytes because
    # compare_digest rejects non-ASCII str arguments.
    if not hmac.compare_digest(got.encode("utf-8"), admin_token.encode("utf-8")):
        return {"ok": False, "error": "unauthorized"}
    if not qa_id or not qa_id.replace("-", "").replace("_", "").isalnum():
        return {"ok": False, "error": "bad_id"}
    try:
        conn = _lawyer_qa_db()
        cur = conn.execute("DELETE FROM lawyer_qa WHERE id = ?", (qa_id,))
        conn.commit()
        return {
            "ok": True,
            "deleted": cur.rowcount,
            "id": qa_id,
        }
    except Exception as e:
        return {"ok": False, "error": f"{type(e).__name__}: {e}"}


def _format_lawyer_response(
    *,
    confidence: str,  # "high" / "borderline" / "out_of_scope"
    relevance: float,
    domain_top: Optional[str],
    domain_score: float,
    n_hits: int,
    top_hits: Optional[List[Dict[str, Any]]] = None,
    syn_result: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Shape the response into a structure the frontend can render
    directly without re-parsing the synthesizer dump.

    Surface fields: answered, confidence, headline_he, disclaimer_he,
    arguments[], citations[{case_id, title, url, outcome}], statutes[],
    outcome_summary, raw (full synthesizer dict).

    Frontend renders headline/disclaimer at top, lists arguments and
    citations explicitly. Old response shape (`result` dict) preserved
    in `raw` for any caller that depended on it.

    Args:
        confidence: Tier label; "out_of_scope" short-circuits and returns
            only the base fields plus headline and disclaimer.
        relevance: Retrieval relevance; rounded to 3 decimals in the
            output and to 2 decimals inside the borderline disclaimer.
        domain_top: Top domain label, passed through as ``domain``.
        domain_score: Score for ``domain_top``; rounded to 3 decimals.
        n_hits: Number of retrieval hits, exposed as ``n_corpus_hits``.
        top_hits: Hits to expose as ``top_hits`` (``None`` becomes ``[]``).
        syn_result: Synthesizer output; ``None`` yields empty arguments,
            citations, statutes and outcome summary.

    Returns:
        The response dict. Arguments are capped at 5, citations and
        statutes at 10 each; citations are de-duplicated by case ID.
    """
    base = {
        "answered": confidence != "out_of_scope",
        "confidence": confidence,
        "relevance": round(relevance, 3),
        "domain": domain_top,
        "domain_score": round(domain_score, 3),
        "n_corpus_hits": n_hits,
        # Always include top hits so the frontend can show "this is what
        # we found" even for out_of_scope/borderline tiers — the user
        # can judge relevance themselves.
        "top_hits": top_hits or [],
    }
    if confidence == "out_of_scope":
        base["headline_he"] = "השאלה אינה מכוסה בקורפוס"
        if top_hits:
            base["disclaimer_he"] = (
                "השאלה רחוקה מכל פסיקה בקורפוס שלנו. "
                "התוצאות הקרובות ביותר שמצאנו (להלן) מקבלות ציון דמיון נמוך — "
                "כנראה לא רלוונטיות. כדאי לבדוק במקור אחר."
            )
        else:
            base["disclaimer_he"] = (
                "לא נמצאו תוצאות. ייתכן שהשאלה לא בעברית, או שאין כיסוי לתחום הזה. "
                "כדאי לבדוק במקור אחר (נבו, פסק דין, או מערכת המשפט)."
            )
        return base

    # Extract arguments + citations from the synthesizer result for
    # easy frontend rendering.
    cba = ((syn_result or {}).get("case_based_arguments") or {})
    drafts = cba.get("drafted_arguments_for_user") or []
    arguments = []
    citations: List[Dict[str, Any]] = []
    seen_case_ids: set = set()
    for d in drafts[:5]:  # cap — frontend renders top 5
        arg_text = d.get("argument", "") or ""
        arguments.append({
            "argument": arg_text,
            "polish_method": d.get("polish_method", "verbatim"),
            "n_sources": d.get("n_sources", 0),
            # v2.89.2 — judgment section the supporting paragraph came
            # from (arguments_plaintiff / discussion / holding / etc.).
            # Frontend renders this as a small chip per argument.
            "section_origin": d.get("section_origin"),
        })
        # Pull case citations out of the draft metadata
        for src in (d.get("source_cases") or []):
            cid = src.get("case_id") or src.get("id")
            if not cid or cid in seen_case_ids:
                continue
            seen_case_ids.add(cid)
            citations.append({
                "case_id": cid,
                "title": src.get("title") or cid,
                "url": src.get("url"),
                "outcome": src.get("outcome"),
                "score": src.get("score"),
            })

    statutes = cba.get("statute_references") or []
    outcome_stats = cba.get("outcome_distribution") or {}

    if confidence == "high":
        base["headline_he"] = "תשובה מבוססת פסיקה"
        base["disclaimer_he"] = (
            "התשובה נבנית מתוך פסיקה דומה עובדתית. אין בה ייעוץ משפטי — "
            "החלטה סופית טעונה בחינה נקודתית של עו\"ד מוסמך."
        )
    else:  # borderline
        base["headline_he"] = "תשובה משוערת — דמיון נמוך לקורפוס"
        base["disclaimer_he"] = (
            f"רמת הדמיון של השאלה לפסיקה הקיימת ({round(relevance, 2)}) "
            "נמוכה מהרגיל. הציטוטים שלהלן עשויים להיות פחות רלוונטיים — "
            "כדאי להצליב עם מקור משפטי נוסף לפני שימוש בתוכן."
        )

    base.update({
        "arguments": arguments,
        "citations": citations[:10],
        "statutes": statutes[:10],
        "outcome_summary": outcome_stats,
        "raw": syn_result,  # full synthesizer dump for power users
    })
    return base


# ──────────────────────────────────────────────────────────────────────
# SSE-streamed Q&A endpoint.
#
# Why this exists alongside POST /v1/lawyer/ask:
#   The two-phase rendering (early=true → early=false) we already have
#   gives a fast time-to-first-paint (~1s for signals + top_hits). But
#   once the synthesizer lands, all 5 arguments + 10 citations + 10
#   statutes appear at once — a sudden "wall of text" jump.
#
# This endpoint yields events progressively:
#   1. event:phase1        — signals + top_hits (~1s)
#   2. event:synthesizing  — ~1s heartbeat while the synthesizer runs
#   3. event:argument_start / argument_chunk / argument_end
#                          — each argument, split into text chunks
#   4. event:meta          — citations + statutes + outcomes
#   5. event:done          — permalink_id, completion marker
#
# This is "pseudo-streaming" — we don't stream the model itself
# (synthesizer is sync). The server emits the chunks back-to-back with
# no sleeps; the client queues them and paces the rendering, which
# gives the UX feel of "the answer is being typed in", which is what
# users expect from modern AI surfaces.
# ────────────────────────────────────────────────────────────────────── @app.post("/v1/lawyer/ask-stream") async def lawyer_ask_stream(body: _LawyerQARequest, request: Request = None): # type: ignore import asyncio import json as _sj from fastapi.responses import StreamingResponse from starlette.concurrency import run_in_threadpool def _evt(event: str, data: Dict[str, Any]) -> str: """Format a single SSE event line.""" return f"event: {event}\ndata: {_sj.dumps(data, ensure_ascii=False)}\n\n" async def generate(): q = (body.question or "").strip() if not q: yield _evt("error", {"reason": "empty_question"}) return # ── Phase 1: signals (in threadpool because retrieval is sync) def _do_signals(): domain_top = None domain_score = 0.0 try: from ..domain_classifier import classify d = classify(q) domain_top = d.get("top") domain_score = float((d.get("scores") or {}).get(domain_top, 0.0)) except Exception: pass from ..pipeline import get_pipeline from ..core.types import Query as _Query pipe = get_pipeline() relevance = 0.0 n_hits = 0 top_hits: List[Dict[str, Any]] = [] try: qry = _Query(text=q) retrieved = pipe.retrievers.search(qry, k=body.top_k) ranked = sorted( retrieved, key=lambda r: float(getattr(r, "score", 0.0)), reverse=True, ) scored = [float(getattr(r, "score", 0.0)) for r in ranked] n_hits = len(scored) if scored: relevance = sum(scored[:3]) / min(3, len(scored)) for r in ranked[:3]: chunk = getattr(r, "chunk", None) or r cid = (getattr(chunk, "doc_id", None) or getattr(r, "doc_id", None)) md = getattr(chunk, "metadata", {}) or {} top_hits.append({ "case_id": str(cid or ""), "title": md.get("title") or str(cid or ""), "score": round(float(getattr(r, "score", 0.0)), 3), "snippet": (getattr(chunk, "text", "") or "")[:200], }) except Exception: pass if relevance >= body.borderline_below: conf = "high" elif relevance >= body.out_of_scope_below or domain_score >= 0.6: conf = "borderline" else: conf = "out_of_scope" return { "confidence": conf, "relevance": 
relevance, "domain_top": domain_top, "domain_score": domain_score, "n_hits": n_hits, "top_hits": top_hits, "pipe": pipe, } try: sig = await run_in_threadpool(_do_signals) except Exception as e: yield _evt("error", {"reason": "phase1_failed", "error": f"{type(e).__name__}: {e}"}) return # First event — even out_of_scope gets a phase1 with top_hits early = _format_lawyer_response( confidence=sig["confidence"], relevance=sig["relevance"], domain_top=sig["domain_top"], domain_score=sig["domain_score"], n_hits=sig["n_hits"], top_hits=sig["top_hits"], ) early["phase"] = "early" yield _evt("phase1", early) # Out of scope — done after phase1 if sig["confidence"] == "out_of_scope": yield _evt("done", {"answered": False, "confidence": "out_of_scope"}) return # ── Phase 2: full synthesizer (heavy) def _do_synth(): from ..intelligence import StrategySynthesizer # type: ignore pipe = sig["pipe"] named = getattr(pipe.retrievers, "_retrievers", {}) or {} cbr_r = named.get("hebrew_encoder") or pipe.retrievers syn = StrategySynthesizer( retriever=pipe.retrievers, cbr_retriever=cbr_r, full_text_loader=lambda did: pipe.get_text(did) or "", pipeline=pipe, polish_with_tau_llm=False, ) result = syn.synthesize( user_facts=q, side=body.side or "plaintiff", top_k=body.top_k, ) return (result.to_dict() if hasattr(result, "to_dict") else dict(result)) # Day 42 — heartbeat during synth. # Background: _do_synth() is a single blocking call (StrategySynthesizer # does CBR retrieval → doctrine classify → argument building → polish). # It runs in a threadpool, so the async loop is free — but emits NOTHING # for 5-15s. From the user's perspective: phase1 lands, then silence, # then everything in a burst. That's why "no streaming feel" even though # it's technically SSE. # Fix: race _do_synth against a heartbeat loop that emits "synthesizing" # events every ~1s with rotating Hebrew status messages, so the UI keeps # visibly updating during the silent middle. 
synth_task = asyncio.create_task(run_in_threadpool(_do_synth)) _HEARTBEAT_MSGS = [ "מאתר פסיקה רלוונטית מהקורפוס...", "מסווג דוקטרינות משפטיות...", "בונה טיעון מהפסיקה (verbatim)...", "מחבר ציטוטים לחקיקה...", "מוודא דיוק הציטוטים...", "מסכם תוצאות מהפסיקה...", "מארגן את התשובה...", ] _hb_i = 0 while not synth_task.done(): try: # Wait for either the task to finish or 1s heartbeat tick await asyncio.wait_for(asyncio.shield(synth_task), timeout=1.0) except asyncio.TimeoutError: msg = _HEARTBEAT_MSGS[_hb_i % len(_HEARTBEAT_MSGS)] yield _evt("synthesizing", { "step": _hb_i + 1, "message_he": msg, }) _hb_i += 1 continue except Exception: break try: syn_dict = synth_task.result() except Exception as e: yield _evt("error", {"reason": "synth_failed", "error": f"{type(e).__name__}: {e}"}) return # Format the final response so we know what to stream final = _format_lawyer_response( confidence=sig["confidence"], relevance=sig["relevance"], domain_top=sig["domain_top"], domain_score=sig["domain_score"], n_hits=sig["n_hits"], top_hits=sig["top_hits"], syn_result=syn_dict, ) # Persist for permalink try: qa_id = _save_lawyer_answer(q, body.side or "plaintiff", final, request) if qa_id: final["permalink_id"] = qa_id final["permalink_url"] = f"/?qa={qa_id}" except Exception: pass # Day 44 — sentence/clause-aware streaming for "processual" feel. # User feedback after Day 43: ~28-char word chunks at 150ms felt # too rapid — "everything appears in fractions of a second", not # processual. Pivot to LARGER chunks (sentences/clauses, ~80-150 # chars) at slower pacing (~450ms), with clear visible pauses # between arguments (1.2s). Each chunk now feels like a # deliberate step, not a stream of letters. def _chunk_text(text: str, target: int = 100): """Yield chunks of ~target chars, preferring the FIRST sentence-end (.!?\\n) found in window [target-40, target+60], then the first clause-end (,;:), then any word boundary at ~target. 
Forward search keeps chunks close to target — backward search wrongly favored the trailing punctuation of the whole text and yielded one mega-chunk.""" if not text: return n = len(text) i = 0 while i < n: # Tail shorter than target+30 → yield it whole if n - i <= target + 30: yield text[i:] return lo = i + max(40, target - 40) hi = min(n - 1, i + target + 60) strong = -1 for j in range(lo, hi + 1): c = text[j] if c in ".!?\n" and (j + 1 >= n or text[j + 1] in " \n\t"): strong = j + 1 break if strong > 0: yield text[i:strong] i = strong continue weak = -1 for j in range(lo, hi + 1): c = text[j] if c in ",;:" and (j + 1 >= n or text[j + 1] == " "): weak = j + 1 break if weak > 0: yield text[i:weak] i = weak continue end = min(i + target, n) while end < n and text[end] not in " \n\t": end += 1 if end < n: end += 1 yield text[i:end] i = end # Day 45 — queue-based streaming. # Backend pushes ALL chunks as fast as possible (no sleeps). # Client buffers them in a local queue and renders at a fixed # cadence. Decouples backend wall-clock from user-facing pace: # backend stays fast, user sees smooth controlled stream # regardless of network jitter or server load. 
for i, arg in enumerate(final.get("arguments") or []): arg_text = arg.get("argument") or "" head_meta = {k: v for k, v in arg.items() if k != "argument"} yield _evt("argument_start", {"index": i, **head_meta}) for chunk in _chunk_text(arg_text, target=100): yield _evt("argument_chunk", { "index": i, "text": chunk, }) yield _evt("argument_end", {"index": i}) # Citations + statutes + outcomes — single batch event yield _evt("meta", { "citations": final.get("citations") or [], "statutes": final.get("statutes") or [], "outcome_summary": final.get("outcome_summary") or {}, }) # Done — frontend can stop listening, save to history yield _evt("done", { "confidence": final.get("confidence"), "permalink_id": final.get("permalink_id"), "permalink_url": final.get("permalink_url"), "answered": final.get("answered"), }) return StreamingResponse( generate(), media_type="text/event-stream", headers={ "Cache-Control": "no-cache", "X-Accel-Buffering": "no", # disable nginx buffering for SSE }, ) @app.post("/v1/lawyer/ask") def lawyer_ask(body: _LawyerQARequest, request: Request = None): # type: ignore """Answer a lawyer's question — with three-tier confidence. Pipeline: 1. Domain classifier → top legal domain + score 2. Quick retrieval probe → top-k similarity scores → relevance 3. Determine confidence tier from relevance + domain_score: • relevance >= borderline_below → "high" • relevance >= out_of_scope_below → "borderline" (still answers, but with a clear caveat) • else → "out_of_scope" (refuses, returns hint) 4. For high/borderline: run synthesizer, structure the response with explicit arguments + citations + disclaimer. 5. For out_of_scope: return the hint, don't run the synthesizer. The middle tier ("borderline") is the key change. Lawyers often ask questions that are *partially* covered — with the old binary gate they got either a confident answer or a flat refusal. Now they get an answer with explicit "low confidence" framing. 
""" q = (body.question or "").strip() if not q: return {"answered": False, "confidence": "out_of_scope", "reason": "empty_question"} # 1. Domain classification domain_top = None domain_score = 0.0 try: from ..domain_classifier import classify d = classify(q) domain_top = d.get("top") domain_score = float((d.get("scores") or {}).get(domain_top, 0.0)) except Exception: pass # 2. Retrieval relevance probe — also captures top-3 hits with their # case IDs/scores so the response can show the user WHAT was found # when relevance is borderline (instead of a bare 0.27 number). from ..pipeline import get_pipeline from ..core.types import Query as _Query pipe = get_pipeline() relevance = 0.0 n_hits = 0 top_hits: List[Dict[str, Any]] = [] try: query = _Query(text=q) retrieved = pipe.retrievers.search(query, k=body.top_k) # Sort once, keep both scores and case info ranked = sorted( retrieved, key=lambda r: float(getattr(r, "score", 0.0)), reverse=True, ) scored = [float(getattr(r, "score", 0.0)) for r in ranked] n_hits = len(scored) if scored: relevance = sum(scored[:3]) / min(3, len(scored)) # Capture top-3 for the borderline explainer for r in ranked[:3]: chunk = getattr(r, "chunk", None) or r cid = (getattr(chunk, "doc_id", None) or getattr(r, "doc_id", None) or getattr(chunk, "id", None)) md = getattr(chunk, "metadata", {}) or {} top_hits.append({ "case_id": str(cid or ""), "title": md.get("title") or str(cid or ""), "score": round(float(getattr(r, "score", 0.0)), 3), "snippet": (getattr(chunk, "text", "") or "")[:200], }) except Exception: pass # 3. Determine confidence tier. Domain score acts as a tie-breaker: # a high-confidence domain match can rescue borderline relevance # (the question IS clearly legal, our corpus just doesn't have a # close fact pattern — answer with caveat). 
if relevance >= body.borderline_below: confidence = "high" elif relevance >= body.out_of_scope_below or domain_score >= 0.6: confidence = "borderline" else: confidence = "out_of_scope" if confidence == "out_of_scope": # Don't persist out-of-scope answers — nothing useful to share. return _format_lawyer_response( confidence="out_of_scope", relevance=relevance, domain_top=domain_top, domain_score=domain_score, n_hits=n_hits, top_hits=top_hits, ) # Early return — `early=true` means "give me just the signals, skip # the synthesizer". The frontend will render a placeholder screen # with these signals + top_hits, then re-query with early=false # for the full arguments. Cuts time-to-first-paint from 5-10s to # ~1-2s (just the retrieval probe). if body.early: early_resp = _format_lawyer_response( confidence=confidence, relevance=relevance, domain_top=domain_top, domain_score=domain_score, n_hits=n_hits, top_hits=top_hits, syn_result=None, # skipped — frontend knows it's early ) early_resp["phase"] = "early" return early_resp # 4. Run synthesizer try: from ..intelligence import StrategySynthesizer # type: ignore named = getattr(pipe.retrievers, "_retrievers", {}) or {} cbr_r = named.get("hebrew_encoder") or pipe.retrievers syn = StrategySynthesizer( retriever=pipe.retrievers, cbr_retriever=cbr_r, full_text_loader=lambda did: pipe.get_text(did) or "", pipeline=pipe, polish_with_tau_llm=False, ) result = syn.synthesize( user_facts=q, side=body.side or "plaintiff", top_k=body.top_k, ) syn_dict = (result.to_dict() if hasattr(result, "to_dict") else dict(result)) formatted = _format_lawyer_response( confidence=confidence, relevance=relevance, domain_top=domain_top, domain_score=domain_score, n_hits=n_hits, top_hits=top_hits, syn_result=syn_dict, ) # Enrich with hierarchical-graph bundle when available. Adds the # structured doctrine view (anchor + applications + origins + # statute_refs + fact_mapping) alongside the existing flat # arguments. 
Frontend can render either based on which fields # are populated. Failure is non-fatal — bundle is purely # additive. try: from ..hierarchical_graph import get_or_build_hgraph hg = get_or_build_hgraph(pipe) bundle = hg.build_argument(q, side=body.side or "plaintiff") bundle_dict = bundle.to_dict() # Skip empty bundle (no cluster found) so frontend can fall # back to flat arguments rendering if bundle_dict.get("cluster_id"): formatted["bundle"] = bundle_dict # Promote the bundle's anchor_quote to arguments[0] when # the bundle is confident. The case_based_arguments # pipeline does semantic clustering over top-K hits and # can surface the dominant CORPUS theme rather than the # cluster the query routes to. The graph bundle is the # doctrine-aware source of truth. graph_argument = bundle.to_argument_text() if graph_argument: # v2.89.3 — surface section_origin on the graph_bundle # entry too (the bundle's anchor_section from the # diagnostic dict). if bundle_dict.get("diagnostic"): graph_argument.setdefault( "section_origin", bundle_dict["diagnostic"].get("anchor_section")) # Mark legacy entries as supplementary so the UI can # de-emphasize them when graph_bundle is leading. legacy = formatted.get("arguments") or [] for la in legacy: if la.get("polish_method") != "graph_bundle": la["is_supplementary"] = True formatted["arguments"] = ([graph_argument] + legacy)[:6] # Confidence quality gate. The relevance-based confidence # set earlier (line ~19792) only checks BM25/dense scores — # which return *something* even for OOS queries. The bundle # is the doctrine-aware source of truth: if `can_promote()` # is False, the cluster routing didn't find a strong-enough # anchor, so we shouldn't claim "high" confidence regardless # of how relevance scored. Downgrade to "borderline" so the # UI shows the disclaimer prominently. 
if not bundle.can_promote() and formatted.get("confidence") == "high": formatted["confidence"] = "borderline" formatted["confidence_downgraded"] = "weak_bundle" except Exception as _bundle_e: # Log but don't fail the request print(f"[lawyer_ask] hgraph bundle skipped: {_bundle_e}") # v2.96 (Day 7) — Tier B fallback. When Tier A's bundle is weak # (borderline or out_of_scope confidence) we query the per-domain # shard router for additional verbatim citations from the 525K-doc # corpus. Surfaced as a separate `tier_b_results` field so the UI # can render them under a "ציטוטים נוספים" header — distinct # quality bar from the main Tier A answer. confidence_now = formatted.get("confidence", "") if confidence_now in ("borderline", "out_of_scope"): try: from ..retrieve.shard_router import get_shard_router router = get_shard_router() if router.is_available(): tier_b = router.search(q, k=5) if tier_b: formatted["tier_b_results"] = tier_b except Exception as _tb_e: print(f"[lawyer_ask] tier_b fallback skipped: {_tb_e}") # v2.96.4 (Day 7) — Curated doctrine catalog match. Same # pattern as /v1/argument/analyze: run classify_doctrines on # the question, surface as doctrine_match if score>=1.5. try: from ..intelligence.doctrine_classifier import classify_doctrines doc_matches = classify_doctrines(q, k=3, min_score=1.0) if doc_matches: top = doc_matches[0] dm = top.to_dict() if len(doc_matches) > 1: dm["alternatives"] = [m.to_dict() for m in doc_matches[1:3]] formatted["doctrine_match"] = dm except Exception as _dc_e: print(f"[lawyer_ask] doctrine_classifier skipped: {_dc_e}") # Persist for permalink — best-effort, never blocks the response. # 24h post-startup, also opportunistically prune expired entries. 
try: qa_id = _save_lawyer_answer(q, body.side or "plaintiff", formatted, request) if qa_id: formatted["permalink_id"] = qa_id formatted["permalink_url"] = f"/?qa={qa_id}" except Exception: pass # Cheap periodic cleanup — run once per ~1000 answers try: if (sum(_WA_STATS.get(k, 0) for k in ("kept",)) % 1000) == 0: _lawyer_qa_prune() except Exception: pass return formatted except Exception as e: return { "answered": False, "reason": "synthesizer_error", "error": f"{type(e).__name__}: {e}", } # ────────────────────────────────────────────────────────────────────── # v2.93.0 (Day 4) — Argument Classifier # # /v1/argument/analyze takes a single legal claim + side and returns # bilateral analysis: pro_arguments (supporting), con_arguments # (opposing), strength_score, doctrine bundle, and missing facts. # # Implementation: calls the existing synthesizer twice (plaintiff + # defendant frames) to get both sides, de-dups by case ID, classifies # each retrieved paragraph by lexical heuristic into a section hint, # and scores strength as the pro/(pro+con) ratio of retrieval scores. # # No external LLM. No corpus reindex. Honest about its heuristic nature # in `meta.method`. # ────────────────────────────────────────────────────────────────────── import re as _argclf_re # local alias — module-level `re` isn't imported class _ArgumentAnalyzeRequest(BaseModel): # type: ignore claim: str side: Optional[str] = "אובייקטיבי" top_k: int = 10 # Heuristic section classifier — runs on each retrieved paragraph text. # Order matters: ruling/holding markers checked BEFORE party-claim markers # (some rulings quote the parties before deciding). 
_SECTION_RULING_RX = _argclf_re.compile( r"(הכרעה|סבורני|לפיכך|אני קובע|אני פוסק|מסקנת|המסקנה|" r"בית[\s\-]?המשפט קבע|נדחית|נדחתה|מתקבלת|מתקבל)" ) _SECTION_PLAINTIFF_RX = _argclf_re.compile( r"(טוען\s+ה?תובע|לטענת\s+ה?תובע|טענת\s+ה?תובע|" r"טוען\s+ה?מבקש|לטענת\s+ה?מבקש|טענת\s+ה?מבקש|" r"לדבריו\s+של\s+ה?תובע|העותר\s+טוען|טענות\s+ה?עותר)" ) _SECTION_DEFENDANT_RX = _argclf_re.compile( r"(טוען\s+ה?נתבע|לטענת\s+ה?נתבע|טענת\s+ה?נתבע|" r"טוען\s+ה?משיב|לטענת\s+ה?משיב|טענת\s+ה?משיב|" r"לדבריו\s+של\s+ה?נתבע|המשיבה\s+טוענת|טענות\s+ה?משיב)" ) _SECTION_DISCUSSION_RX = _argclf_re.compile( r"(לעניין\s+זה|כידוע|מן\s+הראוי|המבחן\s+הוא|" r"מבחן\s+ה|הלכה\s+פסוקה|נפסק\s+כי|הלכת)" ) def _classify_paragraph_section(text: str) -> str: """Lexical classifier — returns one of: 'plaintiff_claim', 'defendant_claim', 'ruling', 'discussion', 'unlabeled' """ t = text[:500] # short prefix is enough; full-text scan would over-match if _SECTION_RULING_RX.search(t): return "ruling" if _SECTION_PLAINTIFF_RX.search(t): return "plaintiff_claim" if _SECTION_DEFENDANT_RX.search(t): return "defendant_claim" if _SECTION_DISCUSSION_RX.search(t): return "discussion" return "unlabeled" def _strength_caption(score: int) -> str: """Strength score → Hebrew caption per LANDING_COPY.""" if score >= 90: return "טענה חזקה — פסיקה תומכת חזקה ומגוונת" if score >= 75: return "טענה חזקה יחסית — פסיקה תומכת מספקת" if score >= 50: return "טענה בינונית — פסיקה מעורבת" if score >= 25: return "טענה חלשה — פסיקה ברובה סותרת" return "טענה חלשה מאוד או חסרה פסיקה רלוונטית" # v2.99 (Day 9) — Extract case citation from argument text when the # `source_cases` metadata is empty. The synthesizer often embeds the # citation in-line like `[CL-בגץ7585_01]` or as a Hebrew citation # `בג"ץ 7585/01` near the start. Without this, every result showed # "(ללא ציטוט)" which crushed credibility. 
# Internal corpus marker, e.g. `[CL-<court><number>_<year>]`.
_CITE_BRACKET_RX = _argclf_re.compile(
    r"\[CL-([א-תa-zA-Z0-9_]+)\]"
)
# Native Hebrew citation: court abbreviation (optional ASCII quote mark)
# followed by digit groups separated by `/` or `-`.
_CITE_HEBREW_RX = _argclf_re.compile(
    r"\b((?:ע\"?א|ע\"?פ|בג\"?ץ|רע\"?א|רע\"?פ|דנ\"?א|דנ\"?פ|בש\"?פ|"
    r"ע\"?ע|בג\"?צ|בש\"?א|תמ\"?ש|ה\"?פ|בה\"?ן)\s*\d+(?:[\/\-]\d+)+)"
)


def _extract_citation_from_text(text: str) -> Optional[str]:
    """Best-effort case-citation extraction from the first 1500 characters.

    Tries, in order:
      1. A native Hebrew citation (court abbreviation + number/year).
      2. A `[CL-...]` bracket marker, normalized back to Hebrew form.

    Args:
        text: Argument text; empty/None yields ``None``.

    Returns:
        The most readable citation found, or ``None`` when neither pattern
        matches.
    """
    if not text:
        return None
    head = text[:1500]

    # 1. Native Hebrew citation — most user-friendly.
    m = _CITE_HEBREW_RX.search(head)
    if m:
        return m.group(1).strip()

    # 2. CL-bracket — normalize back to Hebrew form.
    m = _CITE_BRACKET_RX.search(head)
    if m:
        raw = m.group(1)
        # NOTE: the two prefixes containing a quote mark can never match,
        # because _CITE_BRACKET_RX's character class excludes `"`. They are
        # kept for parity with the original table.
        for prefix, hebrew in (
            ("בגץ", 'בג"ץ '),
            ("ע\"א", 'ע"א '),  # already with quote
            ("עא", 'ע"א '),
            ("רעא", 'רע"א '),
            ("רעפ", 'רע"פ '),
            ("דנא", 'דנ"א '),
            ("ע\"ע", 'ע"ע '),
            ("עע", 'ע"ע '),
            ("בשפ", 'בש"פ '),
            ("בשא", 'בש"א '),
            ("הפ", 'ה"פ '),
            ("תמש", 'תמ"ש '),
        ):
            if raw.startswith(prefix):
                rest = raw[len(prefix):].replace("_", "/")
                return f"{hebrew}{rest}"
        # Unknown prefix — just return raw with _ → /
        return raw.replace("_", "/")
    return None


def _shape_argument_for_analyze(arg: dict, section_hint: str) -> dict:
    """Reshape a synthesizer draft into the /v1/argument/analyze item shape.

    Keeps the verbatim text, adds ``section_hint`` and trims fields the UI
    does not need. The case label is taken from the first entry of
    ``source_cases`` / ``sources`` / ``citations`` (``title``, ``case_id``
    or ``id``), then from a flat ``source_case_id``, and as a last resort is
    parsed out of the argument text.

    Args:
        arg: Draft dict produced by the synthesizer.
        section_hint: Label from ``_classify_paragraph_section``.

    Returns:
        Dict with ``text``, ``source_case``, ``source_score`` (rounded to
        3 decimals), ``section_hint`` and ``n_sources``.
    """
    text = (arg.get("argument") or arg.get("text") or "")
    sources = (arg.get("source_cases") or arg.get("sources")
               or arg.get("citations") or [])
    first = sources[0] if sources else {}

    # The first source is expected to be a dict; tolerate a bare string
    # (used as the label) or any other type instead of raising on `.get`.
    plain_label = first if isinstance(first, str) else None
    primary_src = first if isinstance(first, dict) else {}

    case_label = (primary_src.get("title") or primary_src.get("case_id")
                  or primary_src.get("id") or plain_label
                  or arg.get("source_case_id"))
    if not case_label:
        # v2.99 — last resort: parse the text
        case_label = _extract_citation_from_text(text) or "(ללא ציטוט)"

    return {
        "text": text,
        "source_case": case_label,
        "source_score": round(
            float(primary_src.get("score") or arg.get("score") or 0.0), 3),
        "section_hint": section_hint,
        "n_sources": int(arg.get("n_sources") or len(sources) or 0),
    }


@app.post("/v1/argument/analyze")
def argument_analyze(body: _ArgumentAnalyzeRequest,
                     request: Request = None):  # type: ignore
    """Bilateral argument analysis for a single legal claim.

    Runs the synthesizer once per frame (plaintiff, defendant), de-dups the
    drafts by source case, classifies each by section, and returns a
    pro/con split plus a 0-100 strength score, doctrine bundle, missing
    facts, optional Tier B citations and a curated doctrine match.

    v2.94 (Day 5): gated via ``check_entitlement(..., "argument_analyze")``.
    The original notes state this is Solo+ and costs 5 credits per call.
    Identity is the ``X-User-Email`` header; with no header the email is
    ``None`` (treated as anonymous by the entitlement layer).

    NOTE(review): the identity header is client-supplied and is read here
    without verification — confirm an upstream layer authenticates it.

    Returns:
        A result dict with ``ok: True`` on success; ``{"ok": False, ...}``
        for an empty claim or failed imports; a 402/403 ``JSONResponse``
        when the entitlement check denies the call.
    """
    claim = (body.claim or "").strip()
    if not claim:
        return {"ok": False, "reason": "empty_claim"}

    # Tier gate
    user_email = None
    if request is not None:
        user_email = (request.headers.get("X-User-Email") or "").strip() or None
    from ..middleware.entitlements import check_entitlement
    decision = check_entitlement(user_email, "argument_analyze")
    if not decision.allowed:
        # 402 Payment Required for credits, 403 Forbidden for tier-locked.
        status = 402 if decision.reason == "credits_exhausted" else 403
        body_he = {
            "ok": False,
            "reason": decision.reason,
            "entitlement": decision.to_dict(),
            "message_he": (
                "המנוי שלך הגיע למכסת הקרדיטים החודשית. "
                "שדרג ל-Pro לקבלת 1,500 קרדיטים בחודש."
                if decision.reason == "credits_exhausted" else
                "ניתוח טענה הוא פיצ׳ר חבילות בתשלום (Solo ומעלה). "
                "שדרג ל-Solo (₪199 לחיים — Founding 50)."
            ),
            "upgrade_url": "/pricing",
        }
        return JSONResponse(status_code=status, content=body_he)

    try:
        from ..pipeline import get_pipeline
        from ..intelligence import StrategySynthesizer
        from ..hierarchical_graph import get_or_build_hgraph
    except Exception as e:
        return {"ok": False, "reason": "import_failed",
                "error": f"{type(e).__name__}: {e}"}

    pipe = get_pipeline()
    named = getattr(pipe.retrievers, "_retrievers", {}) or {}
    cbr_r = named.get("hebrew_encoder") or pipe.retrievers
    syn = StrategySynthesizer(
        retriever=pipe.retrievers,
        cbr_retriever=cbr_r,
        full_text_loader=lambda did: pipe.get_text(did) or "",
        pipeline=pipe,
        polish_with_tau_llm=False,
    )

    # Run synthesizer twice — one frame per side. The synthesizer returns
    # a nested dict: result["case_based_arguments"]["drafted_arguments_for_user"]
    # is the list of args. Each draft has `argument` (text), `source_cases`
    # (case metadata), `polish_method`, `section_origin`.
    def _frame(side: str) -> list:
        try:
            r = syn.synthesize(user_facts=claim, side=side, top_k=body.top_k)
            d = r.to_dict() if hasattr(r, "to_dict") else dict(r)
            cba = d.get("case_based_arguments") or {}
            return cba.get("drafted_arguments_for_user") or []
        except Exception as _e:
            print(f"[argument/analyze] _frame({side}) failed: {_e}")
            return []

    pro_raw = _frame("plaintiff")
    con_raw = _frame("defendant")

    def _first_source(arg: dict) -> dict:
        # First source entry as a dict; {} when absent or not a dict.
        src_list = arg.get("source_cases") or arg.get("sources") or []
        first = src_list[0] if src_list else None
        return first if isinstance(first, dict) else {}

    def _arg_src_id(arg: dict) -> str:
        # De-dup key: source case identity, else the first 80 chars of text.
        text_key = (arg.get("argument") or arg.get("text") or "")[:80]
        s0 = _first_source(arg)
        if s0:
            return (s0.get("case_id") or s0.get("id")
                    or s0.get("title") or text_key)
        return arg.get("source_case_id") or text_key

    def _arg_score(arg: dict) -> float:
        s0 = _first_source(arg)
        if s0.get("score") is not None:
            return float(s0["score"])
        return float(arg.get("score") or 0.0)

    # De-dup by source case across the two sets — a case shouldn't
    # appear on both sides simultaneously. Keep the higher-scoring entry;
    # on a tie the earlier one (pro before con) wins. The same rule now
    # applies to duplicates within one side, where previously the later
    # draft silently replaced the earlier one regardless of score.
    seen: dict = {}  # case_id → (side, score, arg)
    for side_tag, drafts in (("pro", pro_raw), ("con", con_raw)):
        for arg in drafts:
            if not isinstance(arg, dict):
                continue
            src = _arg_src_id(arg)
            s = _arg_score(arg)
            prev = seen.get(src)
            if prev is None or s > prev[1]:
                seen[src] = (side_tag, s, arg)

    # Build the bilateral split with section classification
    pro_args = []
    con_args = []
    for side, _score, arg in seen.values():
        text = arg.get("argument") or arg.get("text") or ""
        section = _classify_paragraph_section(text)
        shaped = _shape_argument_for_analyze(arg, section)
        (pro_args if side == "pro" else con_args).append(shaped)

    # Strength score: pro/(pro+con) of retrieval scores, scaled to 0-100.
    # The 0.001 floor avoids a zero division when one side has no score.
    if pro_args or con_args:
        pro_sum = sum(a["source_score"] for a in pro_args) or 0.001
        con_sum = sum(a["source_score"] for a in con_args) or 0.001
        raw_strength = pro_sum / (pro_sum + con_sum)
        strength_score = int(round(raw_strength * 100))
    else:
        # No arguments on either side: previously 0.001/0.002 produced 50
        # ("mixed case law"), which misreports an empty result. Zero maps
        # to the caption that covers missing case law.
        strength_score = 0

    # Doctrine bundle (shared between both sides)
    doctrine = None
    missing_facts: list = []
    try:
        hg = get_or_build_hgraph(pipe)
        bundle = hg.build_argument(claim, side=body.side or "plaintiff")
        bd = bundle.to_dict()
        if bd.get("cluster_id"):
            doctrine = {
                "anchor_label": bd.get("anchor_label"),
                "anchor_quote": bd.get("anchor_quote"),
                "cluster_score": bd.get("cluster_score"),
                "coverage": bd.get("coverage"),
                "promoted": bundle.can_promote(),
            }
            # Missing facts: fact_mapping entries flagged as not-covered
            for fm in bd.get("fact_mapping") or []:
                if isinstance(fm, dict) and fm.get("covered") is False:
                    missing_facts.append(
                        fm.get("element") or fm.get("label") or "")
        # If can_promote is False, strength is capped at 50
        if doctrine and not doctrine.get("promoted"):
            strength_score = min(strength_score, 50)
    except Exception as e:
        print(f"[argument/analyze] doctrine bundle skipped: {e}")

    # v2.96 (Day 7) — Tier B fallback. When the doctrine bundle is
    # weak (can_promote=False) OR we got <3 total args from Tier A,
    # query the per-domain shard router for additional verbatim
    # citations from the wider 525K-doc corpus. Surfaced as a
    # separate field so the UI can label these as wider-corpus
    # citations (no doctrine framing) rather than mixing with the
    # bilateral Tier A results.
    tier_b_results: list = []
    needs_fallback = (
        (doctrine is None or not doctrine.get("promoted"))
        or (len(pro_args) + len(con_args)) < 3
    )
    if needs_fallback:
        try:
            from ..retrieve.shard_router import get_shard_router
            router = get_shard_router()
            if router.is_available():
                tier_b_results = router.search(claim, k=5)
        except Exception as e:
            print(f"[argument/analyze] tier_b fallback skipped: {e}")

    # v2.96.4 (Day 7) — Curated doctrine catalog match. Run the
    # `classify_doctrines` keyword classifier over the claim text and
    # surface the top match as a separate `doctrine_match` field. UI
    # renders this as a high-trust chip above the bundle anchor, since
    # it's a NAMED doctrine from a curated catalog vs. an emergent cluster.
    doctrine_match = None
    try:
        from ..intelligence.doctrine_classifier import classify_doctrines
        matches = classify_doctrines(claim, k=3, min_score=1.0)
        if matches:
            top = matches[0]
            doctrine_match = top.to_dict()
            # Include up to 2 secondary matches in case the lawyer wants
            # to explore alternatives ("is this really good faith, or
            # could it be estoppel?")
            if len(matches) > 1:
                doctrine_match["alternatives"] = [
                    m.to_dict() for m in matches[1:3]
                ]
    except Exception as e:
        print(f"[argument/analyze] doctrine_classifier skipped: {e}")

    return {
        "ok": True,
        "claim": claim,
        "side": body.side or "אובייקטיבי",
        "doctrine": doctrine,
        "doctrine_match": doctrine_match,
        "pro_arguments": pro_args[:8],
        "con_arguments": con_args[:8],
        "strength_score": strength_score,
        "strength_caption": _strength_caption(strength_score),
        "missing_facts": [f for f in missing_facts if f][:5],
        "tier_b_results": tier_b_results,
        "entitlement": decision.to_dict(),
        "meta": {
            "method": "bilateral_dual_frame",
            "section_classifier": "lexical_heuristic_v1",
            "n_pro": len(pro_args),
            "n_con": len(con_args),
            "n_tier_b": len(tier_b_results),
            "tier_b_triggered": needs_fallback,
        },
    }


# ──────────────────────────────────────────────────────────────────────
# v2.94 (Day 5) — Entitlements query, admin, and billing webhook
#
#   GET  /v1/entitlements/me          — current user reads own tier
#   POST /v1/admin/entitlements/set   — admin grants tier to email
#   GET  /v1/admin/entitlements/list  — admin lists all
#   POST /v1/billing/webhook          — Lemon Squeezy stub (TODO)
# ──────────────────────────────────────────────────────────────────────
class _AdminSetTierRequest(BaseModel):  # type: ignore
    # Request body for POST /v1/admin/entitlements/set.
    email: str
    tier: str
    valid_until_days: Optional[int] = 31
    founding: bool = False


def _check_admin_key(request) -> bool:
    """Return True when the request carries a valid admin key.

    Delegates to ``get_auth().is_admin(key)`` so there is a single source
    of truth for admin keys. Accepts both ``X-API-Key`` (canonical) and
    ``X-Admin-Key`` (legacy) headers.

    Previously this compared the raw value to the TAU_RAG_ADMIN_KEY env
    var directly, which broke when the keystore held multiple admin keys.
    That comparison survives only as a fallback for when the auth store
    cannot be imported or queried.

    Args:
        request: Incoming request; only ``request.headers`` is read.

    Returns:
        ``False`` for a missing/blank key, otherwise the auth decision.
    """
    key = (request.headers.get("X-API-Key")
           or request.headers.get("X-Admin-Key") or "").strip()
    if not key:
        return False
    try:
        from ..middleware.auth import get_auth
        return get_auth().is_admin(key)
    except Exception:
        # Fallback to env-var comparison if auth store unavailable.
        import hmac
        expected = _os.environ.get("TAU_RAG_ADMIN_KEY", "").strip()
        if not expected:
            return False
        # Constant-time comparison; bytes are used because
        # compare_digest rejects non-ASCII str arguments.
        return hmac.compare_digest(key.encode("utf-8"),
                                   expected.encode("utf-8"))


@app.get("/v1/entitlements/me")
def entitlement_me(request: Request):  # type: ignore
    """Return the caller's current tier info plus that tier's definition.

    Identity comes from the ``X-User-Email`` header; a missing/blank
    header is passed to the store as ``None``.
    """
    email = (request.headers.get("X-User-Email") or "").strip() or None
    from ..middleware.entitlements import get_entitlement_store, TIER_DEFS
    store = get_entitlement_store()
    info = store.get_tier(email)
    # Include the tier's pricing copy so the UI can render directly
    tier_def = TIER_DEFS[info["tier"]]
    info["tier_def"] = {
        "monthly_credits": tier_def["monthly_credits"],
        "price_he": tier_def["price_he"],
        "unlocked_endpoints": sorted(tier_def["unlocked_endpoints"]),
    }
    return info


@app.post("/v1/admin/entitlements/set")
def admin_set_tier(body: _AdminSetTierRequest, request: Request):  # type: ignore
    """Grant/revoke a tier manually (admin-only).

    Used pre-payment-integration and for Founding 50 manual assignments.
    Returns 403 without a valid admin key and 400 when the store raises
    ``ValueError``.
    """
    if not _check_admin_key(request):
        return JSONResponse(status_code=403,
                            content={"ok": False, "reason": "admin_required"})
    from ..middleware.entitlements import get_entitlement_store
    store = get_entitlement_store()
    try:
        new_info = store.set_tier(
            email=body.email,
            tier=body.tier,
            valid_until_days=body.valid_until_days,
            founding=body.founding,
        )
        return {"ok": True, "entitlement": new_info}
    except ValueError as e:
        return JSONResponse(status_code=400,
                            content={"ok": False, "reason": str(e)})


@app.get("/v1/admin/entitlements/list")
def admin_list_entitlements(request: Request,  # type: ignore
                            limit: int = 100):
    """List entitlement rows (admin-only).

    Returns:
        ``{"ok": True, "count": <rows returned>, "entitlements": [...]}``.
        ``count`` stays ``None`` only if the store result has no length.
    """
    if not _check_admin_key(request):
        return JSONResponse(status_code=403,
                            content={"ok": False, "reason": "admin_required"})
    from ..middleware.entitlements import get_entitlement_store
    store = get_entitlement_store()
    rows = store.list_all(limit)
    # Previously hard-coded to None; report the size of what is returned.
    count = len(rows) if hasattr(rows, "__len__") else None
    return {"ok": True, "count": count, "entitlements": rows}


@app.post("/v1/billing/webhook")
async def billing_webhook(request: Request):  # type: ignore
    """Lemon Squeezy webhook STUB.

    TODO when LS credentials are available:
      1. Verify HMAC signature: X-Signature header vs
         hmac.new(secret, body, sha256).hexdigest()
      2. Parse event_name from JSON ('subscription_created',
         'subscription_updated', 'subscription_cancelled',
         'order_created' for Founding 50 one-time)
      3. Map product variant_id → tier:
           legal-eye-founding-50 → solo (founding=True, valid_until=None)
           legal-eye-solo        → solo (valid_until_days=31)
           legal-eye-pro         → pro
           legal-eye-firm        → firm
      4. Extract email from data.attributes.user_email
      5. Call store.set_tier(email, tier, valid_until_days, founding)

    Right now: logs the payload length and a signature prefix, and
    returns 200 so LS doesn't retry.
    """
    try:
        body_bytes = await request.body()
        sig = request.headers.get("X-Signature", "(none)")
        # Log only — actual processing deferred to when LS credentials
        # exist and we wire up the HMAC verifier.
        print(f"[billing/webhook] received len={len(body_bytes)} sig={sig[:16]}...")
        # Lemon Squeezy expects 200 within 5s or it retries with
        # exponential backoff. We ack immediately, process async
        # (eventually).
        return {"ok": True, "stub": True,
                "note": "webhook handler not yet wired — payload logged"}
    except Exception as e:
        print(f"[billing/webhook] error: {e}")
        return JSONResponse(status_code=500,
                            content={"ok": False, "error": str(e)})


# ──────────────────────────────────────────────────────────────────────
# v2.95 (Day 6) — Litigation Memo Generator
#
# POST /v1/memo/generate
#
# Takes a pre-computed argument analysis result + the original claim and
# produces a Hebrew RTL .docx memo file. The client is expected to have
# JUST run /v1/argument/analyze and pass that result back here — avoids
# the cost of re-running the bilateral synthesis. Memo cost = 15 credits
# (per MONETIZATION.md), so a complete workflow is:
#   1. POST /v1/argument/analyze  (5 credits, returns analysis)
#   2. POST /v1/memo/generate     (15 credits, returns DOCX)
# Total: 20 credits per case = ~10 memos/month on Solo (200 credits).
#
# Returns binary DOCX with Content-Type
# application/vnd.openxmlformats-officedocument.wordprocessingml.document.
# ──────────────────────────────────────────────────────────────────────
class _MemoGenerateRequest(BaseModel):  # type: ignore
    claim: str
    side: Optional[str] = "אובייקטיבי"
    # The full result dict from /v1/argument/analyze. Client passes its
    # last-known-good analysis to avoid the re-compute cost.
    analysis: Optional[Dict[str, Any]] = None
    # Optional free-text facts the lawyer wants to embed. Rendered as-is
    # in the factual-background section.
    facts: Optional[str] = ""
    # Optional author name; when empty no author is printed in the header.
    author_name: Optional[str] = ""


def _build_argument_memo_docx(claim: str, side: str, analysis: dict,
                              facts: str = "",
                              author_name: str = "") -> bytes:
    """Generate a Hebrew RTL .docx memo from an argument analysis result.

    Sections, in order: header (title, date, side, optional author), the
    claim, factual background (free text or a placeholder), relevant
    doctrine (when ``analysis["doctrine"]["anchor_label"]`` is set),
    supporting arguments, opposing arguments, missing facts, summary with
    strength score, and a disclaimer footer.

    Args:
        claim: The claim text, rendered verbatim.
        side: Point-of-view label; empty falls back to the neutral label.
        analysis: Result dict from /v1/argument/analyze; ``None`` is
            treated as an empty dict.
        facts: Free text; paragraphs are split on blank lines.
        author_name: Printed in the header when non-empty.

    Returns:
        The serialized .docx document as bytes.

    Raises:
        RuntimeError: If python-docx is not installed.
    """
    from io import BytesIO
    import datetime as _dt
    try:
        from docx import Document as DocxDocument
        from docx.shared import Pt, Inches, RGBColor
        from docx.enum.text import WD_ALIGN_PARAGRAPH
        from docx.oxml.ns import qn
        from docx.oxml import OxmlElement
    except ImportError as e:
        raise RuntimeError(
            "python-docx not installed; pip install python-docx") from e

    analysis = analysis or {}
    doc = DocxDocument()

    # Default font + RTL
    style = doc.styles["Normal"]
    style.font.name = "David"
    style.font.size = Pt(11)
    rPr = style.element.get_or_add_rPr()
    rFonts = rPr.find(qn("w:rFonts"))
    if rFonts is None:
        rFonts = OxmlElement("w:rFonts")
        rPr.append(rFonts)
    rFonts.set(qn("w:cs"), "David")
    rFonts.set(qn("w:hAnsi"), "David")

    old_gold = RGBColor(0xBF, 0x9B, 0x30)
    slate = RGBColor(0x64, 0x74, 0x8B)
    light_slate = RGBColor(0x94, 0xA3, 0xB8)

    def _rtl(p):
        # Mark the paragraph bidirectional and right-align it.
        pPr = p._p.get_or_add_pPr()
        bidi = OxmlElement("w:bidi")
        bidi.set(qn("w:val"), "1")
        pPr.append(bidi)
        p.alignment = WD_ALIGN_PARAGRAPH.RIGHT
        return p

    def _h(text: str, level: int = 1):
        # RTL heading coloured Old Gold.
        h = doc.add_heading(text, level=level)
        _rtl(h)
        for run in h.runs:
            run.font.color.rgb = old_gold
        return h

    def _para(text: str = "", italic: bool = False, color=None,
              size: int = 11):
        # RTL body paragraph; the run is only created for non-empty text.
        p = doc.add_paragraph()
        _rtl(p)
        p.paragraph_format.line_spacing = 1.45
        if text:
            r = p.add_run(text)
            r.italic = italic
            r.font.size = Pt(size)
            if color is not None:
                r.font.color.rgb = color
        return p

    def _render_arguments(heading: str, args: list) -> None:
        # One numbered, verbatim paragraph per argument plus a source line.
        # Shared by the supporting and opposing sections.
        if not args:
            return
        _h(f"{heading} ({len(args)})", level=2)
        for i, arg in enumerate(args, 1):
            p = _para()
            num = p.add_run(f"{i}. ")
            num.bold = True
            text = (arg.get("text") or "").strip()
            # Cap each arg at ~800 chars to keep the memo readable
            p.add_run(text[:800] + ("..." if len(text) > 800 else ""))
            # Source line under each argument
            src = arg.get("source_case") or "(ללא ציטוט)"
            score = arg.get("source_score") or 0
            sp = _para()
            sp.paragraph_format.left_indent = Inches(0.3)
            sr = sp.add_run(f"📖 מקור: {src}")
            sr.font.size = Pt(9)
            sr.font.color.rgb = slate
            if score:
                # round() rather than int(): int(0.29 * 100) yields 28.
                rank = int(round(float(score) * 100))
                sr2 = sp.add_run(f" · דירוג: {rank}")
                sr2.font.size = Pt(9)
                sr2.font.color.rgb = slate

    # ── Header ─────────────────────────────────────────────────
    title = doc.add_heading("מזכר משפטי — ניתוח טענה", level=0)
    _rtl(title)
    for run in title.runs:
        run.font.color.rgb = old_gold
    meta = _para()
    date_str = _dt.date.today().strftime("%d/%m/%Y")
    # The former lookup table mapped each known label to itself, so it
    # reduces to "use the given side, else the neutral label".
    side_label = side or "אובייקטיבי"
    meta.add_run(f"תאריך: {date_str}").font.size = Pt(10)
    meta.add_run(f" · נקודת מבט: {side_label}").font.size = Pt(10)
    if author_name:
        meta.add_run(f" · הוכן ע״י: {author_name}").font.size = Pt(10)

    # Claim card
    _h("הטענה לבדיקה", level=2)
    p = _para(claim)
    p.paragraph_format.left_indent = Inches(0.0)

    # ── 1. Factual background ──────────────────────────────────
    _h("רקע עובדתי", level=2)
    if facts and facts.strip():
        for para in facts.split("\n\n"):
            if para.strip():
                _para(para.strip())
    else:
        _para(
            "[ למלא ע״י עוה״ד — מועדים, צדדים, השתלשלות עניינים, "
            "נקודות מחלוקת ]",
            italic=True, color=light_slate, size=10,
        )

    # ── 2. Relevant doctrine ───────────────────────────────────
    doctrine = analysis.get("doctrine") or {}
    if doctrine.get("anchor_label"):
        _h("הדוקטרינה הרלוונטית", level=2)
        p = _para()
        r = p.add_run(f"🎯 {doctrine['anchor_label']}")
        r.bold = True
        if not doctrine.get("promoted"):
            r2 = p.add_run(" ⚠ ביטחון נמוך — איכות הקלאסטר תחת הסף")
            r2.font.size = Pt(9)
            r2.font.color.rgb = RGBColor(0xB4, 0x53, 0x09)
        if doctrine.get("anchor_quote"):
            quote = doctrine["anchor_quote"].strip()
            ellipsis = "..." if len(quote) > 600 else ""
            qp = _para(f'"{quote[:600]}{ellipsis}"',
                       italic=True, color=RGBColor(0x47, 0x55, 0x69),
                       size=10)
            qp.paragraph_format.left_indent = Inches(0.3)

    # ── 3. Supporting arguments ────────────────────────────────
    pro_args = analysis.get("pro_arguments") or []
    _render_arguments("טענות תומכות", pro_args)

    # ── 4. Opposing arguments ──────────────────────────────────
    con_args = analysis.get("con_arguments") or []
    _render_arguments("טענות נגדיות", con_args)

    # ── 5. Missing facts ───────────────────────────────────────
    missing = analysis.get("missing_facts") or []
    if missing:
        _h("עובדות חסרות לחיזוק הטענה", level=2)
        for f in missing:
            p = _para(f"• {f}")
            p.paragraph_format.left_indent = Inches(0.3)

    # ── 6. Summary + strength score ────────────────────────────
    _h("סיכום", level=2)
    score = analysis.get("strength_score") or 0
    caption = analysis.get("strength_caption") or ""
    sp = _para()
    sr = sp.add_run(f"ציון חוזק: {score}/100")
    sr.bold = True
    sr.font.size = Pt(13)
    sr.font.color.rgb = old_gold
    if caption:
        sp.add_run(f" · {caption}").font.size = Pt(11)
    n_pro = len(pro_args)
    n_con = len(con_args)
    _para(
        f"הניתוח מבוסס על {n_pro + n_con} מקורות "
        f"({n_pro} תומכים, {n_con} נגדיים). "
        f"כל הציטוטים מילה במילה מהפסיקה — אין הזיות AI."
    )

    # ── Footer / Disclaimer ────────────────────────────────────
    doc.add_paragraph()  # blank
    footer_h = doc.add_heading("הסתייגות", level=3)
    _rtl(footer_h)
    for run in footer_h.runs:
        run.font.color.rgb = light_slate
    _para(
        "⚠ Legal Eye הוא כלי מחקר משפטי, לא ייעוץ משפטי. "
        "המזכר מבוסס על שליפת ציטוטים מקורפוס פסיקת בית-המשפט העליון "
        "ועל heuristics לסיווג בעד/נגד. החלטה משפטית טעונה בדיקה אישית "
        "של עורך-דין מוסמך לנסיבות הספציפיות של התיק.",
        italic=True, color=slate, size=9,
    )
    _para(
        f"נוצר ע״י Legal Eye בתאריך {date_str} · "
        f"https://legal-eye.1bigfam.com",
        italic=True, color=light_slate, size=8,
    )

    buf = BytesIO()
    doc.save(buf)
    return buf.getvalue()


@app.post("/v1/memo/generate")
def memo_generate(body: _MemoGenerateRequest, request: Request):  # type: ignore
    """Build a Hebrew RTL .docx memo from a pre-computed argument analysis.

    Gated via ``check_entitlement(..., "memo_generate")``; the original
    notes state this is Solo+ at 15 credits.

    Returns:
        The DOCX as a binary ``Response`` with entitlement headers, or a
        ``JSONResponse``: 400 for an empty claim/analysis, 402/403 when the
        entitlement check denies the call, 500 when the build fails.
    """
    claim = (body.claim or "").strip()
    if not claim:
        return JSONResponse(status_code=400,
                            content={"ok": False, "reason": "empty_claim"})
    analysis = body.analysis or {}
    if not analysis.get("pro_arguments") and not analysis.get("con_arguments"):
        return JSONResponse(
            status_code=400,
            content={"ok": False, "reason": "empty_analysis",
                     "message_he": "תוצאת ניתוח חסרה — הרץ /v1/argument/analyze קודם."})

    # Tier gate (15 credits per memo)
    user_email = (request.headers.get("X-User-Email") or "").strip() or None
    from ..middleware.entitlements import check_entitlement
    decision = check_entitlement(user_email, "memo_generate")
    if not decision.allowed:
        status = 402 if decision.reason == "credits_exhausted" else 403
        return JSONResponse(status_code=status, content={
            "ok": False,
            "reason": decision.reason,
            "entitlement": decision.to_dict(),
            "message_he": (
                "המנוי שלך הגיע למכסת הקרדיטים. שדרוג ל-Pro פותח מזכרים ללא הגבלה."
                if decision.reason == "credits_exhausted" else
                "מזכר משפטי הוא פיצ׳ר חבילות בתשלום (Solo ומעלה). "
                "שדרג ל-Solo (₪199 לחיים — Founding 50)."
            ),
        })

    # Build the DOCX
    try:
        docx_bytes = _build_argument_memo_docx(
            claim=claim,
            side=body.side or "אובייקטיבי",
            analysis=analysis,
            facts=body.facts or "",
            author_name=body.author_name or "",
        )
    except RuntimeError as e:
        # python-docx missing — server-side install issue, not client
        return JSONResponse(status_code=500, content={
            "ok": False, "reason": "docx_unavailable", "error": str(e)})
    except Exception as e:
        print(f"[memo/generate] build error: {e}")
        return JSONResponse(status_code=500, content={
            "ok": False, "reason": "build_failed",
            "error": f"{type(e).__name__}: {e}"})

    # Filename: date + the filename-safe characters among the first 30
    # characters of the claim.
    import datetime as _dt
    from urllib.parse import quote as _url_quote
    today = _dt.date.today().strftime('%Y-%m-%d')
    safe_claim = "".join(c for c in claim[:30] if c.isalnum() or c in " _-")
    fname = f"מזכר_{today}_{safe_claim.strip()}.docx"
    # Header values are encoded as latin-1 when the response is built, so
    # the Hebrew name cannot be sent raw (it raised UnicodeEncodeError).
    # Send an ASCII fallback plus the RFC 6266 `filename*` form, which
    # carries the real name percent-encoded as UTF-8.
    content_disposition = (
        f'attachment; filename="memo_{today}.docx"; '
        f"filename*=UTF-8''{_url_quote(fname, safe='')}"
    )
    headers = {
        "Content-Disposition": content_disposition,
        # Surface remaining credits in a response header so the UI can
        # update the badge without a second request
        "X-Entitlement-Credits-Remaining": str(decision.credits_remaining),
        "X-Entitlement-Tier": decision.tier,
    }
    return Response(
        content=docx_bytes,
        media_type=("application/vnd.openxmlformats-officedocument"
                    ".wordprocessingml.document"),
        headers=headers,
    )


# ──────────────────────────────────────────────────────────────────────
# v2.96 (Day 7) — Tier B router observability
#
# GET /v1/system/shards — admin-only. Shows which Tier B shards exist,
# which are loaded in memory, LRU hit/load/eviction counts. Used to
# debug "why didn't query X return tier_b_results" tickets.
# ──────────────────────────────────────────────────────────────────────

# ──────────────────────────────────────────────────────────────────────
# v2.96.6 (Day 7) — Procedural-cluster browser
#
# GET /v1/clusters/procedure/grouped
#
# Filters the global doctrine clusters (built by clustering.py over the
# 17K curated corpus) to PROCEDURAL ones, then sub-classifies into
# civil / criminal / administrative based on:
#   1.
# Keyword markers used to recognise procedural clusters and to assign them
# to a sub-domain when neither the catalog nor member metadata decides it.
_PROCEDURAL_KEYWORDS_CIVIL = (
    "תקנה 100", "תקנה 41", "תקנה 42", "תקנה 43",
    "סדר הדין האזרחי", "תקנות סדר הדין",
    "סילוק על הסף", "מעשה בית-דין", "מעשה בית דין",
    "השתק פלוגתא", "שיהוי בהגשת תביעה",
)
_PROCEDURAL_KEYWORDS_CRIMINAL = (
    "חזקת חפות", "חזקת החפות", "זכות השתיקה",
    "אזהרה לפני חקירה", "פירות העץ המורעל",
    "פסילת ראיות", "ספק סביר", "מעבר לכל ספק",
    "סדר הדין הפלילי",
)
_PROCEDURAL_KEYWORDS_ADMIN = (
    "מבחן הסבירות", "מתחם הסבירות", "חובת ההנמקה",
    "חובת הנמקה", "שיהוי בעתירה", "סדרי המינהל",
    "חוק בתי משפט לעניינים מנהליים", "Wednesbury",
)


def _classify_cluster_subdomain(cluster, members_resolved: list) -> str:
    """Classify a cluster as 'civil', 'criminal', 'administrative' or 'general'.

    Three signals are tried in order and the first one that decides wins:

    1. Doctrine catalog: a leading-case citation contained in the cluster's
       ``anchor_label`` selects that doctrine's ``sub_domain``, or failing
       that a sub-domain inferred from its ``domain``.
    2. The most frequent ``metadata["domain"]`` among ``members_resolved``.
    3. Procedural keywords found in ``anchor_label`` + ``anchor_quote``.

    Any exception raised while consulting the catalog is ignored and the
    remaining signals are used.
    """
    known_subdomains = ("civil", "criminal", "administrative")

    # --- Signal 1: doctrine catalog -------------------------------------
    try:
        from ..intelligence.doctrine_classifier import load_doctrine_catalog
        catalog = load_doctrine_catalog()
        label = (getattr(cluster, "anchor_label", "") or "").strip()
        for doctrine in catalog.get("doctrines", []):
            for leading in doctrine.get("leading_cases", []) or []:
                citation = leading.get("citation", "")
                if not citation or citation not in label:
                    continue
                explicit = doctrine.get("sub_domain")
                if explicit in known_subdomains:
                    return explicit
                # No explicit sub_domain — infer from the doctrine's domain.
                domain = doctrine.get("domain")
                if domain == "criminal":
                    return "criminal"
                if domain == "administrative":
                    return "administrative"
                if domain == "procedure":
                    return "civil"
    except Exception:
        pass

    # --- Signal 2: dominant member domain -------------------------------
    tally: dict = {}
    for member in members_resolved:
        meta = member.get("metadata") if isinstance(member, dict) else None
        if not meta:
            continue
        member_domain = meta.get("domain")
        if member_domain:
            tally[member_domain] = tally.get(member_domain, 0) + 1
    if tally:
        # First-seen domain wins ties (dicts iterate in insertion order).
        dominant = max(tally, key=tally.get)
        if dominant == "criminal":
            return "criminal"
        if dominant == "administrative":
            return "administrative"
        civil_side = ("contracts", "torts", "labor", "family", "corporate",
                      "banking", "tax", "property", "evidence", "procedure")
        if dominant in civil_side:
            return "civil"  # all are civil-side procedural matters

    # --- Signal 3: keyword sweep ----------------------------------------
    text = " ".join((
        getattr(cluster, "anchor_label", "") or "",
        getattr(cluster, "anchor_quote", "") or "",
    ))
    ordered_checks = (
        ("criminal", _PROCEDURAL_KEYWORDS_CRIMINAL),
        ("administrative", _PROCEDURAL_KEYWORDS_ADMIN),
        ("civil", _PROCEDURAL_KEYWORDS_CIVIL),
    )
    for subdomain, keywords in ordered_checks:
        for keyword in keywords:
            if keyword in text:
                return subdomain
    return "general"


def _cluster_is_procedural(cluster, members_resolved: list) -> bool:
    """Heuristically decide whether a cluster concerns procedural rulings.

    True when any of the following holds:

    * ``anchor_label`` / ``anchor_quote`` contains a procedural keyword;
    * the cluster's ``domain`` attribute equals ``"procedure"``;
    * at least ``max(2, len(members_resolved) // 3)`` of the resolved
      members carry ``metadata["domain"] == "procedure"``.
    """
    text = " ".join((
        getattr(cluster, "anchor_label", "") or "",
        getattr(cluster, "anchor_quote", "") or "",
    ))
    keyword_groups = (
        _PROCEDURAL_KEYWORDS_CIVIL,
        _PROCEDURAL_KEYWORDS_CRIMINAL,
        _PROCEDURAL_KEYWORDS_ADMIN,
    )
    for group in keyword_groups:
        for keyword in group:
            if keyword in text:
                return True

    if getattr(cluster, "domain", None) == "procedure":
        return True

    procedural_members = 0
    for member in members_resolved:
        if not isinstance(member, dict):
            continue
        if (member.get("metadata") or {}).get("domain") == "procedure":
            procedural_members += 1
    threshold = max(2, len(members_resolved) // 3)
    return procedural_members >= threshold
"origins", []) or [])[:5]) members_resolved = [] for mid in sample_member_ids: d = doc_by_id.get(mid) if d: members_resolved.append({ "id": d.id, "metadata": getattr(d, "metadata", {}) or {}, }) if not _cluster_is_procedural(c, members_resolved): continue sub = _classify_cluster_subdomain(c, members_resolved) summary = cluster_summary(c) # Hydrate with catalog match if anchor cites a known doctrine anchor_label = getattr(c, "anchor_label", "") or "" catalog_match = None for cite, ent in citation_to_doctrine.items(): if cite in anchor_label: catalog_match = ent break if catalog_match: summary["catalog_match"] = catalog_match summary["sub_domain"] = sub grouped[sub].append(summary) # Sort each group by cluster size (n_applications) and cap for k, items in grouped.items(): items.sort(key=lambda x: -(x.get("n_applications", 0) or x.get("size", 0))) grouped[k] = items[:limit_per_sub] return { "ok": True, "groups": grouped, "n_civil": len(grouped["civil"]), "n_criminal": len(grouped["criminal"]), "n_administrative": len(grouped["administrative"]), "n_general": len(grouped["general"]), "meta": { "method": "catalog_match + member_domain_majority + keyword_fallback", "catalog_doctrines": len(catalog.get("doctrines", [])), "total_clusters_in_pipe": len(all_clusters), }, } except Exception as e: return JSONResponse(status_code=500, content={ "ok": False, "error": f"{type(e).__name__}: {e}", }) @app.get("/v1/system/shards/available") def system_shards_available(): # type: ignore """Public-readable: just the list of Tier B shards loaded by the router. No auth. 
@app.get("/v1/system/shards")
def system_shards(request: Request):  # type: ignore
    """Admin-only view of the ShardRouter status plus per-shard manifests.

    Returns 403 when the admin-key check fails. Otherwise returns the
    router's ``status()`` dict together with the parsed
    ``<shards_dir>/<shard>/retriever_state/manifest.json`` of every
    available shard that has one. A manifest that cannot be read or parsed
    is reported as ``{"error": "manifest unreadable"}``. Any other failure
    yields a 500 with reason ``router_unavailable``.
    """
    if not _check_admin_key(request):
        return JSONResponse(
            status_code=403,
            content={"ok": False, "reason": "admin_required"},
        )

    try:
        from ..retrieve.shard_router import get_shard_router
        import json as _json
        from pathlib import Path

        router_status = get_shard_router().status()

        # Manifests are discovered at boot — useful for monitoring
        # (n_docs, n_chunks, build_time).
        base_dir = Path(router_status["shards_dir"])
        manifests = {}
        for name in router_status["available_shards"]:
            manifest_path = base_dir / name / "retriever_state" / "manifest.json"
            if not manifest_path.exists():
                continue
            try:
                raw = manifest_path.read_text(encoding="utf-8")
                manifests[name] = _json.loads(raw)
            except Exception:
                manifests[name] = {"error": "manifest unreadable"}

        return {
            "ok": True,
            "router": router_status,
            "shard_manifests": manifests,
        }
    except Exception as e:
        failure = {
            "ok": False,
            "reason": "router_unavailable",
            "error": f"{type(e).__name__}: {e}",
        }
        return JSONResponse(status_code=500, content=failure)
import threading as _demo_threading
import time as _demo_time
from collections import deque

# Simple in-memory IP → request timestamps (deque). Persistent only
# within process lifetime — fine since this is anti-spam, not a hard
# quota. Each IP allowed 10 requests/hour.
_DEMO_RATE_WINDOW_SEC = 3600  # 1 hour
_DEMO_RATE_LIMIT = 10
_DEMO_RATE_BUCKETS: Dict[str, deque] = {}
# Once this many IPs are tracked, buckets whose newest timestamp has left
# the window are dropped. Without this the dict only ever grows, and its
# keys come from a client-settable header.
_DEMO_RATE_MAX_TRACKED_IPS = 10000
# Serialises check-and-record so a prune can never drop a bucket that
# another thread is about to append to.
_DEMO_RATE_LOCK = _demo_threading.Lock()


def _demo_check_rate(ip: str) -> tuple:
    """Record one demo request for ``ip`` if it is within the hourly limit.

    Returns ``(allowed, retry_after_sec)``. When allowed, the request's
    timestamp is stored and ``retry_after_sec`` is 0. When denied, nothing
    is stored and ``retry_after_sec`` is the time until the oldest recorded
    request leaves the window, never reported as less than 60 seconds.
    """
    now = _demo_time.time()
    cutoff = now - _DEMO_RATE_WINDOW_SEC
    with _DEMO_RATE_LOCK:
        # Bounded memory: prune idle IPs only when the table is large, so
        # the common path stays O(1).
        if len(_DEMO_RATE_BUCKETS) >= _DEMO_RATE_MAX_TRACKED_IPS:
            stale = [
                key for key, stamps in _DEMO_RATE_BUCKETS.items()
                if key != ip and (not stamps or stamps[-1] < cutoff)
            ]
            for key in stale:
                del _DEMO_RATE_BUCKETS[key]

        bucket = _DEMO_RATE_BUCKETS.setdefault(ip, deque())
        # Evict old timestamps
        while bucket and bucket[0] < cutoff:
            bucket.popleft()
        if len(bucket) >= _DEMO_RATE_LIMIT:
            retry_after = int(bucket[0] + _DEMO_RATE_WINDOW_SEC - now)
            return False, max(retry_after, 60)
        bucket.append(now)
        return True, 0
}) # Rate limit by IP (X-Forwarded-For first if behind proxy) fwd = request.headers.get("X-Forwarded-For", "") ip = (fwd.split(",")[0].strip() if fwd else (request.client.host if request.client else "unknown")) allowed, retry = _demo_check_rate(ip) if not allowed: return JSONResponse(status_code=429, content={ "ok": False, "reason": "rate_limit", "retry_after_sec": retry, "message_he": ( f"רק 10 ניתוחי-דמו לשעה. נסה שוב בעוד " f"{retry//60} דקות, או הירשם ל-Solo (₪199 לחיים) " f"לקבלת ניתוח ללא הגבלה." ), }) # Run the same synthesis pipeline as /v1/argument/analyze, but # capped. We inline a slim version rather than calling the full # endpoint (avoids the entitlement gate that would 403 anonymous). try: from ..pipeline import get_pipeline from ..intelligence import StrategySynthesizer from ..hierarchical_graph import get_or_build_hgraph except Exception as e: return JSONResponse(status_code=500, content={ "ok": False, "reason": "import_failed", "error": f"{type(e).__name__}: {e}", }) pipe = get_pipeline() named = getattr(pipe.retrievers, "_retrievers", {}) or {} cbr_r = named.get("hebrew_encoder") or pipe.retrievers syn = StrategySynthesizer( retriever=pipe.retrievers, cbr_retriever=cbr_r, full_text_loader=lambda did: pipe.get_text(did) or "", pipeline=pipe, polish_with_tau_llm=False, ) def _frame(side: str) -> list: try: r = syn.synthesize(user_facts=claim, side=side, top_k=5) d = r.to_dict() if hasattr(r, "to_dict") else dict(r) cba = d.get("case_based_arguments") or {} return cba.get("drafted_arguments_for_user") or [] except Exception: return [] pro_raw = _frame("plaintiff") con_raw = _frame("defendant") def _shape(arg: dict) -> dict: text = (arg.get("argument") or arg.get("text") or "").strip() sources = arg.get("source_cases") or [] s0 = sources[0] if sources else {} case_id = (s0.get("title") or s0.get("case_id") or arg.get("source_case_id")) if not case_id: # v2.99 — extract from embedded text case_id = _extract_citation_from_text(text) or "(ללא ציטוט)" 
return { "text": text[:400] + ("..." if len(text) > 400 else ""), "source_case": case_id, } # Dedup + cap seen = set() pro_args = [] for arg in pro_raw: src = ((arg.get("source_cases") or [{}])[0].get("case_id") or arg.get("argument", "")[:60]) if src in seen: continue seen.add(src) pro_args.append(_shape(arg)) if len(pro_args) >= 2: break con_args = [] for arg in con_raw: src = ((arg.get("source_cases") or [{}])[0].get("case_id") or arg.get("argument", "")[:60]) if src in seen: continue seen.add(src) con_args.append(_shape(arg)) if len(con_args) >= 2: break # Cheap strength score from synthesizer signals n_total = len(pro_raw) + len(con_raw) if n_total >= 6: strength = 65 + min(20, n_total) # 65-85 range caption = "טענה חזקה יחסית — פסיקה תומכת מספקת" elif n_total >= 3: strength = 50 + n_total * 5 caption = "טענה בינונית — פסיקה מעורבת" else: strength = 25 + n_total * 7 caption = "טענה חלשה — פסיקה ברובה סותרת" # Find doctrine match (catalog) — useful tease for signup doctrine_label = None try: from ..intelligence.doctrine_classifier import classify_doctrines matches = classify_doctrines(claim, k=1, min_score=1.0) if matches: doctrine_label = matches[0].name_he except Exception: pass return { "ok": True, "claim": claim, "strength_score": strength, "strength_caption": caption, "doctrine_label": doctrine_label, "pro_arguments": pro_args, # capped at 2 "con_arguments": con_args, # capped at 2 "locked": { "n_more_pro": max(0, len(pro_raw) - len(pro_args)), "n_more_con": max(0, len(con_raw) - len(con_args)), "message_he": ( "💎 הירשם כדי לראות את הניתוח המלא — כל הטענות, " "ציטוטים מילה במילה ניתנים לפתיחה, ועובדות חסרות." ), }, "meta": { "rate_limit_remaining": _DEMO_RATE_LIMIT - len(_DEMO_RATE_BUCKETS[ip]), }, } # ────────────────────────────────────────────────────────────────────── # Day 39 — Streaming demo analyze (SSE). 
@app.post("/v1/demo/analyze/stream")
def demo_analyze_stream(body: _DemoAnalyzeRequest, request: Request):  # type: ignore
    """SSE-streaming variant of /v1/demo/analyze.

    Every event is one JSON object on a ``data:`` line. On success the
    order is: ``started`` → ``retrieving`` / ``side_done`` (pro, then con)
    → ``shaping`` → ``doctrine_match`` → ``summary`` → ``pro_start`` and one
    ``pro_argument`` per card → ``con_start`` and one ``con_argument`` per
    card → ``done`` (meta only). An ``error`` event (reasons:
    ``empty_claim``, ``claim_too_long``, ``rate_limit``, ``import_failed``,
    ``pipeline_unavailable``) ends the stream.

    Uses the same per-IP rate-limit bucket as /v1/demo/analyze.
    """
    from fastapi.responses import StreamingResponse as _SR
    import json as _j
    import time as _ti

    # Day 42 — explicit pacing between each visible phase so the user
    # actually SEES each step appear instead of one flash of events.
    PHASE_PAUSE = 0.55  # gap between distinct UI phases
    CARD_PAUSE = 0.55   # gap between successive argument cards
    MAX_CARDS_PER_SIDE = 2

    claim = (body.claim or "").strip()

    def _sse(obj):
        """Serialise one event as an SSE ``data:`` frame."""
        return f"data: {_j.dumps(obj, ensure_ascii=False)}\n\n"

    def _shape(arg):
        """Reduce a drafted argument to the public {text, source_case} card."""
        text = (arg.get("argument") or arg.get("text") or "").strip()
        sources = arg.get("source_cases") or []
        s0 = sources[0] if sources else {}
        case_id = (s0.get("title") or s0.get("case_id")
                   or arg.get("source_case_id"))
        if not case_id:
            case_id = _extract_citation_from_text(text) or "(ללא ציטוט)"
        return {
            "text": text[:400] + ("..." if len(text) > 400 else ""),
            "source_case": case_id,
        }

    def _pick(raw, seen):
        """Shape up to MAX_CARDS_PER_SIDE arguments, skipping sources in ``seen``.

        ``seen`` is shared between both sides, so a source already shown as
        a pro card is not repeated as a con card.
        """
        picked = []
        for arg in raw:
            key = ((arg.get("source_cases") or [{}])[0].get("case_id")
                   or arg.get("argument", "")[:60])
            if key in seen:
                continue
            seen.add(key)
            picked.append(_shape(arg))
            if len(picked) >= MAX_CARDS_PER_SIDE:
                break
        return picked

    def _draft(syn, side):
        """Run the synthesizer for one side; a failure yields no arguments."""
        try:
            r = syn.synthesize(user_facts=claim, side=side, top_k=5)
            d = r.to_dict() if hasattr(r, "to_dict") else dict(r)
            cba = d.get("case_based_arguments") or {}
            return cba.get("drafted_arguments_for_user") or []
        except Exception as e:
            # Best-effort: keep streaming, but leave a trace in the log.
            print(f"[demo/analyze/stream] synth failed ({side}): {e}")
            return []

    def gen():
        # ── input validation ──
        if not claim:
            yield _sse({"event": "error", "reason": "empty_claim",
                        "message_he": "אנא הכנס טענה לניתוח."})
            return
        if len(claim) > 600:
            yield _sse({"event": "error", "reason": "claim_too_long",
                        "message_he": "טענה ארוכה מ-600 תווים. קצר ונסה שוב."})
            return

        # ── rate limit ──
        fwd = request.headers.get("X-Forwarded-For", "")
        ip = (fwd.split(",")[0].strip() if fwd
              else (request.client.host if request.client else "unknown"))
        allowed, retry = _demo_check_rate(ip)
        if not allowed:
            yield _sse({
                "event": "error", "reason": "rate_limit",
                "retry_after_sec": retry,
                "message_he": (
                    f"רק 10 ניתוחי-דמו לשעה. נסה שוב בעוד "
                    f"{retry//60} דקות, או הירשם ל-Solo (₪199 לחיים)."
                ),
            })
            return

        # ── started ──
        yield _sse({"event": "started", "claim": claim[:120]})

        # ── import + build synthesizer ──
        try:
            from ..pipeline import get_pipeline
            from ..intelligence import StrategySynthesizer
        except Exception as e:
            yield _sse({"event": "error", "reason": "import_failed",
                        "message_he": f"שגיאת מערכת: {type(e).__name__}"})
            return

        # The response has already started, so a failure here must be
        # reported as an event — an uncaught exception would just drop the
        # connection and leave the client waiting.
        try:
            pipe = get_pipeline()
            named = getattr(pipe.retrievers, "_retrievers", {}) or {}
            cbr_r = named.get("hebrew_encoder") or pipe.retrievers
            syn = StrategySynthesizer(
                retriever=pipe.retrievers,
                cbr_retriever=cbr_r,
                full_text_loader=lambda did: pipe.get_text(did) or "",
                pipeline=pipe,
                polish_with_tau_llm=False,
            )
        except Exception as e:
            print(f"[demo/analyze/stream] pipeline unavailable: {e}")
            yield _sse({"event": "error", "reason": "pipeline_unavailable",
                        "message_he": f"שגיאת מערכת: {type(e).__name__}"})
            return

        # ── retrieve plaintiff side ──
        yield _sse({"event": "retrieving", "side": "pro",
                    "message_he": "מחפש פסיקה לטיעוני בעד..."})
        pro_raw = _draft(syn, "plaintiff")
        yield _sse({"event": "side_done", "side": "pro", "n": len(pro_raw)})

        # ── retrieve defendant side ──
        yield _sse({"event": "retrieving", "side": "con",
                    "message_he": "מחפש פסיקה לטיעונים נגד..."})
        con_raw = _draft(syn, "defendant")
        yield _sse({"event": "side_done", "side": "con", "n": len(con_raw)})

        # ── shape + dedup ──
        yield _sse({"event": "shaping",
                    "message_he": "מתעצב ומסיר כפילויות..."})
        seen = set()
        pro_args = _pick(pro_raw, seen)
        con_args = _pick(con_raw, seen)

        # ── strength ──
        n_total = len(pro_raw) + len(con_raw)
        if n_total >= 6:
            strength = 65 + min(20, n_total)
            caption = "טענה חזקה יחסית — פסיקה תומכת מספקת"
        elif n_total >= 3:
            strength = 50 + n_total * 5
            caption = "טענה בינונית — פסיקה מעורבת"
        else:
            strength = 25 + n_total * 7
            caption = "טענה חלשה — פסיקה ברובה סותרת"

        # ── doctrine match (separate, visible) ──
        doctrine_label = None
        try:
            from ..intelligence.doctrine_classifier import classify_doctrines
            matches = classify_doctrines(claim, k=1, min_score=1.0)
            if matches:
                doctrine_label = matches[0].name_he
        except Exception:
            # Optional teaser only — the analysis is still valid without it.
            pass
        _ti.sleep(PHASE_PAUSE)
        yield _sse({"event": "doctrine_match", "name_he": doctrine_label})

        # ── summary (doctrine + strength + locked counts) ──
        _ti.sleep(PHASE_PAUSE)
        yield _sse({
            "event": "summary",
            "doctrine_label": doctrine_label,
            "strength_score": strength,
            "strength_caption": caption,
            "n_pro": len(pro_args),
            "n_con": len(con_args),
            "locked": {
                "n_more_pro": max(0, len(pro_raw) - len(pro_args)),
                "n_more_con": max(0, len(con_raw) - len(con_args)),
                "message_he": (
                    "💎 הירשם כדי לראות את הניתוח המלא — כל הטענות, "
                    "ציטוטים מילה במילה ניתנים לפתיחה, ועובדות חסרות."
                ),
            },
        })

        # ── Pro side, then Con side ──
        for prefix, cards in (("pro", pro_args), ("con", con_args)):
            _ti.sleep(PHASE_PAUSE)
            yield _sse({"event": f"{prefix}_start", "n_total": len(cards)})
            for i, arg in enumerate(cards):
                _ti.sleep(CARD_PAUSE)
                yield _sse({
                    "event": f"{prefix}_argument",
                    "index": i,
                    "n_total": len(cards),
                    "arg": arg,
                })

        # ── Done — meta only (UI uses this to enable the email-capture form) ──
        _ti.sleep(PHASE_PAUSE)
        yield _sse({
            "event": "done",
            "meta": {
                # .get(): never fail the final event over a missing bucket.
                "rate_limit_remaining":
                    _DEMO_RATE_LIMIT - len(_DEMO_RATE_BUCKETS.get(ip, ())),
                "claim": claim,
            },
        })

    return _SR(gen(), media_type="text/event-stream", headers={
        "Cache-Control": "no-cache",
        "X-Accel-Buffering": "no",  # disable proxy buffering
        "Connection": "keep-alive",
    })
def _flip_side(side: str) -> str:
    """Return the adversarial side for the user's ``side``.

    Matching ignores surrounding whitespace and letter case, and accepts
    Hebrew masculine and feminine forms. A defendant-type value returns
    ``"plaintiff"``. Everything else — plaintiff-type values, empty,
    ``None`` or unrecognised input — returns ``"defendant"``, i.e. the
    attack defaults to the defendant's perspective.
    """
    normalized = (side or "").strip().lower()
    defendant_like = (
        "נתבע", "נתבעת", "משיב", "משיבה",
        "defendant", "respondent",
    )
    if normalized in defendant_like:
        return "plaintiff"
    # Plaintiff-type values ("תובע", "תובעת", "plaintiff", "claimant") and
    # anything unrecognised: attack from the defendant's perspective.
    return "defendant"
), }) # Run synthesizer from the ADVERSARIAL side — give us their best shots try: from ..pipeline import get_pipeline from ..intelligence import StrategySynthesizer from ..hierarchical_graph import get_or_build_hgraph except Exception as e: return JSONResponse(status_code=500, content={ "ok": False, "reason": "import_failed", "error": f"{type(e).__name__}: {e}", }) adversary_side = _flip_side(body.side) pipe = get_pipeline() named = getattr(pipe.retrievers, "_retrievers", {}) or {} cbr_r = named.get("hebrew_encoder") or pipe.retrievers syn = StrategySynthesizer( retriever=pipe.retrievers, cbr_retriever=cbr_r, full_text_loader=lambda did: pipe.get_text(did) or "", pipeline=pipe, polish_with_tau_llm=False, ) # Synthesize the ATTACKING side's case attack_args: list = [] try: r = syn.synthesize(user_facts=claim, side=adversary_side, top_k=body.top_k) d = r.to_dict() if hasattr(r, "to_dict") else dict(r) drafts = (d.get("case_based_arguments") or {}).get( "drafted_arguments_for_user") or [] for arg in drafts[:8]: text = (arg.get("argument") or "").strip() sources = arg.get("source_cases") or [] s0 = sources[0] if sources else {} case_id = (s0.get("title") or s0.get("case_id") or arg.get("source_case_id")) if not case_id: case_id = _extract_citation_from_text(text) or "(ללא ציטוט)" attack_args.append({ "text": text, "source_case": case_id, "strength": "high" if arg.get("score", 0) > 10 else "medium", }) except Exception as e: print(f"[opposing_counsel] synth failed: {e}") # Doctrine match — gives us a known doctrine that the attacker can use counter_doctrines: list = [] try: from ..intelligence.doctrine_classifier import classify_doctrines matches = classify_doctrines(claim, k=3, min_score=1.0) for m in matches: d_dict = m.to_dict() # For Opposing Counsel: surface the doctrine's EXCEPTIONS # (these are what the attacker uses to escape liability) from ..intelligence.doctrine_classifier import get_doctrine_by_id full = get_doctrine_by_id(m.doctrine_id) or {} 
d_dict["exceptions"] = full.get("exceptions") or [] counter_doctrines.append(d_dict) except Exception as e: print(f"[opposing_counsel] doctrine match failed: {e}") # "Weaknesses" — derive from Tier A bundle's missing-facts weaknesses: list = [] try: hg = get_or_build_hgraph(pipe) bundle = hg.build_argument(claim, side=body.side or "plaintiff") bd = bundle.to_dict() for fm in bd.get("fact_mapping") or []: if isinstance(fm, dict) and fm.get("covered") is False: w = fm.get("element") or fm.get("label") or "" if w: weaknesses.append({ "fact": w, "exploitation": ( "טיעון אדוורסרי יציין שעובדה זו חסרה — והעדרה " "מחליש את הטענה מהותית." ), }) except Exception as e: print(f"[opposing_counsel] weaknesses extraction failed: {e}") return { "ok": True, "claim": claim, "user_side": body.side or "תובע", "adversary_side": adversary_side, "attack_arguments": attack_args[:8], "procedural_attacks": _PROCEDURAL_ATTACK_VECTORS_HE[:4], "counter_doctrines": counter_doctrines[:3], "weaknesses_in_your_claim": weaknesses[:5], "verdict_he": ( "🛡 מה הצד השני יטען נגדך — בלי לסנן. השתמש בזה כדי " "להכין תשובות לפני הדיון, לא במקום עורך-דין." ), "entitlement": decision.to_dict(), "meta": { "method": "adversarial_dual_frame", "n_attack_args": len(attack_args), "n_weaknesses": len(weaknesses), }, } # ────────────────────────────────────────────────────────────────────── # v2.99.1 (Day 10) — Demo lead capture # # POST /v1/demo/lead — stores email + optional claim context from # landing-page conversion form. SQLite-backed (`runtime/leads.db`), # rate-limited per-IP (5 leads/hour to prevent spam). # # Used by the landing demo widget after results display: # "💌 רוצה את הניתוח המלא? 
def _ensure_leads_db():
    """Create or upgrade the ``leads`` table at ``_LEADS_DB``.

    Safe to call repeatedly. Creates the parent directory, the table and
    its indexes if missing, then adds the Day 28 status / welcome-tracking
    columns to tables created before they existed. The connection is
    closed before returning.
    """
    import sqlite3
    from contextlib import closing
    from pathlib import Path

    Path(_LEADS_DB).parent.mkdir(parents=True, exist_ok=True)

    # `with conn` only commits or rolls back; `closing` is what actually
    # releases the connection.
    with closing(sqlite3.connect(_LEADS_DB, timeout=5.0)) as conn, conn:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS leads (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                email TEXT NOT NULL,
                name TEXT,
                claim TEXT,
                last_analyzed_claim TEXT,
                source TEXT,
                ip TEXT,
                user_agent TEXT,
                created_at INTEGER NOT NULL
            )
        """)
        conn.execute("CREATE INDEX IF NOT EXISTS idx_leads_email ON leads(email)")
        conn.execute("CREATE INDEX IF NOT EXISTS idx_leads_ts ON leads(created_at)")

        # Day 28 — status + welcome tracking. Added with ALTER TABLE so
        # existing rows pick up the defaults ('new' and 0).
        existing = {row[1] for row in conn.execute("PRAGMA table_info(leads)")}
        migrations = (
            ("status", "TEXT DEFAULT 'new'"),
            ("sent_welcome", "INTEGER DEFAULT 0"),
            ("welcome_method", "TEXT"),
            ("admin_note", "TEXT"),
            ("updated_at", "INTEGER"),
        )
        for col, ddl in migrations:
            if col in existing:
                continue
            try:
                conn.execute(f"ALTER TABLE leads ADD COLUMN {col} {ddl}")
            except sqlite3.Error as exc:
                # A concurrent caller may have added the column first; that
                # is expected. Anything else is logged, not hidden.
                if "duplicate column" not in str(exc).lower():
                    print(f"[leads-db] could not add column {col}: {exc}")

        conn.execute("CREATE INDEX IF NOT EXISTS idx_leads_status ON leads(status)")
Returns 'sent' / 'queued' / 'failed'.""" import json as _j from pathlib import Path email = (lead.get("email") or "").strip().lower() name = (lead.get("name") or "").strip() if not email: return "failed" # Build the email body in Hebrew greeting = f"שלום {name}," if name else "שלום," subject = "ברוך/ה הבא/ה ל-Legal Eye — ניתוח הטענה שלך" text_body = ( f"{greeting}\n\n" "תודה שניסית את Legal Eye!\n\n" "אנחנו עורכי-דין שבונים מנוע מודיעין משפטי בעברית — verbatim ממש " "מהפסיקה, אפס LLM חיצוני, אפס הזיות AI. הציון הציבורי שלנו " "מתעדכן אוטומטית כל שבוע:\n" " https://legal-eye.1bigfam.com/eval\n\n" "מה הלאה?\n\n" "1. כנס לאפליקציה (3 שאלות חינמיות, ללא חשבון):\n" " https://legal-eye.1bigfam.com\n\n" "2. תרצה לראות איך זה שונה מ-ChatGPT? השוואה צד-לצד:\n" " https://legal-eye.1bigfam.com/compare\n\n" "3. תרצה להצטרף ל-Founding 50? ₪199 לכל החיים, 50 מקומות בלבד:\n" " https://legal-eye.1bigfam.com/landing#pricing\n\n" "ענה לאימייל הזה עם כל שאלה — אני קורא אישית.\n\n" "אברי ברזל, עו\"ד\n" "Founder, Legal Eye\n" "avribarzel@gmail.com\n" ) api_key = _os.environ.get("RESEND_API_KEY", "").strip() if api_key: try: import urllib.request as _urlr, urllib.error as _urle req = _urlr.Request( "https://api.resend.com/emails", data=_j.dumps({ "from": "Legal Eye ", "to": [email], "subject": subject, "text": text_body, "reply_to": "avribarzel@gmail.com", }).encode("utf-8"), headers={ "Authorization": f"Bearer {api_key}", "Content-Type": "application/json", }, method="POST", ) with _urlr.urlopen(req, timeout=10) as resp: _ = resp.read() return "sent" except _urle.HTTPError as e: print(f"[welcome] Resend HTTP {e.code}: {e.read().decode('utf-8', 'ignore')[:200]}") except Exception as e: print(f"[welcome] Resend send failed: {e}") # fall through to queue # Queue mode — user reads this file periodically and sends manually try: Path(_WELCOME_QUEUE_FILE).parent.mkdir(parents=True, exist_ok=True) with open(_WELCOME_QUEUE_FILE, "a", encoding="utf-8") as f: f.write(_j.dumps({ "email": email, 
"name": name, "subject": subject, "body": text_body, "queued_at": int(_time.time()), }, ensure_ascii=False) + "\n") return "queued" except Exception as e: print(f"[welcome] queue write failed: {e}") return "failed" @app.post("/v1/demo/lead") def demo_lead(body: _DemoLeadRequest, request: Request): # type: ignore """Captures landing-page demo email leads to leads.db.""" import sqlite3 import time as _ti email = (body.email or "").strip().lower() # Light email-shape check (full RFC is overkill) if not email or "@" not in email or "." not in email.split("@", 1)[-1]: return JSONResponse(status_code=400, content={ "ok": False, "reason": "invalid_email", "message_he": "כתובת אימייל לא תקינה." }) fwd = request.headers.get("X-Forwarded-For", "") ip = (fwd.split(",")[0].strip() if fwd else (request.client.host if request.client else "unknown")) ua = (request.headers.get("User-Agent") or "")[:200] # Rate limit per IP bucket = _LEAD_RATE_BUCKETS.setdefault(ip, deque()) now = _ti.time() while bucket and bucket[0] < now - _LEAD_RATE_WINDOW_SEC: bucket.popleft() if len(bucket) >= _LEAD_RATE_LIMIT: return JSONResponse(status_code=429, content={ "ok": False, "reason": "rate_limit", "message_he": "יותר מדי הרשמות מ-IP זה. נסה שוב בעוד שעה." }) bucket.append(now) _ensure_leads_db() try: with sqlite3.connect(_LEADS_DB, timeout=5.0) as c: cur = c.execute(""" INSERT INTO leads (email, name, claim, last_analyzed_claim, source, ip, user_agent, created_at, status, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, 'new', ?) """, ( email, (body.name or "")[:120], (body.claim or "")[:600], (body.last_analyzed_claim or "")[:600], (body.source or "landing_demo")[:60], ip, ua, int(now), int(now), )) new_id = cur.lastrowid except Exception as e: print(f"[demo/lead] db write failed: {e}") return JSONResponse(status_code=500, content={ "ok": False, "reason": "db_error", "message_he": "שגיאה זמנית בשמירת ההודעה. 
נסה שוב או שלח אימייל ידנית ל-support@legal-eye.app" }) print(f"[demo/lead] captured: {email} (source={body.source} ip={ip})") # Day 28 — fire welcome email (via Resend if configured, else queue) try: welcome_status = _send_welcome_email({ "email": email, "name": body.name or "", }) if welcome_status in ("sent", "queued"): with sqlite3.connect(_LEADS_DB, timeout=5.0) as c: c.execute(""" UPDATE leads SET sent_welcome=1, welcome_method=?, updated_at=? WHERE id=? """, (welcome_status, int(_time.time()), new_id)) print(f"[demo/lead] welcome {welcome_status} for {email}") except Exception as e: # Don't fail the lead capture if welcome send fails print(f"[demo/lead] welcome send failed (non-fatal): {e}") return { "ok": True, "message_he": ( "תודה! שלחנו אימייל קצר עם קישור לאפליקציה. " "אם הוא לא הגיע — בדוק בספאם, או צור קשר ישיר ב-avribarzel@gmail.com." ), } @app.get("/v1/admin/leads") def admin_leads_list(request: Request, limit: int = 100, status: Optional[str] = None): # type: ignore """Admin read of captured leads. Use this to follow up Founding 50 conversions: each lead is a warm prospect who saw real results. Optional `status` filter: new / contacted / converted / churned.""" if not _check_admin_key(request): return JSONResponse(status_code=403, content={"ok": False, "reason": "admin_required"}) import sqlite3 _ensure_leads_db() try: with sqlite3.connect(_LEADS_DB, timeout=5.0) as c: c.row_factory = sqlite3.Row if status and status in ("new", "contacted", "converted", "churned"): cur = c.execute(""" SELECT id, email, name, claim, last_analyzed_claim, source, created_at, status, sent_welcome, welcome_method, admin_note, updated_at FROM leads WHERE status = ? ORDER BY created_at DESC LIMIT ? 
""", (status, max(1, min(limit, 500)))) rows = [dict(r) for r in cur.fetchall()] # Get counts by status for summary counts = {r["status"]: r["n"] for r in c.execute( "SELECT status, COUNT(*) AS n FROM leads GROUP BY status" ).fetchall()} return {"ok": True, "n": len(rows), "leads": rows, "counts": counts} cur = c.execute(""" SELECT id, email, name, claim, last_analyzed_claim, source, created_at, status, sent_welcome, welcome_method, admin_note, updated_at FROM leads ORDER BY created_at DESC LIMIT ? """, (max(1, min(limit, 500)),)) rows = [dict(r) for r in cur.fetchall()] counts = {r["status"]: r["n"] for r in c.execute( "SELECT status, COUNT(*) AS n FROM leads GROUP BY status" ).fetchall()} return {"ok": True, "n": len(rows), "leads": rows, "counts": counts} except Exception as e: return JSONResponse(status_code=500, content={ "ok": False, "error": f"{type(e).__name__}: {e}" }) # Day 28 — Admin can update lead status + add a note. Status transitions: # new → contacted → converted (or churned) class _LeadUpdateRequest(BaseModel): # type: ignore id: int status: Optional[str] = None admin_note: Optional[str] = None @app.post("/v1/admin/leads/update") def admin_leads_update(body: _LeadUpdateRequest, request: Request): # type: ignore """Update a lead's status and/or note.""" if not _check_admin_key(request): return JSONResponse(status_code=403, content={"ok": False, "reason": "admin_required"}) import sqlite3 _ensure_leads_db() fields, values = [], [] if body.status and body.status in ("new", "contacted", "converted", "churned"): fields.append("status = ?") values.append(body.status) if body.admin_note is not None: fields.append("admin_note = ?") values.append((body.admin_note or "")[:500]) if not fields: return JSONResponse(status_code=400, content={ "ok": False, "reason": "no_fields_to_update" }) fields.append("updated_at = ?") values.append(int(_time.time())) values.append(int(body.id)) try: with sqlite3.connect(_LEADS_DB, timeout=5.0) as c: cur = c.execute( f"UPDATE 
leads SET {', '.join(fields)} WHERE id = ?", tuple(values), ) if cur.rowcount == 0: return JSONResponse(status_code=404, content={ "ok": False, "reason": "lead_not_found" }) return {"ok": True, "updated": cur.rowcount} except Exception as e: return JSONResponse(status_code=500, content={ "ok": False, "error": f"{type(e).__name__}: {e}" }) @app.get("/v1/admin/welcome_queue") def admin_welcome_queue(request: Request): # type: ignore """If RESEND_API_KEY is not set, this returns the pending welcome emails that need manual sending. Each line is one email.""" if not _check_admin_key(request): return JSONResponse(status_code=403, content={"ok": False, "reason": "admin_required"}) from pathlib import Path p = Path(_WELCOME_QUEUE_FILE) if not p.exists(): return {"ok": True, "n_queued": 0, "queue": [], "resend_configured": bool(_os.environ.get("RESEND_API_KEY"))} items = [] try: import json as _j with open(p, encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue try: items.append(_j.loads(line)) except Exception: pass except Exception as e: return JSONResponse(status_code=500, content={ "ok": False, "error": f"{type(e).__name__}: {e}" }) return { "ok": True, "n_queued": len(items), "queue": items[-50:], # last 50 "resend_configured": bool(_os.environ.get("RESEND_API_KEY")), } # ────────────────────────────────────────────────────────────────────── # Day 17 — Founding 50 scarcity + lightweight analytics # # Two small, file-based subsystems that share a sqlite DB at # `runtime/funnel.db`: # • `founding50` (single row) — tracks how many of the 50 Founding # slots are claimed. Updated by the admin as deals close. # Public read endpoint powers the urgency widget on the landing. # • `events` — minimal event log: page_view, cta_click, demo_started, # demo_completed, email_submitted. POSTed from the static pages, # visible to the admin via /v1/admin/funnel. 
# ──────────────────────────────────────────────────────────────────────
_FUNNEL_DB = "tau_rag/runtime/funnel.db"
_FOUNDING_TOTAL = 50
# Per-IP timestamps of recent /v1/track calls (in-process, sliding window).
_TRACK_RATE_BUCKETS: Dict[str, deque] = {}
_TRACK_RATE_LIMIT = 60  # 60 events/min/IP — generous
_TRACK_RATE_WINDOW_SEC = 60
# Upper bound on the serialized `props` stored per event.
_TRACK_PROPS_MAX_CHARS = 500


def _ensure_funnel_db():
    """Create the ``founding50`` and ``events`` tables if they are missing.

    Idempotent. Also seeds the single ``founding50`` row (id=1, taken=0)
    via ``INSERT OR IGNORE`` so readers always find it.
    """
    import sqlite3
    from pathlib import Path

    Path(_FUNNEL_DB).parent.mkdir(parents=True, exist_ok=True)
    with sqlite3.connect(_FUNNEL_DB, timeout=5.0) as c:
        c.execute("""
            CREATE TABLE IF NOT EXISTS founding50 (
                id INTEGER PRIMARY KEY CHECK (id = 1),
                taken INTEGER NOT NULL DEFAULT 0,
                updated_at INTEGER NOT NULL
            )
        """)
        c.execute("""
            INSERT OR IGNORE INTO founding50 (id, taken, updated_at)
            VALUES (1, 0, ?)
        """, (int(_time.time()),))
        c.execute("""
            CREATE TABLE IF NOT EXISTS events (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                event TEXT NOT NULL,
                page TEXT,
                props TEXT,
                ip_hash TEXT,
                referrer TEXT,
                ua TEXT,
                created_at INTEGER NOT NULL
            )
        """)
        c.execute("CREATE INDEX IF NOT EXISTS idx_ev_ts ON events(created_at)")
        c.execute("CREATE INDEX IF NOT EXISTS idx_ev_event ON events(event)")


@app.get("/v1/founding50/status")
def founding50_status():  # type: ignore
    """Public counter for the urgency widget. Returns taken + spots_left.

    The stored value is clamped to ``0.._FOUNDING_TOTAL`` before it is
    reported. Responds 500 on a database error.
    """
    import sqlite3

    _ensure_funnel_db()
    try:
        with sqlite3.connect(_FUNNEL_DB, timeout=5.0) as c:
            row = c.execute(
                "SELECT taken, updated_at FROM founding50 WHERE id=1"
            ).fetchone()
            taken = int(row[0]) if row else 0
            updated_at = int(row[1]) if row else int(_time.time())
            taken = max(0, min(taken, _FOUNDING_TOTAL))
            return {
                "ok": True,
                "taken": taken,
                "total": _FOUNDING_TOTAL,
                "spots_left": _FOUNDING_TOTAL - taken,
                "updated_at": updated_at,
            }
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })


class _Founding50SetRequest(BaseModel):  # type: ignore
    """Request body for POST /v1/admin/founding50/set."""

    taken: int


@app.post("/v1/admin/founding50/set")
def admin_founding50_set(body: _Founding50SetRequest, request: Request):  # type: ignore
    """Admin updates the count as deals close.

    The submitted value is clamped to ``0.._FOUNDING_TOTAL``. Responds 403
    without a valid admin key, 500 on a database error.
    """
    if not _check_admin_key(request):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "admin_required"})
    import sqlite3

    _ensure_funnel_db()
    taken = max(0, min(int(body.taken), _FOUNDING_TOTAL))
    try:
        with sqlite3.connect(_FUNNEL_DB, timeout=5.0) as c:
            c.execute(
                "UPDATE founding50 SET taken=?, updated_at=? WHERE id=1",
                (taken, int(_time.time())),
            )
        return {"ok": True, "taken": taken, "total": _FOUNDING_TOTAL,
                "spots_left": _FOUNDING_TOTAL - taken}
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })


class _TrackEventRequest(BaseModel):  # type: ignore
    """Request body for POST /v1/track."""

    event: str
    page: Optional[str] = ""
    props: Optional[Dict[str, Any]] = None
    referrer: Optional[str] = ""


@app.post("/v1/track")
def track_event(body: _TrackEventRequest, request: Request):  # type: ignore
    """Logs a single funnel event. Public — rate-limited per IP.

    Stores only an IP HASH (not raw IP) for privacy. `event` is trimmed to
    60 chars, `page` to 120, referrer and user agent to 200. `props` is
    stored as JSON text; when its serialized form exceeds
    ``_TRACK_PROPS_MAX_CHARS`` a small valid JSON marker with a preview is
    stored instead, so the column never holds cut-off JSON.

    Responds 400 ``missing_event``, 429 ``rate_limit``, 500 on DB failure.
    """
    import hashlib
    import json as _j
    import sqlite3

    ev = (body.event or "").strip()[:60]
    if not ev:
        return JSONResponse(status_code=400, content={
            "ok": False, "reason": "missing_event"
        })

    # First hop of X-Forwarded-For when present, else the socket peer.
    fwd = request.headers.get("X-Forwarded-For", "")
    ip = (fwd.split(",")[0].strip() if fwd
          else (request.client.host if request.client else "unknown"))
    ip_hash = hashlib.sha256(("le-track:" + ip).encode()).hexdigest()[:16]

    # Rate limit per IP (counts on raw IP, not hash, so we don't have to
    # reverse-hash on each request)
    bucket = _TRACK_RATE_BUCKETS.setdefault(ip, deque())
    now = _time.time()
    while bucket and bucket[0] < now - _TRACK_RATE_WINDOW_SEC:
        bucket.popleft()
    if len(bucket) >= _TRACK_RATE_LIMIT:
        return JSONResponse(status_code=429, content={
            "ok": False, "reason": "rate_limit"
        })
    bucket.append(now)

    ua = (request.headers.get("User-Agent") or "")[:200]
    referrer = (body.referrer or request.headers.get("Referer") or "")[:200]

    props_json = ""
    if body.props is not None:
        try:
            raw_props = _j.dumps(body.props, ensure_ascii=False)
        except (TypeError, ValueError, RecursionError):
            # Unserializable props are dropped, never fatal.
            raw_props = ""
        if len(raw_props) <= _TRACK_PROPS_MAX_CHARS:
            props_json = raw_props
        else:
            # Cap size to avoid abuse. Slicing the serialized text would
            # leave invalid JSON, so store a parseable marker. A 200-char
            # preview at most doubles under escaping, staying below the cap.
            props_json = _j.dumps({
                "_truncated": True,
                "_len": len(raw_props),
                "preview": raw_props[:200],
            }, ensure_ascii=False)

    _ensure_funnel_db()
    try:
        with sqlite3.connect(_FUNNEL_DB, timeout=5.0) as c:
            c.execute("""
                INSERT INTO events
                    (event, page, props, ip_hash, referrer, ua, created_at)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            """, (
                ev,
                (body.page or "")[:120],
                props_json,
                ip_hash,
                referrer,
                ua,
                int(now),
            ))
    except Exception as e:
        print(f"[track] db write failed: {e}")
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })
    return {"ok": True}


@app.get("/v1/admin/funnel")
def admin_funnel(request: Request, hours: int = 24):  # type: ignore
    """Admin summary: event counts, per-page funnels, conversion ratios.

    `hours` selects the look-back window and is clamped to 1..720 (30 days).
    Returns counts by event, page-view counts by page, the number of
    distinct `ip_hash` values, the four funnel-stage counts, and the 50
    most recent events. Responds 403 without a valid admin key.
    """
    if not _check_admin_key(request):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "admin_required"})
    import sqlite3

    _ensure_funnel_db()
    hours = max(1, min(int(hours), 24 * 30))
    cutoff = int(_time.time()) - hours * 3600
    try:
        with sqlite3.connect(_FUNNEL_DB, timeout=5.0) as c:
            c.row_factory = sqlite3.Row
            by_event = [dict(r) for r in c.execute("""
                SELECT event, COUNT(*) AS n
                FROM events WHERE created_at >= ?
                GROUP BY event ORDER BY n DESC
            """, (cutoff,)).fetchall()]
            by_page = [dict(r) for r in c.execute("""
                SELECT page, COUNT(*) AS n
                FROM events WHERE created_at >= ? AND event = 'page_view'
                GROUP BY page ORDER BY n DESC
            """, (cutoff,)).fetchall()]
            uniques = c.execute("""
                SELECT COUNT(DISTINCT ip_hash)
                FROM events WHERE created_at >= ?
            """, (cutoff,)).fetchone()[0]
            recent = [dict(r) for r in c.execute("""
                SELECT event, page, props, referrer, created_at
                FROM events WHERE created_at >= ?
                ORDER BY created_at DESC LIMIT 50
            """, (cutoff,)).fetchall()]
        # Conversion ratios — same window
        ev_map = {r["event"]: r["n"] for r in by_event}
        return {
            "ok": True,
            "window_hours": hours,
            "unique_visitors_ip_hash": uniques,
            "by_event": by_event,
            "by_page": by_page,
            "funnel": {
                "page_views": ev_map.get("page_view", 0),
                "demo_started": ev_map.get("demo_started", 0),
                "demo_completed": ev_map.get("demo_completed", 0),
                "email_submitted": ev_map.get("email_submitted", 0),
            },
            "recent_events": recent,
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })


# ──────────────────────────────────────────────────────────────────────
# Day 23 — Public doctrine catalog summary endpoint
#
# Exposes the curated doctrine catalog so external clients (the
# landing scorecard, the marketing materials, third-party auditors)
# can verify how many doctrines are curated and what the anchors are.
# Returns a flattened summary — `id`, `name_he`, `name_en`, `domain`,
# `anchor_case`, `n_leading_cases` — not the full schema (elements,
# exceptions, keywords stay internal).
# ──────────────────────────────────────────────────────────────────────
class _DoctrineClassifyRequest(BaseModel):  # type: ignore
    """Request body for POST /v1/doctrines/classify."""

    text: str
    k: int = 3
    min_score: float = 1.0


@app.post("/v1/doctrines/classify")
def doctrines_classify(body: _DoctrineClassifyRequest):  # type: ignore
    """Day 49 — classify a free-text query against the doctrine catalog.

    Returns the top-k matching doctrines with their metadata so the
    frontend can render a "related doctrines" panel inline with the
    lawyer-ask result (topic-dossier UX). Blank text yields an empty match
    list without touching the classifier; any failure yields a 500.
    """
    if not (body.text or "").strip():
        return {"ok": True, "matches": [], "n_total": 0}
    try:
        from ..intelligence.doctrine_classifier import (
            classify_doctrines, load_doctrine_catalog,
        )
        cat = load_doctrine_catalog()
        # Tolerate a null list and id-less entries, like the catalog
        # endpoint does, instead of failing the whole request.
        idx = {d["id"]: d for d in (cat.get("doctrines") or []) if d.get("id")}
        matches = classify_doctrines(body.text, k=body.k, min_score=body.min_score)
        out = []
        for m in matches:
            doc = idx.get(m.doctrine_id, {})
            leading = (doc.get("leading_cases") or [])[:1]
            anchor_case = leading[0].get("citation") if leading else None
            anchor_year = leading[0].get("year") if leading else None
            statute_refs = doc.get("statute_refs")
            out.append({
                "id": m.doctrine_id,
                "name_he": m.name_he,
                "domain": m.domain,
                "score": round(m.score, 2),
                "anchor_case": anchor_case,
                "anchor_year": anchor_year,
                "statute": statute_refs[0].get("law") if statute_refs else None,
                "pending_review": bool(doc.get("_pending_lawyer_review")),
                # Day 49 Phase 2.5 — refinement chips. Pre-curated Hebrew
                # sub-topic phrases per doctrine. Frontend appends to the
                # user's question and re-submits to narrow the topic.
                "refinements": doc.get("refinements") or [],
            })
        return {"ok": True, "matches": out, "n_total": len(out)}
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })


@app.get("/v1/doctrines/catalog")
def doctrines_catalog_public():  # type: ignore
    """Public read-only summary of the curated doctrine catalog.

    Each doctrine is flattened to id, names, domain, the first leading
    case's citation/year, and counts of leading cases and statute refs.
    Also returns a per-domain count and the catalog's `_schema_version`
    and `_disclaimer` values.
    """
    try:
        from ..intelligence.doctrine_classifier import load_doctrine_catalog
        cat = load_doctrine_catalog()
        doctrines = cat.get("doctrines", []) or []
        items = []
        for d in doctrines:
            leading = d.get("leading_cases") or []
            items.append({
                "id": d.get("id"),
                "name_he": d.get("name_he"),
                "name_en": d.get("name_en"),
                "domain": d.get("domain"),
                "anchor_case": (leading[0].get("citation") if leading else None),
                "anchor_year": (leading[0].get("year") if leading else None),
                "n_leading_cases": len(leading),
                "n_statute_refs": len(d.get("statute_refs") or []),
            })
        # Domain breakdown
        from collections import Counter as _Counter
        by_domain = dict(_Counter(it["domain"] for it in items if it["domain"]))
        return {
            "ok": True,
            "n_doctrines": len(items),
            "by_domain": by_domain,
            "doctrines": items,
            "_schema_version": cat.get("_schema_version"),
            "_disclaimer": cat.get("_disclaimer"),
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })


@app.get("/v1/doctrines/{doctrine_id}")
def doctrines_get_one(doctrine_id: str):  # type: ignore
    """Day 49 — full per-doctrine details for the dossier expand-on-click.

    Returns name_he, elements, exceptions, statute_refs, all leading_cases,
    and the `pending_lawyer_review` flag. Keywords stay internal — they're
    a tuning surface, not user-facing. Responds 404 ``doctrine_not_found``
    for an unknown id.

    NOTE: must be registered AFTER /v1/doctrines/catalog so the static
    path wins over the dynamic match.
    """
    try:
        from ..intelligence.doctrine_classifier import load_doctrine_catalog
        cat = load_doctrine_catalog()
        # Tolerate a null list and id-less entries instead of failing.
        idx = {d["id"]: d for d in (cat.get("doctrines") or []) if d.get("id")}
        doc = idx.get(doctrine_id)
        if not doc:
            return JSONResponse(status_code=404, content={
                "ok": False, "reason": "doctrine_not_found",
            })
        return {
            "ok": True,
            "id": doc.get("id"),
            "name_he": doc.get("name_he"),
            "name_en": doc.get("name_en"),
            "domain": doc.get("domain"),
            "elements": doc.get("elements") or [],
            "exceptions": doc.get("exceptions") or [],
            "statute_refs": doc.get("statute_refs") or [],
            "leading_cases": doc.get("leading_cases") or [],
            "pending_review": bool(doc.get("_pending_lawyer_review")),
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })


# ──────────────────────────────────────────────────────────────────────
# Day 51 — Judgment Reader: structured render of a single court ruling.
#
# Takes the raw doc text (from pipeline.get_text), cleans the corpus
# prefixes, and parses out:
#   - title, court, parties, judges, decision_date
#   - body paragraphs (numbered where possible)
#   - inline citations (extracted, dedupe)
#   - source links (Nevo, courts.gov.il where the citation kind permits)
#
# Returned JSON powers the frontend reader modal (Hebrew serif
# typography, linkified citations, sticky paragraph numbers).
# ──────────────────────────────────────────────────────────────────────

# Strip the same corpus prefix the hierarchical_graph uses.
import re as _re  # module-level alias for the _JR_* regex bundle below

# One or more leading "[...]" bracket groups at the start of the text.
_JR_BRACKET_HEADER_RE = _re.compile(r"^(?:\s*\[[^\]]*\]\s*)+", flags=_re.UNICODE)
_JR_PROSE_MARKER_RE = _re.compile(
    r"^\s*פסיק[הת]\s*[—–\-:]\s*[^:]+\)\s*:\s*", flags=_re.UNICODE)

# Citation regex — matches ע"א 1234/97, בג"ץ 1234/95, etc.
_JR_CASE_RX = _re.compile( r'((?:בג"?ץ|ע"?א|ע"?פ|רע"?א|רע"?פ|דנ"?א|דנ"?פ|בש"?א|בש"?פ|דב"?ע)\s*\d+\s*[/\-]\s*\d{2,4})', flags=_re.UNICODE, ) # Paragraph splitter: blank line OR Hebrew letter+dot+space at line start _JR_PARA_RX = _re.compile(r"\n\s*\n|\n(?=\d+\.\s)|\n(?=[א-י][.)]\s)") def _fetch_from_wikisource(cite: str) -> Optional[dict]: """Day 52 — pull a judgment from Hebrew Wikisource. Wikisource is community-transcribed primary sources. ~500-1000 famous Israeli rulings are available there in clean Hebrew text with structure (header / parties / judges / body). Open access, no anti-bot, no copyright concerns (court rulings are public domain; the transcription is CC-BY-SA). Returns dict in same shape as judgment_reader_render OR None if the citation isn't on Wikisource. """ import urllib.request, urllib.parse, json import re as _r headers = {"User-Agent": "Mozilla/5.0 (compatible; LegalEyeBot/1.0; +https://legal-eye.1bigfam.com)"} # Step 1: opensearch for the citation q = urllib.parse.quote(cite) try: url = (f"https://he.wikisource.org/w/api.php?action=opensearch" f"&search={q}&limit=3&format=json") req = urllib.request.Request(url, headers=headers) with urllib.request.urlopen(req, timeout=15) as r: data = json.loads(r.read()) titles = data[1] if len(data) > 1 else [] if not titles: return None except Exception: return None # Step 2: parse the FIRST matching page title = titles[0] try: ptitle = urllib.parse.quote(title) purl = (f"https://he.wikisource.org/w/api.php?action=parse" f"&page={ptitle}&prop=text|wikitext&format=json") req = urllib.request.Request(purl, headers=headers) with urllib.request.urlopen(req, timeout=15) as r: pdata = json.loads(r.read()) except Exception: return None html = (pdata.get("parse", {}).get("text", {}) or {}).get("*", "") if not html: return None # Strip MediaWiki UI cruft + nested HTML html = _r.sub(r"]*>.*?", "", html, flags=_r.DOTALL) html = _r.sub(r"]*>.*?", "", html, flags=_r.DOTALL) html = _r.sub(r'', "", html, 
flags=_r.DOTALL) html = _r.sub(r'", "\n", html, flags=_r.IGNORECASE) html = _r.sub(r"]*>", "\n\n", html, flags=_r.IGNORECASE) # Decode common HTML entities cleaned = _r.sub(r"<[^>]+>", "", html) cleaned = (cleaned .replace(" ", " ") .replace("&", "&") .replace("<", "<") .replace(">", ">") .replace(""", '"') .replace("'", "'") .replace(""", '"')) # Collapse whitespace cleaned = _r.sub(r"[ \t]+", " ", cleaned) cleaned = _r.sub(r"\n{3,}", "\n\n", cleaned).strip() if len(cleaned) < 400: return JSONResponse(status_code=400, content={ "ok": False, "reason": "no_content_after_strip", "n_chars_after_strip": len(cleaned), }) # v2.99.66 — detect anti-bot block pages BEFORE parsing them as # judgment text. court.gov.il + several aggregators serve a # Hebrew block page that contains words like "פעילות בלתי # מורשת", "Case Number:", "Client IP:". We must NOT render that # as the judgment — return a structured error so the frontend # can tell the user clearly + try a different source. BLOCK_PATTERNS = [ r"פעילות\s+בלתי[\s־]+מורשת", r"Case\s+Number\s*:\s*\d{10,}", r"גישתך\s+לאתר\s+נמנעה", r"מערכת\s+ההגנה", r"Access\s+Denied", r"You\s+have\s+been\s+blocked", r"Cloudflare\s+Ray\s+ID", r"Just\s+a\s+moment\s*\.{3}", r"Please\s+complete\s+the\s+security\s+check", ] head_for_check = cleaned[:2000] for pat in BLOCK_PATTERNS: if _r.search(pat, head_for_check): return JSONResponse(status_code=502, content={ "ok": False, "reason": "upstream_anti_bot", "source": source, "source_url": url, "block_pattern": pat, "human_message": f"האתר {source} חסם את הבקשה. ייתכן שיש" f" יותר מדי בקשות מה-IP שלך — נסה שוב מאוחר" f" יותר, או חפש את הפסק במנוע חיפוש.", }) # Heuristic: the actual judgment usually starts at a Hebrew # paragraph mentioning "בית המשפט" / "בפני" / "פסק דין" — # trim everything before that to remove nav/header noise. 
markers = [r"בית[\s־]+המשפט", r"\bבפני\b", r"\bפסק[\s־]+דין\b", r"\bהחלטה\b", r"\bבעניין\b"] for mk in markers: m = _r.search(mk, cleaned) if m and m.start() > 200: cleaned = cleaned[m.start():] break # Paragraphs paragraphs = [] for chunk in _JR_PARA_RX.split(cleaned): t = chunk.strip() if not t or len(t) < 20: continue num_match = _r.match(r"^((?:\d+\.|[א-י][.)])\s)(.+)", t, _r.DOTALL) if num_match: paragraphs.append({"num": num_match.group(1).strip(), "text": num_match.group(2).strip()}) else: paragraphs.append({"num": None, "text": t}) # Inline citations citations = [] seen = set() for m in _JR_CASE_RX.finditer(cleaned): c = m.group(1).strip() if c not in seen: seen.add(c); citations.append(c) from urllib.parse import quote as _q nevo_q = _q('site:nevo.co.il "' + cite + '"') courts_q = _q('"' + cite + '"') return { "ok": True, "doc_id": f"{source}:{cite}", "citation": cite or None, "court": None, "source": source, "source_url": url, "source_title": None, "n_paragraphs": len(paragraphs), "n_citations": len(citations), "n_chars": len(cleaned), "paragraphs": paragraphs[:200], "citations": citations[:60], "links": { "nevo": f"https://www.google.com/search?q={nevo_q}", "courts": f"https://www.google.com/search?q={courts_q}", }, } except Exception as e: return JSONResponse(status_code=500, content={ "ok": False, "error": f"{type(e).__name__}: {e}" }) def _fetch_from_wikipedia(cite: str) -> Optional[dict]: """Day 52b — pull a judgment summary from Hebrew Wikipedia. Wikipedia articles ABOUT famous Israeli rulings (e.g. "הלכת אפרופים", "בג\"ץ קול העם") usually have: - the citation in the title or first paragraph - quoted passages from the judgment - background, summary, and influence Not as good as the full text (Wikisource is preferred), but covers cases that aren't transcribed on Wikisource. Useful as a secondary source. The reader footer surfaces the attribution. 
""" import urllib.request, urllib.parse, json import re as _r headers = {"User-Agent": "Mozilla/5.0 (compatible; LegalEyeBot/1.0; +https://legal-eye.1bigfam.com)"} # Step 1: opensearch on the bare citation q = urllib.parse.quote(cite) try: url = (f"https://he.wikipedia.org/w/api.php?action=opensearch" f"&search={q}&limit=5&format=json") req = urllib.request.Request(url, headers=headers) with urllib.request.urlopen(req, timeout=15) as r: data = json.loads(r.read()) titles = data[1] if len(data) > 1 else [] if not titles: return None title = titles[0] except Exception: return None # Step 2: parse the page try: ptitle = urllib.parse.quote(title) purl = (f"https://he.wikipedia.org/w/api.php?action=parse" f"&page={ptitle}&prop=text&format=json") req = urllib.request.Request(purl, headers=headers) with urllib.request.urlopen(req, timeout=15) as r: pdata = json.loads(r.read()) except Exception: return None html = (pdata.get("parse", {}).get("text", {}) or {}).get("*", "") if not html: return None # Wikipedia articles have a LOT of UI cruft — strip aggressively. html = _r.sub(r"]*>.*?", "", html, flags=_r.DOTALL) html = _r.sub(r"]*>.*?", "", html, flags=_r.DOTALL) html = _r.sub(r'', "", html, flags=_r.DOTALL) html = _r.sub(r'
]*class="[^"]*(?:infobox|navbox|wikitable)[^"]*".*?', "", html, flags=_r.DOTALL) html = _r.sub(r']*class="reference"[^>]*>.*?', "", html, flags=_r.DOTALL) html = _r.sub(r"", "\n", html) html = _r.sub(r"

", "\n\n", html) cleaned = _r.sub(r"<[^>]+>", "", html) cleaned = _r.sub(r"\[\d+\]", "", cleaned) # footnote refs cleaned = _r.sub(r"\n{3,}", "\n\n", cleaned).strip() if len(cleaned) < 400: return None paragraphs = [] for chunk in _JR_PARA_RX.split(cleaned): t = chunk.strip() if not t: continue num_match = _r.match(r"^((?:\d+\.|[א-י][.)])\s)(.+)", t, _r.DOTALL) if num_match: paragraphs.append({"num": num_match.group(1).strip(), "text": num_match.group(2).strip()}) else: paragraphs.append({"num": None, "text": t}) citations = [] seen = set() for m in _JR_CASE_RX.finditer(cleaned): c = m.group(1).strip() if c not in seen: seen.add(c); citations.append(c) from urllib.parse import quote as _q return { "ok": True, "doc_id": f"wikipedia:{title}", "citation": cite, "court": None, "source": "wikipedia", "source_url": f"https://he.wikipedia.org/wiki/{urllib.parse.quote(title)}", "source_title": title, "n_paragraphs": len(paragraphs), "n_citations": len(citations), "n_chars": len(cleaned), "paragraphs": paragraphs[:200], "citations": citations[:60], "links": { "nevo": "https://www.google.com/search?q=" + _q('site:nevo.co.il "' + cite + '"'), "courts": "https://www.google.com/search?q=" + _q('"' + cite + '"'), }, } # Tiny LRU cache for external fetches — avoids re-pulling same case # repeatedly. Capped at 100 most-recent. _EXTERNAL_FETCH_CACHE: Dict[str, dict] = {} _EXTERNAL_FETCH_ORDER: deque = deque(maxlen=100) @app.get("/v1/judgment/by-cite") def judgment_reader_by_cite(cite: str): # type: ignore """Day 51 — resolve a citation string to a corpus doc_id and return the same structured render as /v1/judgment/{doc_id}/render. Input may include surrounding text (case title, parties, year). We extract just the canonical citation prefix (e.g. 'ע\"א 207/79') before looking up in cn.doc_for_citation, since the network keys on prefixes only. 
Returns 404 with rich metadata (the canonical citation we tried + suggested external links) so the frontend can show a graceful fallback to Nevo / courts.gov.il. """ try: from ..pipeline import get_pipeline from ..citation_network import get_or_build pipe = get_pipeline() cn = get_or_build(pipe) raw = (cite or "").strip() if not raw: return JSONResponse(status_code=400, content={ "ok": False, "reason": "empty_citation", }) # Extract just the canonical citation prefix (e.g. "ע\"א 207/79"). # The input often arrives with case title attached: # "ע\"א 207/79 רביב נ' בית יולס" → strip down to "ע\"א 207/79" m = _JR_CASE_RX.search(raw) prefix = m.group(1).strip() if m else raw # Try several normalization variants of the prefix candidates = [ prefix, prefix.replace('"', '"').replace("'", "׳"), prefix.replace('"', '"').replace("'", "'"), # Compact form (no spaces): "ע\"א 207/79" → "ע\"א207/79" _re.sub(r"\s+", "", prefix), ] doc_id = None for c in candidates: doc_id = cn.doc_for_citation.get(c) if doc_id: break if not doc_id: # Day 52 — corpus miss → try Hebrew Wikisource before # giving up. Many famous rulings are community-transcribed # there with full text. Cached in-process to avoid repeat fetches. cached = _EXTERNAL_FETCH_CACHE.get(prefix) if cached: return cached for fetcher in (_fetch_from_wikisource, _fetch_from_wikipedia): result = fetcher(prefix) if result: _EXTERNAL_FETCH_CACHE[prefix] = result _EXTERNAL_FETCH_ORDER.append(prefix) if len(_EXTERNAL_FETCH_CACHE) > _EXTERNAL_FETCH_ORDER.maxlen: oldest = _EXTERNAL_FETCH_ORDER.popleft() _EXTERNAL_FETCH_CACHE.pop(oldest, None) return result # Last-resort: 404 with Google escape-hatches. 
from urllib.parse import quote nevo_q = quote(f'site:nevo.co.il "{prefix}"') courts_q = quote(f'"{prefix}"') return JSONResponse(status_code=404, content={ "ok": False, "reason": "citation_not_in_corpus", "cite": prefix, "raw": raw, "links": { "nevo": f"https://www.google.com/search?q={nevo_q}", "courts": f"https://www.google.com/search?q={courts_q}", }, }) return judgment_reader_render(doc_id) except Exception as e: return JSONResponse(status_code=500, content={ "ok": False, "error": f"{type(e).__name__}: {e}" }) # ────────────────────────────────────────────────────────────────────── # WhatsApp Phase 2 — anonymized observation aggregation. # Storage: append-only JSONL at runtime/whatsapp_observations.jsonl. # Retention: 30 days (older entries pruned on read). # What we accept: {group_hash, ts, kind, payload}. # What we DON'T accept: sender names, raw messages, plaintext IDs. # ────────────────────────────────────────────────────────────────────── import pathlib as _wa_pl import threading as _wa_th import time as _wa_time import hashlib as _wa_hl _WA_OBS_PATH = _wa_pl.Path("tau_rag/runtime/whatsapp_observations.jsonl") _WA_OBS_LOCK = _wa_th.Lock() _WA_TTL_DAYS = 30 def _wa_append_obs(observations): _WA_OBS_PATH.parent.mkdir(parents=True, exist_ok=True) with _WA_OBS_LOCK: with open(_WA_OBS_PATH, "a", encoding="utf-8") as f: for o in observations: f.write(json.dumps(o, ensure_ascii=False) + "\n") def _wa_read_obs(): """Read all observations within TTL, ignore malformed lines.""" if not _WA_OBS_PATH.exists(): return [] cutoff_ts = _wa_time.time() - (_WA_TTL_DAYS * 86400) out = [] with _WA_OBS_LOCK: with open(_WA_OBS_PATH, "r", encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue try: o = json.loads(line) except Exception: continue if (o.get("ts") or 0) < cutoff_ts: continue out.append(o) return out class _WaObservation(BaseModel): # type: ignore group_hash: str ts: float kind: str # 'cite' or 'question' payload: Dict[str, Any] class 
_WaObserveRequest(BaseModel): # type: ignore observations: List[_WaObservation] @app.post("/v1/whatsapp/observe") def whatsapp_observe(body: _WaObserveRequest): # type: ignore """Day 55 — accept anonymized WhatsApp observations from the Chrome helper extension. Stored in JSONL, used by the dashboard to surface trends (top cites, top questions). Strict validation: - group_hash must be a hex SHA256 (64 chars, not the original ID) - kind ∈ {'cite', 'question'} - payload restricted shape: cite → {cite: str} question → {q_hash: str, q_text_clean: str, q_len: int} """ accepted = [] rejected = 0 for obs in (body.observations or [])[:200]: # cap batch try: gh = (obs.group_hash or "").strip().lower() if len(gh) != 64 or not all(c in "0123456789abcdef" for c in gh): rejected += 1 continue if obs.kind not in ("cite", "question", "event"): rejected += 1 continue pl = obs.payload or {} if obs.kind == "cite": if not isinstance(pl.get("cite"), str) or len(pl.get("cite", "")) > 100: rejected += 1 continue clean_pl = {"cite": pl["cite"][:100]} elif obs.kind == "question": qh = (pl.get("q_hash") or "").strip().lower() if len(qh) != 64 or not all(c in "0123456789abcdef" for c in qh): rejected += 1 continue qt = (pl.get("q_text_clean") or "").strip()[:500] clean_pl = {"q_hash": qh, "q_text_clean": qt, "q_len": int(pl.get("q_len") or 0)} # v1.0.0 — opt-in sender + message timestamp enrichment sender = (pl.get("sender") or "").strip()[:80] if sender: clean_pl["sender"] = sender msg_iso = (pl.get("msg_ts_iso") or "").strip() if _re.match(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}(:\d{2})?$", msg_iso): clean_pl["msg_ts_iso"] = msg_iso else: # event # date_iso must be YYYY-MM-DD; title 5-300 chars di = (pl.get("date_iso") or "").strip() if not _re.match(r"^\d{4}-\d{2}-\d{2}$", di): rejected += 1 continue title = (pl.get("title") or "").strip()[:300] if len(title) < 5: rejected += 1 continue ev_kind = (pl.get("ev_kind") or "").strip()[:20] location = (pl.get("location") or "").strip()[:100] clean_pl 
= {"date_iso": di, "title": title, "ev_kind": ev_kind, "location": location} accepted.append({ "group_hash": gh, "ts": float(obs.ts), "kind": obs.kind, "payload": clean_pl, }) except Exception: rejected += 1 continue if accepted: _wa_append_obs(accepted) return {"ok": True, "accepted": len(accepted), "rejected": rejected} @app.get("/v1/whatsapp/group/{group_hash}") def whatsapp_group_detail(group_hash: str, days: int = 30, limit: int = 200): # type: ignore """Day 56 — return all observations for a single group, sorted by most recent first. Used by the dashboard's "click group → see messages" view.""" gh = (group_hash or "").strip().lower() if len(gh) != 64 or not all(c in "0123456789abcdef" for c in gh): return JSONResponse(status_code=400, content={ "ok": False, "reason": "invalid_group_hash", }) days = max(1, min(days, _WA_TTL_DAYS)) limit = max(10, min(limit, 1000)) cutoff = _wa_time.time() - (days * 86400) rows = [o for o in _wa_read_obs() if o.get("group_hash") == gh and o.get("ts", 0) >= cutoff] rows.sort(key=lambda o: -(o.get("ts") or 0)) rows = rows[:limit] # Bucket per kind for easier UI rendering cites = [o for o in rows if o.get("kind") == "cite"] questions= [o for o in rows if o.get("kind") == "question"] events = [o for o in rows if o.get("kind") == "event"] return { "ok": True, "group_hash": gh, "days": days, "n_observations": len(rows), "n_cites": len(cites), "n_questions": len(questions), "n_events": len(events), "observations": rows, } @app.get("/v1/whatsapp/dashboard") def whatsapp_dashboard(days: int = 7, top_k: int = 20): # type: ignore """Day 55 — aggregated view of WhatsApp observations. v2.99.84 — 5-sec in-memory cache (per days/top_k key). The dashboard is opened many times in a single session; without cache each open re-reads the entire JSONL + re-runs the retriever for every top question. With cache: only the first open per 5-sec window does real work. 
Returns: top_cites: most-mentioned citations across all observed groups top_questions: most-asked questions (by q_hash) For each, runs our retriever to check if we have a plausible answer (top-1 hit confidence). stats: observation/group counts """ days = max(1, min(days, _WA_TTL_DAYS)) top_k = max(5, min(top_k, 100)) # Cache check cache_key = (days, top_k) cached = _WA_DASH_CACHE.get(cache_key) if cached and cached[1] > _wa_time.time(): return cached[0] cutoff = _wa_time.time() - (days * 86400) rows = [o for o in _wa_read_obs() if o.get("ts", 0) >= cutoff] # Aggregate cites cite_counts: Dict[str, int] = {} cite_groups: Dict[str, set] = {} for o in rows: if o.get("kind") != "cite": continue c = (o.get("payload") or {}).get("cite") or "" if not c: continue cite_counts[c] = cite_counts.get(c, 0) + 1 cite_groups.setdefault(c, set()).add(o.get("group_hash")) top_cites = sorted( [{"cite": c, "n_mentions": n, "n_groups": len(cite_groups.get(c) or set())} for c, n in cite_counts.items()], key=lambda r: (-r["n_mentions"], r["cite"]), )[:top_k] # Aggregate questions q_counts: Dict[str, int] = {} q_groups: Dict[str, set] = {} q_repr: Dict[str, str] = {} # q_hash → representative text q_meta: Dict[str, list] = {} # q_hash → list of {sender, msg_ts_iso, ts} for o in rows: if o.get("kind") != "question": continue pl = o.get("payload") or {} qh = pl.get("q_hash") or "" if not qh: continue q_counts[qh] = q_counts.get(qh, 0) + 1 q_groups.setdefault(qh, set()).add(o.get("group_hash")) if qh not in q_repr: q_repr[qh] = pl.get("q_text_clean") or "" # v1.0.0 — collect sender + message-time metadata if available meta_row = { "ts": o.get("ts"), "sender": pl.get("sender") or None, "msg_ts_iso": pl.get("msg_ts_iso") or None, } if meta_row["sender"] or meta_row["msg_ts_iso"]: q_meta.setdefault(qh, []).append(meta_row) # For top-N questions, check if our retriever has an answer q_ranked = sorted(q_counts.items(), key=lambda kv: -kv[1])[:top_k] top_questions = [] try: from ..pipeline import 
get_pipeline pipe = get_pipeline() except Exception: pipe = None for qh, n in q_ranked: text = q_repr.get(qh) or "" we_answer = None confidence = None try: if pipe and text: # Use search_documents (the real pipeline API; the # `retrieve` name from earlier draft didn't exist). hits = pipe.search_documents(text, top_k=3) if hits: confidence = float(getattr(hits[0], "score", 0.0)) # BM25 raw scores run ~0.01-0.15 for good matches — # 0.02 is a safer "we found something relevant" gate. we_answer = confidence >= 0.02 else: we_answer = False except Exception as e: # Surface the exception class in confidence field so we can # see it in the dashboard rather than silently dropping it. we_answer = False confidence = -1.0 # Up to 5 most recent {sender, msg_ts_iso} for this question recent_meta = sorted( q_meta.get(qh) or [], key=lambda r: r.get("ts") or 0, reverse=True, )[:5] top_questions.append({ "q_hash": qh, "q_text": text, "n_mentions": n, "n_groups": len(q_groups.get(qh) or set()), "we_answer": we_answer, "confidence": confidence, "recent_meta": recent_meta, }) # Aggregate upcoming events (de-dup by date+title-prefix, future only) from datetime import date as _date, datetime as _dt today_iso = _date.today().isoformat() ev_counts: Dict[str, int] = {} ev_groups: Dict[str, set] = {} ev_meta: Dict[str, dict] = {} for o in rows: if o.get("kind") != "event": continue pl = o.get("payload") or {} di = pl.get("date_iso") or "" title = pl.get("title") or "" if not di or not title: continue if di < today_iso: continue # past # Dedup key: date + 40-char title prefix (loose match) key = di + "|" + title[:40].strip().lower() ev_counts[key] = ev_counts.get(key, 0) + 1 ev_groups.setdefault(key, set()).add(o.get("group_hash")) if key not in ev_meta: ev_meta[key] = { "date_iso": di, "title": title, "ev_kind": pl.get("ev_kind") or "", "location": pl.get("location") or "", } top_events = [] for key, n in ev_counts.items(): m = ev_meta[key] top_events.append({ **m, "n_mentions": n, 
"n_groups": len(ev_groups.get(key) or set()), }) # Sort by date ascending (soonest first), then by mentions desc top_events.sort(key=lambda e: (e["date_iso"], -e["n_mentions"])) top_events = top_events[:top_k] response = { "ok": True, "days": days, "n_observations": len(rows), "n_groups": len({o.get("group_hash") for o in rows if o.get("group_hash")}), "top_cites": top_cites, "top_questions": top_questions, "top_events": top_events, } # Cache write (v2.99.84) _WA_DASH_CACHE[cache_key] = (response, _wa_time.time() + _WA_DASH_TTL) return response def _strip_remaining_templates(s: str) -> str: """Final pass: strip any leftover `{{...}}` wikitext templates we don't explicitly handle, leaving plain Hebrew text. Handles nested braces (one level) so `{{ח:פנימי|x|y}}` already-substituted leftovers don't leak through.""" import re as _r # Strip simple templates (no nested braces) prev = None cur = s while prev != cur: prev = cur cur = _r.sub(r"\{\{[^{}]*\}\}", "", cur) # Drop wikitext list bullets cur = _r.sub(r"^[\*#:]+\s*", "", cur) # Collapse whitespace cur = _r.sub(r"\s+", " ", cur).strip() return cur # ────────────────────────────────────────────────────────────────────── # Law cache — every successful Wikisource fetch is saved to disk so the # same law never needs to be re-fetched. File: tau_rag/runtime/law_cache/ # Each law: one JSON file named `{sha256(name)}.json`. # Also tracks aliases so "חוק החוזים" and "חוק החוזים (חלק כללי)" both # resolve to the same cached entry. 
# ──────────────────────────────────────────────────────────────────────
_LAW_CACHE_DIR = _wa_pl.Path("tau_rag/runtime/law_cache")
_LAW_ALIAS_FILE = _LAW_CACHE_DIR / "aliases.json"
_LAW_CACHE_LOCK = _wa_th.Lock()


def _law_cache_key(name: str) -> str:
    """Return the cache-file stem for *name*.

    The stem is the first 16 hex characters of the SHA-256 digest of the
    whitespace-stripped, UTF-8 encoded name.
    """
    stripped = name.strip()
    return _wa_hl.sha256(stripped.encode("utf-8")).hexdigest()[:16]


def _law_cache_get(name: str) -> Optional[dict]:
    """Load the cached entry for *name*, following one alias hop at a time.

    Returns the parsed JSON of the cache file keyed by *name*; if no such
    file exists, looks *name* up in the alias file and retries with the
    canonical title found there. Returns None for an empty name, an
    unreadable cache file, or when neither lookup succeeds.
    """
    if not name:
        return None

    # Direct hit: a file named after the hash of the requested name.
    direct_path = _LAW_CACHE_DIR / f"{_law_cache_key(name)}.json"
    if direct_path.exists():
        try:
            with open(direct_path, "r", encoding="utf-8") as fh:
                return json.load(fh)
        except Exception:
            # An existing but unreadable file is treated as a miss; the
            # alias table is deliberately not consulted in this case.
            return None

    # Alias hit: requested name -> canonical title -> retry.
    try:
        with open(_LAW_ALIAS_FILE, "r", encoding="utf-8") as fh:
            alias_table = json.load(fh)
        target = alias_table.get(name.strip())
        if target:
            return _law_cache_get(target)
    except Exception:
        pass
    return None


def _law_cache_set(name: str, canonical_title: str, data: dict) -> None:
    """Persist *data* under *canonical_title* and remember *name* as an alias.

    The payload is written to the file keyed by the canonical title. When
    the requested name differs from the canonical title, the alias file is
    updated with ``name -> canonical_title``. Write failures are reported
    (payload) or ignored (alias file); nothing is raised for them.
    """
    _LAW_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    with _LAW_CACHE_LOCK:
        payload_path = _LAW_CACHE_DIR / f"{_law_cache_key(canonical_title)}.json"
        try:
            with open(payload_path, "w", encoding="utf-8") as fh:
                json.dump(data, fh, ensure_ascii=False)
        except Exception as e:
            print(f"[law-cache] write fail: {e}")
            return

        requested = name.strip()
        if requested == canonical_title:
            # Nothing to alias: the caller already used the canonical title.
            return

        # Read-modify-write of the alias table; a missing or corrupt
        # alias file simply starts a fresh table.
        alias_table = {}
        try:
            with open(_LAW_ALIAS_FILE, "r", encoding="utf-8") as fh:
                alias_table = json.load(fh)
        except Exception:
            pass
        alias_table[requested] = canonical_title
        try:
            with open(_LAW_ALIAS_FILE, "w", encoding="utf-8") as fh:
                json.dump(alias_table, fh, ensure_ascii=False)
        except Exception:
            pass


def _law_cache_list() -> List[dict]:
    """Summarise every cached law (used by the popular-list UI).

    Each summary carries ``name``, ``n_sections``, ``n_paragraphs``,
    ``n_chars`` and ``source_url``. The alias file and any cache file that
    cannot be read or parsed are skipped.
    """
    if not _LAW_CACHE_DIR.exists():
        return []

    summaries = []
    for cache_file in sorted(_LAW_CACHE_DIR.glob("*.json")):
        if cache_file.name == "aliases.json":
            continue
        try:
            with open(cache_file, "r", encoding="utf-8") as fh:
                entry = json.load(fh)
            summaries.append({
                "name": entry.get("name") or "?",
                "n_sections": entry.get("n_sections") or 0,
                "n_paragraphs": entry.get("n_paragraphs") or 0,
                "n_chars": entry.get("n_chars") or 0,
                "source_url": entry.get("source_url"),
            })
        except Exception:
            continue
    return summaries


@app.post("/v1/laws/prefetch")
def laws_prefetch(top: int = 20):  # type: ignore
    """v2.99.101 — Background-friendly bulk prefetch.

    Takes the popular-laws listing, keeps the entries not yet marked as
    cached, and fetches at most ``min(top, 20)`` (at least one) of them via
    ``law_by_name``, pausing 0.5 s after each fetch to stay polite to
    Wikisource (which is community-operated). Run multiple times for
    larger batches.

    Returns a dict with ``n_fetched`` / ``n_failed`` and the per-law
    details, the popular-laws error response unchanged if that call
    failed, or a 500 JSONResponse on any unexpected exception.
    """
    import time as _t

    try:
        # Over-fetch: many of the most popular laws may already be cached.
        popular = laws_popular(top * 2)
        if isinstance(popular, JSONResponse):
            return popular

        batch_cap = max(1, min(top, 20))
        pending = [row for row in popular.get("items", [])
                   if not row.get("cached")][:batch_cap]
        if not pending:
            return {"ok": True, "n_fetched": 0, "reason": "all_already_cached"}

        fetched = []
        failed = []
        for row in pending:
            law_name = row["name"]
            outcome = law_by_name(law_name)
            is_mapping = isinstance(outcome, dict)
            if is_mapping and outcome.get("ok"):
                fetched.append({
                    "name": law_name,
                    "n_sections": outcome.get("n_sections", 0),
                    "n_paragraphs": outcome.get("n_paragraphs", 0),
                })
            else:
                reason = outcome.get("reason") if is_mapping else None
                failed.append({
                    "name": law_name,
                    "reason": reason or "unknown",
                })
            _t.sleep(0.5)  # polite delay (~2 req/sec max)

        return {
            "ok": True,
            "n_fetched": len(fetched),
            "n_failed": len(failed),
            "fetched": fetched,
            "failed": failed,
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False,
            "error": f"{type(e).__name__}: {e}",
        })


@app.get("/v1/laws/cached")
def laws_cached():  # type: ignore
    """List all laws that have been fetched and cached on disk, largest
    (by ``n_chars``) first."""
    summaries = sorted(
        _law_cache_list(),
        key=lambda r: r.get("n_chars", 0),
        reverse=True,
    )
    return {
        "ok": True,
        "n": len(summaries),
        "items": summaries,
    }


# ──────────────────────────────────────────────────────────────────────
# v2.99.104 — Auto-seed law cache on cold start.
# HF Space's ephemeral
# storage wipes the cache on every rebuild, so we kick off a background
# prefetch of the top-20 most-cited laws right after import. Subsequent
# user requests for popular laws then hit cache, not Wikisource.
# ──────────────────────────────────────────────────────────────────────
def _autoseed_law_cache() -> None:
    """Start a daemon thread that seeds the law cache shortly after import.

    The worker waits 30 s, skips seeding when at least 15 laws are already
    cached, and otherwise calls ``laws_prefetch(top=20)``. Every outcome is
    printed; no exception escapes the thread.
    """
    import threading, time

    def _worker():
        try:
            time.sleep(30)  # let pipeline finish loading first
            # Read the listing once so the threshold test and the log
            # line report the same number.
            n_cached = len(_law_cache_list())
            if n_cached >= 15:
                print(f"[law-cache] already seeded ({n_cached} laws); skip")
                return
            print("[law-cache] cold start — seeding top-20 popular laws")
            res = laws_prefetch(top=20)
            if isinstance(res, dict) and res.get("ok"):
                print(f"[law-cache] seed complete: {res.get('n_fetched')} fetched, "
                      f"{res.get('n_failed')} failed")
            else:
                print(f"[law-cache] seed failed: {res}")
        except Exception as e:
            print(f"[law-cache] seed exception: {type(e).__name__}: {e}")

    threading.Thread(target=_worker, daemon=True).start()


_autoseed_law_cache()


@app.get("/v1/laws/popular")
def laws_popular(top: int = 40):  # type: ignore
    """Return the laws most-mentioned in our judgment corpus.

    Uses citation_network.popular_citations filtered to statute kind,
    strips the trailing Hebrew-year suffix from each citation, drops
    duplicates of the shortened name, and keeps the first ``top`` entries.
    Each item carries ``name`` (shortened), ``full`` (as cited),
    ``n_citers`` and ``cached`` (whether the law cache already holds it).

    Returns ``{"ok": True, "n": ..., "items": [...]}`` or a 500
    JSONResponse if anything raises.
    """
    try:
        from ..pipeline import get_pipeline
        from ..citation_network import get_or_build, popular_citations

        # One year-suffix pattern for BOTH dedupe and cache matching.
        # The separator class covers hyphen, maqaf, en-dash (–) and
        # em-dash (—); previously the dedupe step only knew hyphen/maqaf,
        # so titles like "..., תשל״ג–1973" were never shortened.
        year_suffix_rx = _re.compile(r",\s+ה?תש[א-ת][\"׳״]?[א-ת]?[-־–—]\d{4}\s*$")

        pipe = get_pipeline()
        cn = get_or_build(pipe)
        rows = popular_citations(cn, top_k=max(top, 80), kind="statute")

        # Filter to laws (statute_refs include both laws & regulations)
        # and dedupe by the year-stripped name.
        seen = set()
        items = []
        for r in rows:
            name = r.get("citation") or ""
            if not name:
                continue
            short = year_suffix_rx.sub("", name).strip()
            if short in seen:
                continue
            seen.add(short)
            items.append({
                "name": short,
                "full": name,
                "n_citers": r.get("n_citers") or 0,
            })
            if len(items) >= top:
                break

        # v2.99.103 — Mark cached. The cache stores canonical titles like
        # "חוק החוזים (חלק כללי), תשל״ג–1973" but popular returns short names
        # with year stripped. Match against multiple forms (short, long, alias)
        # AND fall back to a prefix-match (cached title starts with item name).
        cached_long = {c["name"] for c in _law_cache_list()}
        cached_short = {year_suffix_rx.sub("", n).strip() for n in cached_long}
        alias_keys = set()
        try:
            with open(_LAW_ALIAS_FILE, "r", encoding="utf-8") as f:
                alias_keys = set(json.load(f).keys())
        except Exception:
            # Missing or unreadable alias file: match on titles only.
            pass

        def _is_cached(short_name: str, full_name: str) -> bool:
            if not short_name:
                return False
            for known in (cached_long, cached_short, alias_keys):
                if short_name in known or full_name in known:
                    return True
            # Prefix fallback — canonical "X, תש..." starts with short "X"
            prefix = short_name + ","
            return any(c.startswith(prefix) for c in cached_long)

        for it in items:
            it["cached"] = _is_cached(it["name"], it["full"])
        return {"ok": True, "n": len(items), "items": items}
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False,
            "error": f"{type(e).__name__}: {e}",
        })


# ──────────────────────────────────────────────────────────────────────
# v2.99.114 — Section-level reverse links. For each law, scan all
# judgments that cite it and extract "סעיף N לחוק X" patterns to build
# a per-section citer count. Cached to disk under law_cache_meta/.
# ────────────────────────────────────────────────────────────────────── _LAW_META_DIR = _wa_pl.Path("tau_rag/runtime/law_cache_meta") def _section_citer_key(canonical: str) -> str: return _wa_hl.sha256(canonical.strip().encode("utf-8")).hexdigest()[:16] def _build_section_citer_index(pipe, cn, canonical: str, lookup_strs: set) -> dict: """Scan all docs that cite this law, extract per-section citers. Returns: {section_number: {"n": int, "doc_ids": [str, ...]}}""" # Build regex: catches "סעיף N לX" / "ס' N לX" / "תקנה N לX" def esc_rx(s): return _re.sub(r'([.*+?^${}()|\[\]\\])', r'\\\1', s) sorted_strs = sorted(lookup_strs, key=len, reverse=True) law_alt = "|".join(esc_rx(s) for s in sorted_strs) rx = _re.compile( r"(?:סעיף|ס\'|תקנה|תקנות)\s+([0-9]+[א-ת]?(?:\([א-ת0-9]{1,3}\))?)\s+ל(?:" + law_alt + r")" ) # Get all docs that cite this law (from any lookup string) candidate_ids = set() for s in lookup_strs: for did in (cn.cited_by.get(s, []) or []): candidate_ids.add(did) # Filter out non-judgment docs NON_JUD = ("heb_law/", "heb_statute/", "heb_wikilaw/", "heb_kolzchut/") candidate_ids = {d for d in candidate_ids if not any(d.startswith(p) for p in NON_JUD)} by_section: Dict[str, dict] = {} docs_meta: Dict[str, dict] = {} docs = (getattr(pipe, "_indexed_docs", None) or getattr(pipe, "_docs", None) or []) doc_by_id = {} for d in docs: did = getattr(d, "doc_id", None) or getattr(d, "id", None) if did and did in candidate_ids: doc_by_id[did] = d for did, d in doc_by_id.items(): md = getattr(d, "metadata", None) or {} if md.get("kind") in ("statute", "law", "wikipedia", "kolzchut"): continue text = getattr(d, "text", "") or "" if not text and hasattr(pipe, "get_text"): try: text = pipe.get_text(did) or "" except Exception: text = "" if not text: continue # Track meta for later expansion docs_meta[did] = { "citation": md.get("citation") or did, "court": md.get("court", ""), "verdict_dt": (md.get("verdict_dt") or "")[:10], } seen_sections_in_doc = set() for m in 
rx.finditer(text): sec = m.group(1).strip().rstrip(".") if sec in seen_sections_in_doc: continue seen_sections_in_doc.add(sec) # v2.99.116 — law-specific classifier match_text = m.group(0) rel = _classify_law_citation_context(text, match_text) slot = by_section.setdefault(sec, {"n": 0, "doc_ids": [], "relations": []}) slot["n"] += 1 slot["doc_ids"].append(did) slot["relations"].append(rel) return { "schema": "v3-law-rel", # v2.99.116 — law-specific relations "canonical": canonical, "n_sections_cited": len(by_section), "n_docs": len(docs_meta), "sections": by_section, "docs_meta": docs_meta, } @app.get("/v1/law/section-summary") def law_section_summary(name: str): # type: ignore """v2.99.114 — Per-section citer counts for a law. Returns sections={section_num: n_citers} for badge display in UI. Cached to disk after first build (slow scan).""" try: from ..pipeline import get_pipeline from ..citation_network import get_or_build pipe = get_pipeline() cn = get_or_build(pipe) nm = (name or "").strip() if not nm: return JSONResponse(status_code=400, content={"ok": False, "reason": "empty_name"}) # Resolve canonical alias_map = {} try: with open(_LAW_ALIAS_FILE, "r", encoding="utf-8") as f: alias_map = json.load(f) except Exception: pass canonical = alias_map.get(nm, nm) # Try cache (only if it matches current schema) _LAW_META_DIR.mkdir(parents=True, exist_ok=True) cache_p = _LAW_META_DIR / (_section_citer_key(canonical) + ".json") if cache_p.exists(): try: with open(cache_p, "r", encoding="utf-8") as f: cached = json.load(f) if cached.get("schema") == "v3-law-rel": return { "ok": True, "name": nm, "canonical": canonical, "from_cache": True, "n_sections": cached.get("n_sections_cited", 0), "n_docs": cached.get("n_docs", 0), "sections": {s: v["n"] for s, v in cached.get("sections", {}).items()}, } # else: stale schema, fall through and rebuild except Exception: pass # Build lookup_strs same way as law_citers lookup_strs = {nm, canonical} for short, canon in 
alias_map.items(): if canon == canonical: lookup_strs.add(short) _year_re = _re.compile(r",\s+ה?תש[א-ת][\"׳״]?[א-ת]?[-־–—]\d{4}\s*$") short_canon = _year_re.sub("", canonical).strip() if short_canon != canonical: lookup_strs.add(short_canon) # Build the index (slow) index = _build_section_citer_index(pipe, cn, canonical, lookup_strs) # Cache try: with open(cache_p, "w", encoding="utf-8") as f: json.dump(index, f, ensure_ascii=False) except Exception as e: print(f"[section-citers] cache write fail: {e}") return { "ok": True, "name": nm, "canonical": canonical, "from_cache": False, "n_sections": index["n_sections_cited"], "n_docs": index["n_docs"], "sections": {s: v["n"] for s, v in index["sections"].items()}, } except Exception as e: return JSONResponse(status_code=500, content={"ok": False, "error": f"{type(e).__name__}: {e}"}) @app.get("/v1/law/section-citers") def law_section_citers(name: str, section: str, top: int = 20): # type: ignore """v2.99.114 — Judgments that cite a SPECIFIC section of a law.""" try: from ..pipeline import get_pipeline from ..citation_network import get_or_build pipe = get_pipeline() cn = get_or_build(pipe) nm = (name or "").strip() sec = (section or "").strip().rstrip(".") if not nm or not sec: return JSONResponse(status_code=400, content={"ok": False, "reason": "missing_args"}) # Resolve canonical alias_map = {} try: with open(_LAW_ALIAS_FILE, "r", encoding="utf-8") as f: alias_map = json.load(f) except Exception: pass canonical = alias_map.get(nm, nm) # Load index from cache (build if missing) _LAW_META_DIR.mkdir(parents=True, exist_ok=True) cache_p = _LAW_META_DIR / (_section_citer_key(canonical) + ".json") if not cache_p.exists(): # Trigger summary build first (which writes the cache) law_section_summary(nm) if not cache_p.exists(): return {"ok": True, "n": 0, "items": []} with open(cache_p, "r", encoding="utf-8") as f: index = json.load(f) sect_data = (index.get("sections") or {}).get(sec) if not sect_data: return {"ok": True, "n": 
0, "items": [], "section": sec, "canonical": canonical} docs_meta = index.get("docs_meta", {}) relations = sect_data.get("relations") or ([None] * len(sect_data["doc_ids"])) items = [] for i, did in enumerate(sect_data["doc_ids"][:top]): md = docs_meta.get(did, {}) items.append({ "doc_id": did, "citation": md.get("citation") or did, "court": md.get("court", ""), "verdict_dt": md.get("verdict_dt", ""), "relation": (relations[i] if i < len(relations) else None) or "neutral", }) # Sort newest first def _year(r): s = (r.get("verdict_dt") or "")[:4] return -int(s) if s.isdigit() else 0 items.sort(key=_year) return { "ok": True, "name": nm, "canonical": canonical, "section": sec, "n": len(items), "n_total": sect_data["n"], "items": items, } except Exception as e: return JSONResponse(status_code=500, content={"ok": False, "error": f"{type(e).__name__}: {e}"}) # ────────────────────────────────────────────────────────────────────── # v2.99.151 — Branded email helper. Wraps a plain-text body in a # legal-eye HTML template (Navy + Gold + Frank Ruhl Libre) and sends as # multipart/alternative (plain + HTML). Used by all SMTP-based email # senders (magic link, lawyer assigned, lead notify, status change, # dispute, admin message, digest, lawyer application). # ────────────────────────────────────────────────────────────────────── def _build_branded_email_html(plain_body: str, *, title: str = "", cta_label: str = "", cta_url: str = "") -> str: """Wrap plain text in branded HTML template. Inline CSS only (email-safe).""" import html as _h paras = [] for blk in (plain_body or "").split("\n\n"): blk = blk.strip() if not blk: continue esc = _h.escape(blk).replace("\n", "
") # Auto-link bare URLs (very simple: http(s)://...) import re as _re esc = _re.sub(r'(https?://[^\s<]+)', r'\1', esc) paras.append(f'

{esc}

') body_html = "\n".join(paras) title_html = "" if title: title_html = ( f'

' f'{_h.escape(title)}

' ) cta_html = "" if cta_label and cta_url: cta_html = ( f'' ) return f"""
LEGAL EYE
Legal Intelligence
 
{title_html} {body_html} {cta_html}

רואים משפט אחרת · מקור. הקשר. עורך דין כשצריך.

© legal-eye · legal-eye.1bigfam.com

""" def _send_email_branded(to_addr: str, subject: str, plain_body: str, *, title: str = "", cta_label: str = "", cta_url: str = "", timeout: int = 15) -> bool: """Single SMTP sender. Sends multipart (plain + branded HTML). Returns True on send. Reads SMTP config from env (LE_LEAD_EMAIL_SMTP_*). If unconfigured, returns False.""" import os, smtplib from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText try: host = os.environ.get("LE_LEAD_EMAIL_SMTP_HOST", "smtp.gmail.com").strip() port_str = os.environ.get("LE_LEAD_EMAIL_SMTP_PORT", "465").strip() or "465" user = os.environ.get("LE_LEAD_EMAIL_SMTP_USER", "").strip() password = os.environ.get("LE_LEAD_EMAIL_SMTP_PASS", "").strip() from_addr = os.environ.get("LE_LEAD_EMAIL_FROM", user).strip() or user if not (user and password and to_addr): return False msg = MIMEMultipart("alternative") msg["Subject"] = subject msg["From"] = from_addr msg["To"] = to_addr msg.attach(MIMEText(plain_body, "plain", "utf-8")) html_body = _build_branded_email_html(plain_body, title=title, cta_label=cta_label, cta_url=cta_url) msg.attach(MIMEText(html_body, "html", "utf-8")) port = int(port_str) if port == 465: with smtplib.SMTP_SSL(host, port, timeout=timeout) as s: s.login(user, password); s.send_message(msg) else: with smtplib.SMTP(host, port, timeout=timeout) as s: s.starttls(); s.login(user, password); s.send_message(msg) return True except Exception as e: print(f"[email] send FAIL to={to_addr}: {type(e).__name__}: {e}") return False # ────────────────────────────────────────────────────────────────────── # v2.99.120 — Lawyer waitlist. When a user clicks "פנה לעורך דין" from # the triage result, we collect a structured request (no lawyer pool # yet — pre-launch). Each submission saved to JSONL on disk. 
# ──────────────────────────────────────────────────────────────────────
_LAWYER_REQUEST_PATH = _wa_pl.Path("tau_rag/runtime/lawyer_requests.jsonl")
_LAWYER_REQUEST_LOCK = _wa_th.Lock()


def _notify_lawyer_assigned_async(record: dict, lawyer_email: str, lawyer_name: str) -> None:
    """v2.99.136 — Notify a lawyer that a request was assigned to them.

    Creates the in-app notification synchronously (v2.99.143), then sends
    a branded email from a daemon thread so the caller is not blocked.

    PII gating: the question, domain and risk level are included; the
    client's contact details are NOT (the lawyer must accept first).

    Args:
        record: Lawyer-request record. Keys read: ``id``, ``question``,
            ``domain``, ``risk_level``, ``urgent``, ``has_attachment``.
        lawyer_email: Recipient address; the function is a no-op when empty.
        lawyer_name: Name interpolated into the email greeting.
    """
    import threading

    if not lawyer_email:
        return

    # One label map shared by the in-app notification and the email, so
    # both channels show the same Hebrew domain name (unknown keys fall
    # back to the raw domain value).
    domain_key = record.get("domain", "")
    domain_he = {
        "employment_law": "דיני עבודה",
        "contracts": "דיני חוזים",
        "consumer": "דיני צרכנות",
        "tenancy": "שכירות / מקרקעין",
        "commercial": "מסחרי",
        "tax": "מיסים",
        "ip": "קניין רוחני",
    }.get(domain_key, domain_key)
    urgent = bool(record.get("urgent"))
    headline = "⚠ פנייה דחופה הוקצתה לך" if urgent else "פנייה חדשה הוקצתה לך"

    # v2.99.143 — in-app notification
    _create_notification(
        lawyer_email,
        kind="lawyer_assigned",
        title=headline,
        body=f"תחום: {domain_he or '(לא סווג)'}",
        link="/lawyer-dashboard/",
        related_request_id=record.get("id", ""),
    )

    def _worker():
        # Runs on a daemon thread: guard everything so a malformed record
        # is logged instead of killing the thread with a bare traceback.
        try:
            # `question` / `id` may be present but None; normalise once so
            # slicing and len() below cannot raise TypeError.
            question = record.get("question") or ""
            q_short = question[:300]
            q_ellipsis = "..." if len(question) > 300 else ""
            short_id = str(record.get("id") or "?")[:8]
            sla = "4 שעות" if urgent else "24 שעות"
            urgency_line = ("⚠ דחוף · SLA " if urgent else "רגיל · SLA ") + sla
            attachment_line = "מצורף מסמך: כן\n" if record.get("has_attachment") else ""
            risk_he = {
                "high": "גבוה",
                "medium_high": "בינוני-גבוה",
                "medium": "בינוני",
                "low": "נמוך",
            }.get(record.get("risk_level", ""), record.get("risk_level", "?"))
            body = (
                f"שלום עו\"ד {lawyer_name},\n\n"
                f"פנייה חדשה הוקצתה אליך ב-legal-eye.\n\n"
                f"תחום: {domain_he or '(לא סווג)'}\n"
                f"רמת סיכון: {risk_he}\n"
                f"דחיפות: {urgency_line}\n"
                f"{attachment_line}\n"
                f"השאלה של הלקוח:\n"
                f"\"{q_short}{q_ellipsis}\"\n\n"
                f"השלב הבא: לחץ \"קבל\" בדשבורד כדי לראות את פרטי הקשר של הלקוח ולהתחיל בעבודה. אם הפנייה לא מתאימה לך — לחץ \"דחה\" ונשייך אותה לעו\"ד אחר.\n\n"
                f"מזהה: {short_id}"
            )
            subject = (
                f"[legal-eye] {'⚠ פנייה דחופה' if urgent else 'פנייה חדשה'} "
                f"· {domain_he or '?'} · {short_id}"
            )
            ok = _send_email_branded(
                lawyer_email,
                subject,
                body,
                title=headline,
                cta_label="צפה בדשבורד ←",
                cta_url="https://legal-eye.1bigfam.com/lawyer-dashboard/",
            )
            if ok:
                print(f"[lawyer-assigned] sent → {lawyer_email}")
        except Exception as e:
            print(f"[lawyer-assigned] FAIL: {type(e).__name__}: {e}")

    threading.Thread(target=_worker, daemon=True).start()


def _notify_user_status_change_async(record: dict, old_status: str, new_status: str) -> None:
    """v2.99.133 — Tell the user that their request's status changed.

    Only the transitions into ``"contacted"`` and ``"done"`` notify, and
    only when the status actually changed and the record carries a
    ``user_email``. The in-app notification (v2.99.143) is created
    synchronously; the email is sent from a daemon thread and any failure
    there is logged, never raised.

    Args:
        record: Lawyer-request record. Keys read: ``user_email``, ``id``,
            ``question``.
        old_status: Status before the change.
        new_status: Status after the change.
    """
    import threading

    to_addr = record.get("user_email")
    if not to_addr:
        return
    # Only notify meaningful transitions (skip spam, no-op)
    if new_status not in ("contacted", "done"):
        return
    if new_status == old_status:
        return

    # v2.99.143 — in-app notification
    if new_status == "contacted":
        note_title = "הצוות יצר איתך קשר על הפנייה שלך"
        note_body = "עורך דין מאומת בתהליך שיוך. נחזור אליך בקרוב."
    else:  # done
        note_title = "הפנייה שלך נסגרה"
        note_body = ""
    _create_notification(
        to_addr,
        kind="status_change",
        title=note_title,
        body=note_body,
        link="/my-matters/",
        related_request_id=record.get("id", ""),
    )

    def _worker():
        # Daemon thread: log failures rather than letting the thread die.
        try:
            # `question` / `id` may be None; normalise before len()/slicing.
            question = record.get("question") or ""
            q_short = question[:140]
            q_ellipsis = "..." if len(question) > 140 else ""
            short_id = str(record.get("id") or "?")[:8]
            if new_status == "contacted":
                subject = "legal-eye · הצוות יצר איתך קשר על הפנייה שלך"
                title = "הפנייה שלך התקדמה"
                body = (
                    f"שלום,\n\n"
                    f"הפנייה שלך ל-legal-eye התקדמה — הצוות יצר איתך קשר ועכשיו הוא בתהליך שיוך עורך דין מתאים.\n\n"
                    f"הפנייה: \"{q_short}{q_ellipsis}\"\n"
                    f"מזהה: {short_id}\n\n"
                    f"אפשר לעקוב אחרי הסטטוס באזור האישי."
                )
            else:  # done
                subject = "legal-eye · הפנייה שלך טופלה"
                title = "הפנייה שלך נסגרה"
                body = (
                    f"שלום,\n\n"
                    f"הפנייה שלך ל-legal-eye נסגרה. אם יש לך שאלות המשך — אפשר לפתוח פנייה חדשה דרך הצ'אט הראשי.\n\n"
                    f"הפנייה: \"{q_short}{q_ellipsis}\"\n"
                    f"מזהה: {short_id}"
                )
            ok = _send_email_branded(
                to_addr,
                subject,
                body,
                title=title,
                cta_label="לאזור האישי ←",
                cta_url="https://legal-eye.1bigfam.com/my-matters/",
            )
            if ok:
                print(f"[user-notify] sent {new_status} email → {to_addr}")
        except Exception as e:
            print(f"[user-notify] FAIL: {type(e).__name__}: {e}")

    threading.Thread(target=_worker, daemon=True).start()


def _notify_lead_async(record: dict) -> None:
    """v2.99.122 — Notify outside-world about a new waitlist lead.

    Fire-and-forget: the work happens on a daemon thread and every
    failure is caught and printed, so the API response is never blocked.

    Two channels, both optional:
      - Webhook: env LE_LEAD_WEBHOOK_URL (POST JSON to any URL)
      - Email:   env LE_LEAD_EMAIL_TO, sent through `_send_email_branded`

    Args:
        record: The lead record; it is sent in full inside the webhook
            payload and summarised in the email body.
    """
    import os
    import threading

    def _worker():
        # ── Webhook ──
        try:
            url = os.environ.get("LE_LEAD_WEBHOOK_URL", "").strip()
            if url:
                import urllib.request

                # Build a readable summary alongside the raw record
                summary = (
                    f"🆕 פנייה חדשה ל-legal-eye\n"
                    f"מזהה: {record.get('id', '?')[:8]}\n"
                    f"שאלה: {record.get('question', '')[:200]}\n"
                    f"שם: {record.get('name', '(לא צוין)')}\n"
                    f"קשר: {record.get('contact', '?')} ({record.get('contact_method', 'either')})\n"
                    f"תחום: {record.get('domain', '?')} · "
                    f"סיכון: {record.get('risk_level', '?')}\n"
                    f"דחוף: {'כן' if record.get('urgent') else 'לא'}\n"
                    f"הערות: {record.get('notes', '(אין)')}\n"
                    f"זמן: {record.get('iso', '?')}"
                )
                # "text" and "summary" carry the same string; presumably so
                # different webhook receivers can pick the key they expect.
                payload = {"text": summary, "summary": summary, "record": record}
                req = urllib.request.Request(
                    url,
                    data=json.dumps(payload).encode("utf-8"),
                    headers={"Content-Type": "application/json"},
                    method="POST",
                )
                try:
                    with urllib.request.urlopen(req, timeout=10) as r:
                        print(f"[lead-notify] webhook OK ({r.status}) → {url[:60]}")
                except Exception as e:
                    print(f"[lead-notify] webhook FAIL: {type(e).__name__}: {e}")
        except Exception as e:
            print(f"[lead-notify] webhook outer FAIL: {e}")

        # ── Email ──
        try:
            to_addr = os.environ.get("LE_LEAD_EMAIL_TO", "").strip()
            if to_addr:
                body = (
                    f"פנייה חדשה ל-legal-eye התקבלה זה עתה.\n\n"
                    f"מזהה: {record.get('id', '?')}\n"
                    f"זמן: {record.get('iso', '?')}\n\n"
                    f"שאלה:\n{record.get('question', '')}\n\n"
                    f"שם: {record.get('name', '(לא צוין)')}\n"
                    f"קשר: {record.get('contact', '?')}\n"
                    f"דרך מועדפת: {record.get('contact_method', 'either')}\n"
                    f"תחום: {record.get('domain', '?')}\n"
                    f"סיווג: {record.get('triage_category', '?')}\n"
                    f"רמת סיכון: {record.get('risk_level', '?')}\n"
                    f"דחוף: {'כן' if record.get('urgent') else 'לא'}\n\n"
                    f"הערות:\n{record.get('notes', '(אין)')}"
                )
                subject = f"[legal-eye] פנייה חדשה · {record.get('domain', '?')} · {record.get('id', '?')[:8]}"
                ok = _send_email_branded(
                    to_addr,
                    subject,
                    body,
                    title="🆕 פנייה חדשה ל-legal-eye",
                    cta_label="לדשבורד שיוך עורכי דין ←",
                    cta_url="https://legal-eye.1bigfam.com/admin-leads/lawyer-requests.html",
                )
                if ok:
                    print(f"[lead-notify] email OK → {to_addr}")
        except Exception as e:
            print(f"[lead-notify] email outer FAIL: {e}")

    threading.Thread(target=_worker, daemon=True).start()


class _LawyerRequestBody(BaseModel):  # type: ignore
    # Request payload for POST /v1/lawyer-request/submit.
    question: str
    name: Optional[str] = ""
    contact: str                                  # phone or email
    contact_method: Optional[str] = "either"      # whatsapp / email / phone / either
    domain: Optional[str] = ""
    triage_category: Optional[str] = ""
    risk_level: Optional[str] = ""
    urgent: Optional[bool] = False
    notes: Optional[str] = ""
    hp: Optional[str] = ""                        # v2.99.180 — honeypot
    hp2: Optional[str] = ""                       # v2.99.180 — honeypot


@app.post("/v1/lawyer-request/submit")
def lawyer_request_submit(req: _LawyerRequestBody, request: Request):  # type: ignore
    """v2.99.120 — Save a lawyer-contact request to the waitlist.

    v2.99.130 — Attaches user_id if signed in (Authorization: Bearer ...).
    v2.99.180 — honeypot + IP rate limit (3/min, burst 5).
    No lawyer pool exists yet; this is the lead-capture before launch.

    Returns:
        A dict ``{"ok": True, "id", "expected_sla", "message"}`` on
        success (honeypot hits get a fake success with id ``"drop"``), or
        a ``JSONResponse`` with status 429 (rate limited), 400 (question
        or contact shorter than 4 characters) or 500 (unexpected error
        before the record was saved).
    """
    import time
    import uuid

    # Honeypot — bots fill, humans don't see. Silently accept then drop.
    if _honeypot_caught(req.hp, req.hp2):
        return {"ok": True, "id": "drop", "message": "received"}
    if not _public_rate_check(request, "lawyer-request", per_min=3, burst=5):
        return JSONResponse(status_code=429, content={"ok": False, "reason": "rate_limited"})
    try:
        # v2.99.130 — Optional user attribution. Deliberately best-effort:
        # any failure resolving the session means "anonymous", not an error.
        signed_in_user = None
        try:
            from ..auth import session_from_header
            sess = session_from_header(request.headers.get("authorization"))
            if sess:
                signed_in_user = {"id": sess["user_id"], "email": sess["email"]}
        except Exception:
            pass

        q = (req.question or "").strip()
        c = (req.contact or "").strip()
        if len(q) < 4 or len(c) < 4:
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "missing_required_fields",
            })

        _LAWYER_REQUEST_PATH.parent.mkdir(parents=True, exist_ok=True)
        record = {
            "id": str(uuid.uuid4()),
            "ts": time.time(),
            "iso": time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()),
            "question": q,
            "name": (req.name or "").strip(),
            "contact": c,
            "contact_method": req.contact_method or "either",
            "domain": req.domain or "",
            "triage_category": req.triage_category or "",
            "risk_level": req.risk_level or "",
            "urgent": bool(req.urgent),
            "notes": (req.notes or "").strip(),
            "status": "new",
            "user_id": signed_in_user["id"] if signed_in_user else None,
            "user_email": signed_in_user["email"] if signed_in_user else None,
        }
        # Append under the lock so concurrent submissions cannot interleave
        # partial JSONL lines.
        with _LAWYER_REQUEST_LOCK:
            with open(_LAWYER_REQUEST_PATH, "a", encoding="utf-8") as f:
                f.write(json.dumps(record, ensure_ascii=False) + "\n")

        # From here on the request IS saved. Notification and audit are
        # side channels: if one fails, log it but still report success, so
        # the client does not resubmit and create a duplicate record.
        try:
            # v2.99.122 — fire async notification (webhook + optional email)
            _notify_lead_async(record)
            # v2.99.144 — audit
            _audit_log(
                actor=signed_in_user["email"] if signed_in_user else "anonymous",
                role="user" if signed_in_user else "anonymous",
                action="request_submitted",
                target=record["id"],
                meta={"domain": record["domain"], "urgent": record["urgent"],
                      "has_attachment": False},
            )
        except Exception as e:
            print(f"[lawyer-request] post-save step FAIL #{record['id'][:8]}: "
                  f"{type(e).__name__}: {e}")

        # Always log to stdout (HF Space logs)
        print(f"[lawyer-request] new #{record['id'][:8]} domain={record['domain']} "
              f"urgent={record['urgent']} contact={record['contact_method']}")
        return {
            "ok": True,
            "id": record["id"],
            "expected_sla": "24 hours" if not record["urgent"] else "4 hours",
            "message": "בקשתך התקבלה. נחזור אליך תוך 24 שעות (4 שעות לדחוף).",
        }
    except Exception as e:
        # Also surface the failure in the server log, not only in the response.
        print(f"[lawyer-request] submit FAIL: {type(e).__name__}: {e}")
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}",
        })


@app.post("/v1/lawyer-request/test-notify")
def lawyer_request_test_notify(token: str = ""):  # type: ignore
    """Admin: fire a test notification (webhook + email) without saving
    anything to the waitlist. Useful to verify env-var config.

    Args:
        token: Must equal env ``LE_ADMIN_TOKEN``; if that variable is
            unset or empty every call is rejected with 403.

    Returns:
        A dict reporting that the notification was fired and which of the
        related env vars are set, or a 403 ``JSONResponse``.
    """
    import hmac
    import os
    import time
    import uuid

    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing does not leak how much
    # of a guessed token matched. Bytes are used because compare_digest
    # rejects non-ASCII str arguments.
    token_ok = bool(admin_token) and hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    )
    if not token_ok:
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    record = {
        "id": "test-" + str(uuid.uuid4())[:8],
        "ts": time.time(),
        "iso": time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()),
        "question": "🧪 בדיקת notification — אין צורך לטפל",
        "name": "Test",
        "contact": "test@example.com",
        "contact_method": "email",
        "domain": "employment_law",
        "triage_category": "personal_legal_advice",
        "risk_level": "medium_high",
        "urgent": False,
        "notes": "Sent from /v1/lawyer-request/test-notify",
        "status": "test",
    }
    _notify_lead_async(record)
    return {
        "ok": True,
        "message": "Notification fired in background; check webhook/email destination.",
        "webhook_url_set": bool(os.environ.get("LE_LEAD_WEBHOOK_URL")),
        "email_to_set": bool(os.environ.get("LE_LEAD_EMAIL_TO")),
        "email_user_set": bool(os.environ.get("LE_LEAD_EMAIL_SMTP_USER")),
    }


# ──────────────────────────────────────────────────────────────────────
# v2.99.124 — Document upload v1
# A user with a personal case can attach a document to their lawyer
# request. V1 does NOT do AI document review — the file goes to the
# admin queue alongside the lawyer-request record, and a human (the
# operator) reviews + routes it to a verified lawyer.
# ────────────────────────────────────────────────────────────────────── _DOC_UPLOAD_DIR = _wa_pl.Path("tau_rag/runtime/document_uploads") _DOC_ALLOWED_EXT = {".pdf", ".docx", ".doc", ".png", ".jpg", ".jpeg", ".txt", ".rtf"} _DOC_MAX_BYTES = 10 * 1024 * 1024 # 10 MB # v2.99.137 — lawyer deliverables (what the lawyer hands back to the client) _DELIVERABLE_DIR = _wa_pl.Path("tau_rag/runtime/deliverables") # v2.99.149 — Weekly admin digest state _DIGEST_STATE_PATH = _wa_pl.Path("tau_rag/runtime/admin_digest_state.json") def _compose_admin_digest() -> str: """Build the weekly Hebrew digest body from current JSONL state. Window: last 7 days.""" import time now = time.time() window_start = now - (7 * 24 * 60 * 60) # Load all relevant files requests = [] if _LAWYER_REQUEST_PATH.exists(): try: with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f: for line in f: line = line.strip() if line: try: requests.append(json.loads(line)) except: continue except Exception: pass apps = [] if _LAWYER_APP_PATH.exists(): try: with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f: for line in f: line = line.strip() if line: try: apps.append(json.loads(line)) except: continue except Exception: pass # Activity in the window new_reqs = [r for r in requests if (r.get("ts") or 0) >= window_start] new_apps = [a for a in apps if (a.get("ts") or 0) >= window_start] delivered = [r for r in requests if r.get("deliverable_ts") and r["deliverable_ts"] >= window_start] approved = [r for r in requests if r.get("approval_ts") and r["approval_ts"] >= window_start and not r.get("auto_approved")] auto_releas = [r for r in requests if r.get("approval_ts") and r["approval_ts"] >= window_start and r.get("auto_approved")] disputed_w = [r for r in requests if r.get("dispute_ts") and r["dispute_ts"] >= window_start] resolved_w = [r for r in requests if r.get("dispute_resolution_ts") and r["dispute_resolution_ts"] >= window_start] # Attention items (current state, not windowed) open_disputes = [r for r 
in requests if r.get("status") == "disputed"] pending_apps = [a for a in apps if (a.get("status") or "submitted") in ("submitted", "under_review")] stale_new = [r for r in requests if r.get("status") in ("new", "offered") and (now - (r.get("ts") or now)) > 24 * 3600] contacted_no_deliv = [r for r in requests if r.get("status") == "contacted" and r.get("lawyer_response") == "accepted" and (now - (r.get("assigned_ts") or now)) > 48 * 3600] # Top performers (verified lawyers w/ best acceptance + activity) from ..matching import stats_for_all_lawyers try: all_stats = stats_for_all_lawyers() except: all_stats = {} perf = [] for a in apps: if a.get("status") != "verified": continue s = all_stats.get(a.get("id", ""), {}) if not s.get("received"): continue perf.append({ "name": a.get("full_name", "?"), "received": s["received"], "accepts": s.get("direct_accepted", 0) + s.get("race_accepted", 0), "rate": s.get("acceptance_rate"), "active": s.get("active", 0), }) perf.sort(key=lambda x: (-(x.get("accepts") or 0), -(x.get("rate") or 0))) # Domain distribution this week domain_count: Dict[str, int] = {} for r in new_reqs: d = r.get("domain") or "(לא סווג)" domain_count[d] = domain_count.get(d, 0) + 1 domains_str = " · ".join(f"{d}: {n}" for d, n in sorted(domain_count.items(), key=lambda kv: -kv[1])[:6]) or "(אין)" iso_now = time.strftime("%d/%m/%Y", time.localtime(now)) iso_start = time.strftime("%d/%m/%Y", time.localtime(window_start)) body = ( f"שלום,\n\n" f"סיכום שבועי של legal-eye · {iso_start} → {iso_now}\n\n" f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n" f"📊 פעילות (7 ימים)\n" f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n" f" {len(new_reqs):>3} פניות חדשות\n" f" {len(new_apps):>3} בקשות הצטרפות עו\"ד\n" f" {len(delivered):>3} תוצרים הוגשו\n" f" {len(approved):>3} אושרו ידנית\n" f" {len(auto_releas):>3} שוחררו אוטומטית (72h)\n" f" {len(disputed_w):>3} מחלוקות נפתחו\n" f" {len(resolved_w):>3} מחלוקות נסגרו\n\n" f"תפוצת תחומים: {domains_str}\n\n" 
f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n" f"⚠ דרוש טיפול (מצב נוכחי)\n" f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n" f" {len(open_disputes):>3} מחלוקות פתוחות\n" f" {len(pending_apps):>3} בקשות עו\"ד ממתינות לאימות\n" f" {len(stale_new):>3} פניות 'new'/'offered' מעל 24 שעות\n" f" {len(contacted_no_deliv):>3} פניות שעו\"ד קיבל ולא הגיש תוצר >48 שעות\n\n" ) if perf: body += ( f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n" f"🏆 מובילי הפול (עו\"ד מאומתים)\n" f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n" ) for p in perf[:5]: rate_str = f"{p['rate']}%" if p['rate'] is not None else "—" body += f" עו\"ד {p['name']:<25s} | {p['accepts']}/{p['received']} פניות | קבלה: {rate_str}\n" body += "\n" body += ( f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n" f"מסכי ניהול:\n" f" פניות: https://legal-eye.1bigfam.com/admin-leads/lawyer-requests.html\n" f" בקשות עו\"ד: https://legal-eye.1bigfam.com/admin-leads/lawyer-applications.html\n" f" Triage log: https://legal-eye.1bigfam.com/admin-leads/triage-log.html\n" f" Audit log: https://legal-eye.1bigfam.com/admin-leads/audit-log.html\n\n" f"— legal-eye · weekly digest · {iso_now}\n" ) return body def _send_admin_digest(body: str) -> bool: """v2.99.151 — Send the digest via branded helper. 
Sync.""" import os, time to_addr = os.environ.get("LE_LEAD_EMAIL_TO", "").strip() if not to_addr: print("[digest] missing LE_LEAD_EMAIL_TO") return False ok = _send_email_branded( to_addr, f"[legal-eye] סיכום שבועי · {time.strftime('%d/%m/%Y')}", body, title=f"סיכום שבועי · {time.strftime('%d/%m/%Y')}", cta_label="לדשבורד הניהול ←", cta_url="https://legal-eye.1bigfam.com/admin-leads/lawyer-requests.html", timeout=30, ) if ok: print(f"[digest] sent → {to_addr}") return ok def _load_digest_state() -> dict: if not _DIGEST_STATE_PATH.exists(): return {} try: with open(_DIGEST_STATE_PATH, "r", encoding="utf-8") as f: return json.load(f) except: return {} def _save_digest_state(s: dict) -> None: try: _DIGEST_STATE_PATH.parent.mkdir(parents=True, exist_ok=True) with open(_DIGEST_STATE_PATH, "w", encoding="utf-8") as f: json.dump(s, f, ensure_ascii=False) except Exception as e: print(f"[digest] state save FAIL: {e}") def _start_admin_digest_thread() -> None: """v2.99.149 — Daemon thread. Sends digest once per week on Sunday between 09:00–10:00 Israeli time. Uses a state file to avoid double-fire across restarts.""" import threading, time def _loop(): # Initial warmup delay — let app finish booting time.sleep(90) while True: try: now = time.time() lt = time.localtime(now) # Sunday = weekday 6 in Python (Mon=0...Sun=6) # In Israel, the work week starts Sunday — locale-wise that's # 'weekday' index 6 in the standard tm_wday (Mon=0). 
# Anchor: send on Sunday 9-10 AM local is_sunday = (lt.tm_wday == 6) is_morning = (9 <= lt.tm_hour < 10) if is_sunday and is_morning: st = _load_digest_state() last_iso = st.get("last_sent_iso", "") today_iso = time.strftime("%Y-%m-%d", lt) if not last_iso.startswith(today_iso): body = _compose_admin_digest() ok = _send_admin_digest(body) if ok: _save_digest_state({ "last_sent_ts": now, "last_sent_iso": time.strftime("%Y-%m-%dT%H:%M:%S", lt), }) _audit_log( actor="system", role="system", action="admin_digest_sent", target="", meta={"sent_at": today_iso}, ) except Exception as e: print(f"[digest-loop] FAIL: {e}") time.sleep(30 * 60) # check every 30 min threading.Thread(target=_loop, daemon=True, name="admin-digest").start() print("[digest] background thread started (Sundays 9-10 AM)") _start_admin_digest_thread() @app.post("/v1/admin/send-digest-now") def admin_send_digest_now(token: str = ""): # type: ignore """Admin: trigger the weekly digest manually (for testing or ad-hoc).""" import os, time admin_token = os.environ.get("LE_ADMIN_TOKEN", "") if not admin_token or token != admin_token: return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"}) body = _compose_admin_digest() ok = _send_admin_digest(body) if ok: _save_digest_state({ "last_sent_ts": time.time(), "last_sent_iso": time.strftime("%Y-%m-%dT%H:%M:%S", time.localtime()), }) _audit_log(actor="admin", role="admin", action="admin_digest_sent_manual", target="", meta={}) return {"ok": True, "sent": ok, "body_preview": body[:1200]} # v2.99.143 — In-app notifications storage _NOTIFICATIONS_PATH = _wa_pl.Path("tau_rag/runtime/notifications.jsonl") _NOTIFICATIONS_LOCK = _wa_th.Lock() # v2.99.144 — Audit log storage _AUDIT_LOG_PATH = _wa_pl.Path("tau_rag/runtime/audit_log.jsonl") _AUDIT_LOG_LOCK = _wa_th.Lock() # v2.99.150 — Beta invite codes + feedback _BETA_INVITES_PATH = _wa_pl.Path("tau_rag/runtime/beta_invites.jsonl") _BETA_INVITES_LOCK = _wa_th.Lock() _FEEDBACK_PATH = 
_wa_pl.Path("tau_rag/runtime/feedback.jsonl") _FEEDBACK_LOCK = _wa_th.Lock() def _validate_invite_code(code: str) -> dict: """Returns {ok, code, uses_left, label, expired} for a given code.""" import time code = (code or "").strip().upper() if not code: return {"ok": False, "reason": "empty_code"} if not _BETA_INVITES_PATH.exists(): return {"ok": False, "reason": "no_invites"} try: with open(_BETA_INVITES_PATH, "r", encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue try: rec = json.loads(line) except: continue if rec.get("code", "").upper() == code: if rec.get("expires_ts") and rec["expires_ts"] < time.time(): return {"ok": False, "reason": "expired"} uses_left = (rec.get("max_uses") or 1) - (rec.get("used_count") or 0) if uses_left <= 0: return {"ok": False, "reason": "exhausted"} return { "ok": True, "code": code, "label": rec.get("label", ""), "uses_left": uses_left, "max_uses": rec.get("max_uses") or 1, } return {"ok": False, "reason": "not_found"} except Exception as e: return {"ok": False, "reason": f"error:{type(e).__name__}"} def _consume_invite_code(code: str, who_email: str = "") -> bool: """Increment used_count atomically. 
Returns True if consumed.""" import time code = (code or "").strip().upper() if not code: return False with _BETA_INVITES_LOCK: items = [] if _BETA_INVITES_PATH.exists(): try: with open(_BETA_INVITES_PATH, "r", encoding="utf-8") as f: for line in f: line = line.strip() if line: try: items.append(json.loads(line)) except: continue except Exception: return False found = False for rec in items: if rec.get("code", "").upper() == code: rec["used_count"] = (rec.get("used_count") or 0) + 1 used_by = rec.setdefault("used_by", []) used_by.append({ "email": who_email, "ts": time.time(), "iso": time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()), }) found = True break if not found: return False tmp = _BETA_INVITES_PATH.with_suffix(".jsonl.tmp") with open(tmp, "w", encoding="utf-8") as f: for rec in items: f.write(json.dumps(rec, ensure_ascii=False) + "\n") tmp.replace(_BETA_INVITES_PATH) return True class _CreateInviteBody(BaseModel): # type: ignore code: Optional[str] = "" # auto-generate if empty max_uses: int = 1 expires_in_days: Optional[int] = None # None = never label: Optional[str] = "" # internal note @app.post("/v1/admin/invite/create") def admin_invite_create(body: _CreateInviteBody, token: str = ""): # type: ignore """Admin: create a beta invite code.""" import os, time, secrets, string admin_token = os.environ.get("LE_ADMIN_TOKEN", "") if not admin_token or token != admin_token: return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"}) code = (body.code or "").strip().upper() if not code: alphabet = string.ascii_uppercase + string.digits # Avoid ambiguous chars alphabet = alphabet.replace("0", "").replace("O", "").replace("1", "").replace("I", "").replace("L", "") code = "".join(secrets.choice(alphabet) for _ in range(8)) # Check duplicate if _validate_invite_code(code).get("ok"): return JSONResponse(status_code=409, content={"ok": False, "reason": "duplicate"}) record = { "code": code, "label": (body.label or "").strip(), "max_uses": 
max(1, int(body.max_uses)), "used_count": 0, "used_by": [], "created_ts": time.time(), "created_iso": time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()), "expires_ts": (time.time() + body.expires_in_days * 86400) if body.expires_in_days else None, "expires_iso": (time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime(time.time() + body.expires_in_days * 86400)) if body.expires_in_days else None), } with _BETA_INVITES_LOCK: _BETA_INVITES_PATH.parent.mkdir(parents=True, exist_ok=True) with open(_BETA_INVITES_PATH, "a", encoding="utf-8") as f: f.write(json.dumps(record, ensure_ascii=False) + "\n") _audit_log(actor="admin", role="admin", action="invite_created", target=code, meta={"label": record["label"], "max_uses": record["max_uses"]}) return {"ok": True, "code": code, "record": record} @app.get("/v1/admin/invite/list") def admin_invite_list(token: str = ""): # type: ignore """Admin: list all invite codes with usage.""" import os admin_token = os.environ.get("LE_ADMIN_TOKEN", "") if not admin_token or token != admin_token: return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"}) if not _BETA_INVITES_PATH.exists(): return {"ok": True, "n": 0, "items": []} items = [] with open(_BETA_INVITES_PATH, "r", encoding="utf-8") as f: for line in f: line = line.strip() if line: try: items.append(json.loads(line)) except: continue items.sort(key=lambda r: -(r.get("created_ts") or 0)) return {"ok": True, "n": len(items), "items": items} @app.post("/v1/beta/invite/validate") def beta_invite_validate(req: dict): # type: ignore """Public: validate a code without consuming. 
Used by onboarding form to give live feedback as user types.""" return _validate_invite_code((req or {}).get("code", "")) class _FeedbackBody(BaseModel): # type: ignore message: str kind: Optional[str] = "general" # bug | suggestion | general | praise page: Optional[str] = "" # URL where they were rating: Optional[int] = None # 1-5 hp: Optional[str] = "" # v2.99.180 — honeypot hp2: Optional[str] = "" # v2.99.180 — honeypot # v2.99.180 — Spam protection for public POST endpoints. # IP-based token bucket: per-IP, per-endpoint-key, refilling at N/min. # Lighter than the webhook rate limiter (_wa_check_rate_limit) which is # auth-aware. This one just protects against burst spam. _PUBLIC_RATE_BUCKETS: Dict[str, dict] = {} _PUBLIC_RATE_LOCK = _wa_th.Lock() def _client_ip(request) -> str: """Best-effort client IP (works behind Vercel/HF proxies).""" try: xff = request.headers.get("x-forwarded-for", "") if xff: return xff.split(",")[0].strip() return (request.client.host if request.client else "unknown") or "unknown" except Exception: return "unknown" def _public_rate_check(request, endpoint_key: str, per_min: int = 5, burst: int = 8) -> bool: """Returns True if request is allowed, False if rate-limited. Token bucket per (IP, endpoint_key). 
Defaults: 5/min, burst 8.""" import time ip = _client_ip(request) key = f"{endpoint_key}:{ip}" now = time.time() with _PUBLIC_RATE_LOCK: bucket = _PUBLIC_RATE_BUCKETS.get(key) if bucket is None: bucket = {"tokens": float(burst), "ts": now} _PUBLIC_RATE_BUCKETS[key] = bucket elapsed = now - bucket["ts"] bucket["tokens"] = min(float(burst), bucket["tokens"] + elapsed * (per_min / 60.0)) bucket["ts"] = now if bucket["tokens"] < 1.0: return False bucket["tokens"] -= 1.0 # Cleanup: drop buckets idle >15 min if len(_PUBLIC_RATE_BUCKETS) > 500: cutoff = now - 900 for k in list(_PUBLIC_RATE_BUCKETS.keys()): if _PUBLIC_RATE_BUCKETS[k]["ts"] < cutoff: del _PUBLIC_RATE_BUCKETS[k] return True def _honeypot_caught(*field_values) -> bool: """Return True if any honeypot field is non-empty (bot signature). Pass multiple values, all should be empty strings for a real human.""" for v in field_values: if v and v.strip(): return True return False # v2.99.177 — Newsletter subscribers _NEWSLETTER_PATH = _wa_pl.Path("tau_rag/runtime/newsletter_subscribers.jsonl") _NEWSLETTER_LOCK = _wa_th.Lock() class _NewsletterBody(BaseModel): # type: ignore email: str source: Optional[str] = "" # which page/CTA hp: Optional[str] = "" # v2.99.180 — honeypot (bots fill; humans never see) hp2: Optional[str] = "" # v2.99.180 — secondary honeypot @app.post("/v1/newsletter/subscribe") def newsletter_subscribe(req: _NewsletterBody, request: Request): # type: ignore """v2.99.177 — Append email to newsletter subscribers. Idempotent (silently dedupes if already subscribed). v2.99.180 — honeypot + IP rate limit (5/min, burst 8). v2.99.192 — double opt-in: subscribers start as `pending`, confirmation email sent with HMAC link. Sender skips pending users.""" import time, uuid, re # Honeypot — bots fill these fields; humans never see them. 
if _honeypot_caught(req.hp, req.hp2): return {"ok": True, "status": "subscribed"} if not _public_rate_check(request, "newsletter", per_min=5, burst=8): return JSONResponse(status_code=429, content={"ok": False, "reason": "rate_limited"}) em = (req.email or "").strip().lower() if not em or "@" not in em or "." not in em.split("@")[-1]: return JSONResponse(status_code=400, content={"ok": False, "reason": "invalid_email"}) # Length guard if len(em) > 200: return JSONResponse(status_code=400, content={"ok": False, "reason": "email_too_long"}) # Dedupe — check existing existing = set() if _NEWSLETTER_PATH.exists(): try: with open(_NEWSLETTER_PATH, "r", encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue try: existing.add(json.loads(line).get("email", "").lower()) except: continue except Exception: pass if em in existing: return {"ok": True, "status": "already_subscribed"} rec = { "id": str(uuid.uuid4())[:12], "ts": time.time(), "iso": time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()), "email": em, "source": (req.source or "")[:120], "ip": (request.client.host if request.client else "")[:45], "ua": (request.headers.get("user-agent", "") or "")[:200], # v2.99.192 — double opt-in: starts as pending until user clicks link "pending": True, } try: _NEWSLETTER_PATH.parent.mkdir(parents=True, exist_ok=True) with _NEWSLETTER_LOCK: with open(_NEWSLETTER_PATH, "a", encoding="utf-8") as f: f.write(json.dumps(rec, ensure_ascii=False) + "\n") _audit_log(actor=em, role="user", action="newsletter_subscribe", target=rec["id"], meta={"source": rec["source"]}) # v2.99.192 — fire verification email (best-effort, async) try: _send_newsletter_verification_async(em, rec.get("source", "")) except Exception: pass return {"ok": True, "status": "pending_verification"} except Exception as e: return JSONResponse(status_code=500, content={ "ok": False, "error": f"{type(e).__name__}: {e}", }) def _newsletter_verify_token(email: str) -> str: """v2.99.192 — HMAC token 
for double opt-in verification. Distinct from unsubscribe token (different domain prefix).""" import hmac, hashlib, os secret = os.environ.get("LE_ADMIN_TOKEN", "le-fallback-secret").encode("utf-8") msg = ("verify:" + email.lower()).encode("utf-8") return hmac.new(secret, msg, hashlib.sha256).hexdigest()[:24] def _send_newsletter_verification_async(email: str, source: str = "") -> None: """Send branded verification email asynchronously.""" import threading from urllib.parse import quote def _worker(): verify_url = ( f"https://legal-eye.1bigfam.com/v1/newsletter/verify" f"?email={quote(email)}&t={_newsletter_verify_token(email)}" ) body = ( f"שלום,\n\n" f"ביקשת לקבל newsletter מ-legal-eye. כדי לאשר את המנוי, לחץ על הקישור:\n\n" f"{verify_url}\n\n" f"אם לא ביקשת — התעלם מהאימייל. לא נשמור את האימייל שלך אם לא תאשר.\n\n" f"הקישור תקף ל-7 ימים." ) _send_email_branded( email, "legal-eye · אישור מנוי newsletter", body, title="אשר את המנוי", cta_label="אשר מנוי ←", cta_url=verify_url, ) threading.Thread(target=_worker, daemon=True).start() @app.get("/v1/newsletter/verify") def newsletter_verify(email: str = "", t: str = ""): # type: ignore """v2.99.192 — Double opt-in: verify email via HMAC token. Marks subscriber as no-longer-pending and adds verified_ts.""" from fastapi.responses import HTMLResponse import time em = (email or "").strip().lower() expected = _newsletter_verify_token(em) if em else "" if not em or not t or t != expected: return HTMLResponse(content=_unsub_page(False, "קישור לא תקף או פג תוקף."), status_code=400) items = _load_newsletter_subscribers() found = False for it in items: if (it.get("email", "") or "").lower() == em: it.pop("pending", None) it["verified_ts"] = time.time() it["verified_iso"] = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()) found = True if not found: return HTMLResponse(content=_unsub_page(False, "האימייל לא נמצא ברשימה. 
אולי כבר אישרת או הוסרת."), status_code=404) _save_newsletter_subscribers(items) _audit_log(actor=em, role="user", action="newsletter_verified", target="(self)", meta={}) return HTMLResponse(content=_verify_success_page(em)) def _verify_success_page(email: str) -> str: """Branded confirmation page after successful verification.""" return f""" ✓ אושר · legal-eye

המנוי אושר

תקבל מאיתנו עדכון אחד ~פעם בחודש: השקות, eval שבועי, החלטות גדולות. ביטול בכל עת.

אימייל: {email}

לחזרה לאתר ←

""" @app.get("/v1/admin/newsletter/list") def admin_newsletter_list(token: str = "", limit: int = 500): # type: ignore """v2.99.177 — Admin: list newsletter subscribers.""" import os admin_token = os.environ.get("LE_ADMIN_TOKEN", "") if not admin_token or token != admin_token: return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"}) items = [] if _NEWSLETTER_PATH.exists(): try: with open(_NEWSLETTER_PATH, "r", encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue try: items.append(json.loads(line)) except: continue except Exception: pass items.sort(key=lambda r: -(r.get("ts") or 0)) return {"ok": True, "n_total": len(items), "items": items[:max(1, min(limit, 2000))]} # Add newsletter file to backup try: if "tau_rag/runtime/newsletter_subscribers.jsonl" not in _BACKUP_FILE_PATHS: _BACKUP_FILE_PATHS.append("tau_rag/runtime/newsletter_subscribers.jsonl") except Exception: pass # v2.99.184 — Newsletter sender + unsubscribe (HMAC-token based) def _newsletter_token(email: str) -> str: """Generate stable HMAC token for unsubscribe link. Uses LE_ADMIN_TOKEN as secret. 
24-hex chars (96-bit) — enough to prevent brute force, short enough to fit in URL.""" import hmac, hashlib, os secret = os.environ.get("LE_ADMIN_TOKEN", "le-fallback-secret").encode("utf-8") return hmac.new(secret, email.lower().encode("utf-8"), hashlib.sha256).hexdigest()[:24] def _newsletter_unsubscribe_url(email: str) -> str: """Build the public unsubscribe URL for a subscriber.""" from urllib.parse import quote return f"https://legal-eye.1bigfam.com/v1/newsletter/unsubscribe?email={quote(email)}&t={_newsletter_token(email)}" def _load_newsletter_subscribers() -> List[dict]: items = [] if not _NEWSLETTER_PATH.exists(): return items try: with open(_NEWSLETTER_PATH, "r", encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue try: items.append(json.loads(line)) except: continue except Exception: pass return items def _save_newsletter_subscribers(items: List[dict]) -> None: """Atomic JSONL rewrite.""" with _NEWSLETTER_LOCK: tmp = _NEWSLETTER_PATH.with_suffix(".jsonl.tmp") with open(tmp, "w", encoding="utf-8") as f: for it in items: f.write(json.dumps(it, ensure_ascii=False) + "\n") tmp.replace(_NEWSLETTER_PATH) @app.get("/v1/newsletter/unsubscribe") def newsletter_unsubscribe(email: str = "", t: str = ""): # type: ignore """v2.99.184 — Public unsubscribe with HMAC token verification. 
Marks subscriber with unsubscribed_ts; doesn't delete (so admin can see history).""" from fastapi.responses import HTMLResponse import time em = (email or "").strip().lower() expected = _newsletter_token(em) if em else "" if not em or not t or t != expected: return HTMLResponse(content=_unsub_page(False, "קישור לא תקף או פג תוקף."), status_code=400) items = _load_newsletter_subscribers() found = False for it in items: if (it.get("email", "") or "").lower() == em: it["unsubscribed_ts"] = time.time() it["unsubscribed_iso"] = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()) found = True if not found: return HTMLResponse(content=_unsub_page(False, "האימייל לא נמצא ברשימת המנויים."), status_code=404) _save_newsletter_subscribers(items) _audit_log(actor=em, role="user", action="newsletter_unsubscribe", target="(self)", meta={}) return HTMLResponse(content=_unsub_page(True, em)) def _unsub_page(success: bool, detail: str) -> str: """Branded HTML response for unsubscribe action.""" if success: title = "המנוי הוסר ✓" body = f"לא תקבל יותר אימיילים מ-legal-eye.

אימייל: {detail}" else: title = "לא ניתן להסיר" body = detail return f""" {title} · legal-eye

{title}

{body}

לחזרה לאתר ←

@app.post("/v1/admin/newsletter/send")
def admin_newsletter_send(req: _NewsletterSendBody, token: str = ""):  # type: ignore
    """v2.99.184 — Send a newsletter to every active subscriber.

    Active means: not unsubscribed and not pending (unverified). Each email
    gets its own unsubscribe link appended to the body. A summary record is
    appended to ``_NEWSLETTER_SENDS_PATH`` and an audit event is written.

    Args:
        req: subject / body_text (and ``dry_run`` to only count recipients).
        token: admin token; must equal the ``LE_ADMIN_TOKEN`` env var.

    Returns:
        403/400 JSONResponse on auth or validation failure; for ``dry_run``
        the recipient count plus the first 5 addresses; otherwise send stats
        (attempted / sent / failed counts, elapsed seconds, and the first 20
        failed addresses).
    """
    import hmac
    import os
    import time
    import uuid

    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing cannot leak token prefixes.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})

    subject = (req.subject or "").strip()
    body_text = (req.body_text or "").strip()
    if len(subject) < 3:
        return JSONResponse(status_code=400, content={"ok": False, "reason": "subject_required"})
    if len(body_text) < 10:
        return JSONResponse(status_code=400, content={"ok": False, "reason": "body_required"})

    subscribers = _load_newsletter_subscribers()
    # v2.99.192 — skip pending (unverified) subscribers, not just unsubscribed
    active = [s for s in subscribers if not s.get("unsubscribed_ts") and not s.get("pending")]
    # dict.fromkeys dedupes while preserving first-seen order.
    emails = list(dict.fromkeys(s.get("email", "").lower() for s in active if s.get("email")))

    if req.dry_run:
        return {"ok": True, "dry_run": True, "n_recipients": len(emails), "preview_emails": emails[:5]}

    send_id = str(uuid.uuid4())[:12]
    sent = []
    failed = []
    started_ts = time.time()
    for em in emails:
        unsub_url = _newsletter_unsubscribe_url(em)
        body_with_unsub = (
            body_text
            + "\n\n────────────────────────────────────────\n"
            "להסרת המנוי באופן מיידי, לחץ כאן:\n"
            f"{unsub_url}\n"
        )
        try:
            ok = _send_email_branded(
                em, subject, body_with_unsub,
                title=subject,
                cta_label="לאתר →",
                cta_url="https://legal-eye.1bigfam.com/",
                timeout=30,
            )
        except Exception as e:
            # One bad recipient must not abort the run: without the summary
            # record below, a retry would re-send to everyone already mailed.
            print(f"[newsletter] send FAIL {send_id}: {type(e).__name__}: {e}")
            ok = False
        (sent if ok else failed).append(em)
    elapsed = time.time() - started_ts

    # Audit + persist send log
    record = {
        "send_id": send_id,
        "ts": started_ts,
        "iso": time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime(started_ts)),
        "subject": subject,
        "n_attempted": len(emails),
        "n_sent": len(sent),
        "n_failed": len(failed),
        "elapsed_sec": round(elapsed, 1),
    }
    try:
        _NEWSLETTER_SENDS_PATH.parent.mkdir(parents=True, exist_ok=True)
        with open(_NEWSLETTER_SENDS_PATH, "a", encoding="utf-8") as f:
            f.write(json.dumps(record, ensure_ascii=False) + "\n")
    except Exception as e:
        # Still best-effort, but leave a trace so a missing history row is explainable.
        print(f"[newsletter] send-log FAIL {send_id}: {type(e).__name__}: {e}")

    _audit_log(actor="admin", role="admin", action="newsletter_sent",
               target=send_id,
               meta={"n_sent": len(sent), "n_failed": len(failed), "subject": subject})
    return {
        "ok": True,
        "send_id": send_id,
        "n_attempted": len(emails),
        "n_sent": len(sent),
        "n_failed": len(failed),
        "elapsed_sec": round(elapsed, 1),
        "failed_emails": failed[:20],  # first 20 for inspection
    }
Exception: pass items.sort(key=lambda r: -(r.get("ts") or 0)) return {"ok": True, "n_total": len(items), "items": items[:max(1, min(limit, 500))]} # Add newsletter sends file to backup try: if "tau_rag/runtime/newsletter_sends.jsonl" not in _BACKUP_FILE_PATHS: _BACKUP_FILE_PATHS.append("tau_rag/runtime/newsletter_sends.jsonl") except Exception: pass # v2.99.187 — Test data cleanup # Default email patterns that suggest test data (used by C-phase smoke tests). _DEFAULT_TEST_PATTERNS = [ "@test.com", "@example.com", "@example.net", "@example.org", "@invalid.com", "@nowhere.com", "+test@", "+smoke@", "burst+", "test+", "@spam.com", "@mailinator.com", "@guerrillamail.", ] def _record_matches_patterns(rec: dict, patterns: List[str], email_fields: List[str]) -> bool: """True if any of the record's email fields contains any pattern.""" for fld in email_fields: v = (rec.get(fld, "") or "").lower() if not v: continue for p in patterns: if p.lower() in v: return True return False _CLEANUP_TARGETS = [ # (jsonl_path, [email_fields_to_check], lock) ("tau_rag/runtime/lawyer_requests.jsonl", ["contact", "user_email", "name"], "_LAWYER_REQUEST_LOCK"), ("tau_rag/runtime/lawyer_applications.jsonl", ["email"], "_LAWYER_APP_LOCK"), ("tau_rag/runtime/newsletter_subscribers.jsonl", ["email"], "_NEWSLETTER_LOCK"), ("tau_rag/runtime/feedback.jsonl", ["email", "user_email"], None), ("tau_rag/runtime/triage_log.jsonl", ["text"], "_TRIAGE_LOG_LOCK"), # users.jsonl handled separately via auth module ] @app.get("/v1/admin/search") def admin_search(q: str = "", token: str = "", limit: int = 20): # type: ignore """v2.99.191 — Cross-record admin search. Searches across users, lawyer_applications, lawyer_requests, newsletter_subscribers, feedback. Returns max `limit` per source. 
Case-insensitive substring match on relevant fields.""" import os admin_token = os.environ.get("LE_ADMIN_TOKEN", "") if not admin_token or token != admin_token: return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"}) q = (q or "").strip().lower() if len(q) < 2: return JSONResponse(status_code=400, content={"ok": False, "reason": "query_too_short", "hint": "Min 2 chars"}) def load_jsonl(p): items = [] if not p.exists(): return items try: with open(p, "r", encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue try: items.append(json.loads(line)) except: continue except Exception: pass return items def match(rec, fields): """True if any of the listed fields contains q.""" for f in fields: v = (rec.get(f, "") or "") if isinstance(v, list): v = " ".join(str(x) for x in v) v = str(v).lower() if q in v: return True return False results = {} # Lawyer applications apps = load_jsonl(_LAWYER_APP_PATH) app_matches = [a for a in apps if match(a, ["full_name", "email", "license_number", "phone", "city", "bio"])] results["lawyer_applications"] = { "n": len(app_matches), "items": [ { "id": a.get("id", "")[:8], "full_name": a.get("full_name", ""), "email": a.get("email", ""), "license_number": a.get("license_number", ""), "status": a.get("status", "submitted"), "city": a.get("city", ""), "iso": a.get("iso", ""), "link": "/admin-leads/lawyer-applications.html", } for a in app_matches[:limit] ], } # Lawyer requests reqs = load_jsonl(_LAWYER_REQUEST_PATH) req_matches = [r for r in reqs if match(r, ["name", "contact", "question", "user_email", "assigned_lawyer_name"])] results["lawyer_requests"] = { "n": len(req_matches), "items": [ { "id": r.get("id", "")[:8], "name": r.get("name", ""), "contact": r.get("contact", ""), "question": (r.get("question", "") or "")[:80], "status": r.get("status", "new"), "domain": r.get("domain", ""), "iso": r.get("iso", ""), "link": "/admin-leads/lawyer-requests.html", } for r in req_matches[:limit] ], 
@app.post("/v1/admin/cleanup/scan")
def admin_cleanup_scan(request: Request, token: str = ""):  # type: ignore
    """v2.99.187 — Dry-run: scan the cleanup-target JSONLs and report matches.

    Nothing is modified. Patterns default to ``_DEFAULT_TEST_PATTERNS`` and
    can be overridden with a comma-separated ``?patterns=a,b`` query
    parameter (the request body is not read).

    Args:
        request: used only for its query parameters.
        token: admin token; must equal the ``LE_ADMIN_TOKEN`` env var.

    Returns:
        403 JSONResponse on bad token; otherwise the patterns used, the total
        match count, and per file: record count, match count, and up to 5
        sample matches. A file that cannot be read is reported with an
        ``error`` key.
    """
    import hmac
    import os

    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing cannot leak token prefixes.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})

    patterns = _DEFAULT_TEST_PATTERNS[:]
    custom = request.query_params.get("patterns", "")
    if custom:
        patterns = [p.strip() for p in custom.split(",") if p.strip()]

    summary = {}
    total = 0
    for path_str, fields, _ in _CLEANUP_TARGETS:
        p = _wa_pl.Path(path_str)
        if not p.exists():
            summary[path_str] = {"total": 0, "matches": 0, "sample": []}
            continue
        items = []
        try:
            with open(p, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        items.append(json.loads(line))
                    except ValueError:
                        # Malformed line (json.JSONDecodeError is a ValueError): skip it.
                        continue
        except Exception as e:
            # Surface the failure: a file missing from the report reads as "clean".
            summary[path_str] = {"total": 0, "matches": 0, "sample": [],
                                 "error": f"{type(e).__name__}: {e}"}
            continue
        # Only dict records can be inspected; anything else cannot match.
        matches = [it for it in items
                   if isinstance(it, dict) and _record_matches_patterns(it, patterns, fields)]
        summary[path_str] = {
            "total": len(items),
            "matches": len(matches),
            "sample": [
                {
                    "email": next((it.get(f, "") for f in fields if it.get(f)), ""),
                    "iso": it.get("iso", ""),
                    # id is expected to be a string, but do not crash if it is not.
                    "id": str(it.get("id") or "")[:8],
                }
                for it in matches[:5]
            ],
        }
        total += len(matches)

    return {
        "ok": True,
        "patterns": patterns,
        "total_matches": total,
        "by_file": summary,
    }
@app.post("/v1/feedback/submit")
def feedback_submit(body: _FeedbackBody, request: Request):  # type: ignore
    """Accept feedback from anyone (signed-in or anonymous).

    v2.99.180 — honeypot + IP rate limit (5/min, burst 8).

    The record is appended to ``_FEEDBACK_PATH`` under ``_FEEDBACK_LOCK`` and
    an audit event is written. When an Authorization header resolves to a
    session, the user's id/email are attached to the record.

    Returns:
        ``{"ok": True, "id": "drop"}`` when the honeypot triggers (the record
        is silently discarded); 429 when rate-limited; 400 when the message is
        shorter than 4 characters; 500 with the error text on unexpected
        failure; otherwise ``{"ok": True, "id": ..., "message": ...}``.
    """
    import time
    import uuid

    if _honeypot_caught(body.hp, body.hp2):
        return {"ok": True, "id": "drop"}
    if not _public_rate_check(request, "feedback", per_min=5, burst=8):
        return JSONResponse(status_code=429, content={"ok": False, "reason": "rate_limited"})
    try:
        msg = (body.message or "").strip()
        if len(msg) < 4:
            return JSONResponse(status_code=400, content={"ok": False, "reason": "message_too_short"})

        # Best-effort identity from auth header
        signed_in = None
        try:
            from ..auth import session_from_header
            sess = session_from_header(request.headers.get("authorization"))
            if sess:
                signed_in = {"id": sess["user_id"], "email": sess["email"]}
        except Exception:
            # Was a bare `except:`, which also swallowed KeyboardInterrupt and
            # SystemExit. Identity stays optional; fall back to anonymous.
            pass

        now = time.time()
        allowed_kinds = ("bug", "suggestion", "general", "praise")
        record = {
            "id": str(uuid.uuid4())[:12],
            "ts": now,
            "iso": time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime(now)),
            "message": msg[:4000],
            # Missing or unknown kinds collapse to "general".
            "kind": body.kind if body.kind in allowed_kinds else "general",
            "page": (body.page or "")[:200],
            "rating": body.rating if (body.rating and 1 <= body.rating <= 5) else None,
            "user_id": signed_in["id"] if signed_in else None,
            "user_email": signed_in["email"] if signed_in else None,
            "user_agent": (request.headers.get("user-agent") or "")[:200],
            "status": "new",
        }
        _FEEDBACK_PATH.parent.mkdir(parents=True, exist_ok=True)
        with _FEEDBACK_LOCK:
            with open(_FEEDBACK_PATH, "a", encoding="utf-8") as f:
                f.write(json.dumps(record, ensure_ascii=False) + "\n")
        _audit_log(
            actor=signed_in["email"] if signed_in else "anonymous",
            role="user" if signed_in else "anonymous",
            action="feedback_submitted",
            target=record["id"],
            meta={"kind": record["kind"], "rating": record["rating"]},
        )
        return {"ok": True, "id": record["id"], "message": "תודה על המשוב!"}
    except Exception as e:
        # Leave a server-side trace; the client response shape is unchanged.
        print(f"[feedback] FAIL: {type(e).__name__}: {e}")
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
import os admin_token = os.environ.get("LE_ADMIN_TOKEN", "") if not admin_token or token != admin_token: return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"}) if not _FEEDBACK_PATH.exists(): return {"ok": True, "n": 0, "items": []} items = [] with open(_FEEDBACK_PATH, "r", encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue try: rec = json.loads(line) except: continue if kind and rec.get("kind") != kind: continue items.append(rec) items.sort(key=lambda r: -(r.get("ts") or 0)) return {"ok": True, "n": len(items), "items": items[:max(1, min(limit, 2000))]} def _audit_log(actor: str, role: str, action: str, target: str = "", meta: Optional[dict] = None) -> None: """Append an audit event. Best-effort, never raises. Args: actor: email or 'system' role: user | lawyer | admin | system | anonymous action: short snake_case event id (e.g. 'lawyer_assigned') target: request_id / lawyer_id / user_id / etc. meta: free-form dict (kept compact — avoid PII bombs) v2.99.195 — if LE_AUDIT_WEBHOOK_URL is set, also POSTs the event to that URL in a background thread (best-effort, silent on fail). Useful for Slack/Discord/Zapier/n8n integrations.""" import time, uuid try: _AUDIT_LOG_PATH.parent.mkdir(parents=True, exist_ok=True) rec = { "id": str(uuid.uuid4())[:12], "ts": time.time(), "iso": time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()), "actor": (actor or "system").strip().lower(), "role": role or "system", "action": action, "target": target or "", "meta": meta or {}, } with _AUDIT_LOG_LOCK: with open(_AUDIT_LOG_PATH, "a", encoding="utf-8") as f: f.write(json.dumps(rec, ensure_ascii=False) + "\n") # v2.99.195 — fire webhook async (don't block main path) try: _fire_audit_webhook_async(rec) except Exception: pass except Exception as e: print(f"[audit] FAIL: {type(e).__name__}: {e}") def _fire_audit_webhook_async(audit_rec: dict) -> None: """v2.99.195 — POST audit event to LE_AUDIT_WEBHOOK_URL if set. 
@app.get("/v1/admin/dashboard/stream")
async def admin_dashboard_stream(token: str = ""):  # type: ignore
    """v2.99.193 — Server-Sent Events stream of dashboard data.

    Pushes a fresh ``admin_dashboard`` snapshot every 5s for up to 60 ticks
    (about 5 min), then emits a ``close`` event so the client's EventSource
    reconnects. EventSource doesn't support custom headers, so the token is
    passed in the query string.

    Args:
        token: admin token; must equal the ``LE_ADMIN_TOKEN`` env var.

    Returns:
        403 JSONResponse on bad token, otherwise a ``text/event-stream``
        StreamingResponse.
    """
    import asyncio
    import hmac
    import json as _j
    import os
    from fastapi.responses import StreamingResponse

    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing cannot leak token prefixes.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})

    async def event_gen():
        # Tell client to reconnect after 5s if we drop
        yield "retry: 5000\n\n"
        for _ in range(60):  # 60 * 5s = 5 min then close
            try:
                # admin_dashboard is synchronous; run it in a worker thread so
                # the event loop is not blocked while it computes.
                data = await asyncio.to_thread(admin_dashboard, token)
                if not isinstance(data, dict):
                    # A non-dict result is the JSONResponse returned on auth
                    # failure, e.g. the token changed mid-stream.
                    yield "event: auth-expired\ndata: {}\n\n"
                    return
                payload = _j.dumps(data, ensure_ascii=False)
                yield f"data: {payload}\n\n"
            except Exception as e:
                err = _j.dumps({"ok": False, "error": str(e)})
                yield f"event: error\ndata: {err}\n\n"
            await asyncio.sleep(5)
        # Graceful close — client EventSource will auto-reconnect
        yield "event: close\ndata: {}\n\n"

    return StreamingResponse(
        event_gen(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache, no-transform",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",  # disable nginx buffering
        },
    )
Admin-token-gated.""" import os, time admin_token = os.environ.get("LE_ADMIN_TOKEN", "") if not admin_token or token != admin_token: return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"}) def _read(p): out = [] try: if not p.exists(): return out with open(p, "r", encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue try: out.append(json.loads(line)) except: continue except Exception: pass return out now = time.time() w1_start = now - 7 * 24 * 3600 # last 7 days w2_start = now - 14 * 24 * 3600 # prior 7 days requests = _read(_LAWYER_REQUEST_PATH) apps = _read(_LAWYER_APP_PATH) audit = _read(_AUDIT_LOG_PATH) notifs = _read(_NOTIFICATIONS_PATH) feedback = _read(_FEEDBACK_PATH) triage = _read(_TRIAGE_LOG_PATH) try: from ..auth import USERS_PATH users = _read(USERS_PATH) except Exception: users = [] def _count_in(items, ts_key, lo, hi): return sum(1 for it in items if lo <= (it.get(ts_key) or 0) < hi) def _delta(curr, prev): if prev == 0: return None if curr == 0 else 100.0 return round(((curr - prev) / prev) * 100.0, 1) # ── KPIs: last 7d vs prior 7d kpis = {} for label, items, ts_key in [ ("new_users", users, "created_ts"), ("new_requests", requests, "ts"), ("new_lawyers", apps, "ts"), ("triage_runs", triage, "ts"), ("feedback_items", feedback, "ts"), ]: c = _count_in(items, ts_key, w1_start, now) p = _count_in(items, ts_key, w2_start, w1_start) kpis[label] = {"curr": c, "prev": p, "delta_pct": _delta(c, p)} # Deliverable / approve counts (from requests records) delivered_c = sum(1 for r in requests if (r.get("deliverable_ts") or 0) >= w1_start) delivered_p = sum(1 for r in requests if w2_start <= (r.get("deliverable_ts") or 0) < w1_start) approved_c = sum(1 for r in requests if (r.get("approval_ts") or 0) >= w1_start) approved_p = sum(1 for r in requests if w2_start <= (r.get("approval_ts") or 0) < w1_start) disputed_c = sum(1 for r in requests if (r.get("dispute_ts") or 0) >= w1_start) disputed_p = sum(1 for r 
in requests if w2_start <= (r.get("dispute_ts") or 0) < w1_start) kpis["deliverables"] = {"curr": delivered_c, "prev": delivered_p, "delta_pct": _delta(delivered_c, delivered_p)} kpis["approvals"] = {"curr": approved_c, "prev": approved_p, "delta_pct": _delta(approved_c, approved_p)} kpis["disputes"] = {"curr": disputed_c, "prev": disputed_p, "delta_pct": _delta(disputed_c, disputed_p)} # ── Funnel (cumulative, all-time) funnel = { "triage_runs": len(triage), "requests": len(requests), "matched": sum(1 for r in requests if r.get("assigned_lawyer_id")), "accepted": sum(1 for r in requests if r.get("lawyer_response") == "accepted"), "delivered": sum(1 for r in requests if r.get("deliverable_ts")), "approved": sum(1 for r in requests if r.get("approval_ts") and not r.get("auto_approved")), "auto_released": sum(1 for r in requests if r.get("approval_ts") and r.get("auto_approved")), "disputed": sum(1 for r in requests if r.get("status") == "disputed"), "dispute_closed": sum(1 for r in requests if r.get("dispute_resolution_ts")), } # ── Activity feed (last 24h, mixed sources, sorted desc) activity_cutoff = now - 24 * 3600 activity = [] for r in requests: if (r.get("ts") or 0) >= activity_cutoff: activity.append({ "ts": r["ts"], "iso": r.get("iso", ""), "kind": "request", "icon": "💬", "title": f"פנייה חדשה · {r.get('domain', '?')}", "summary": (r.get('question', '') or '')[:80], "link": f"/admin-leads/lawyer-requests.html?id={r.get('id', '')[:8]}", }) for a in apps: if (a.get("ts") or 0) >= activity_cutoff: activity.append({ "ts": a["ts"], "iso": a.get("iso", ""), "kind": "lawyer_app", "icon": "⚖", "title": f"עו\"ד חדש · {a.get('full_name', '?')}", "summary": f"{', '.join(a.get('domains', []) or [])[:60]}", "link": "/admin-leads/lawyer-applications.html", }) for u in users: if (u.get("created_ts") or 0) >= activity_cutoff: activity.append({ "ts": u["created_ts"], "iso": time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime(u.get("created_ts") or 0)), "kind": "user", 
"icon": "👤", "title": "משתמש חדש נרשם", "summary": (u.get("email", "") or "")[:50], "link": "", }) for ev in audit: if (ev.get("ts") or 0) >= activity_cutoff and ev.get("action") in ( "deliverable_submitted", "deliverable_approved", "deliverable_disputed", "dispute_resolved", "auto_release", "invite_consumed", ): activity.append({ "ts": ev["ts"], "iso": ev.get("iso", ""), "kind": "audit", "icon": "✓", "title": ev.get("action", "?"), "summary": f"{ev.get('actor','?')} · {ev.get('target','?')[:30]}", "link": "/admin-leads/audit-log.html", }) activity.sort(key=lambda x: -(x.get("ts") or 0)) activity = activity[:40] # ── Alerts (current state, not windowed) alerts = [] open_disp = [r for r in requests if r.get("status") == "disputed"] pending_apps = [a for a in apps if (a.get("status") or "submitted") in ("submitted", "under_review")] stale_new = [r for r in requests if r.get("status") in ("new", "offered") and (now - (r.get("ts") or now)) > 24 * 3600] stuck_deliv = [r for r in requests if r.get("status") == "contacted" and r.get("lawyer_response") == "accepted" and (now - (r.get("assigned_ts") or now)) > 48 * 3600] auto_release_imminent = [r for r in requests if r.get("status") == "submitted_for_approval" and r.get("deliverable_ts") and (now - r["deliverable_ts"]) > 60 * 3600] if open_disp: alerts.append({"kind": "dispute", "level": "danger", "title": f"{len(open_disp)} מחלוקות פתוחות", "link": "/admin-leads/lawyer-requests.html?status=disputed", "count": len(open_disp)}) if pending_apps: alerts.append({"kind": "verify", "level": "warn", "title": f"{len(pending_apps)} בקשות עו\"ד ממתינות לאימות", "link": "/admin-leads/lawyer-applications.html", "count": len(pending_apps)}) if stale_new: alerts.append({"kind": "stale", "level": "warn", "title": f"{len(stale_new)} פניות 'new/offered' >24 שעות", "link": "/admin-leads/lawyer-requests.html", "count": len(stale_new)}) if stuck_deliv: alerts.append({"kind": "stuck", "level": "warn", "title": f"{len(stuck_deliv)} פניות שעו\"ד 
@app.get("/v1/admin/audit-log")
def admin_audit_log(token: str = "", limit: int = 200, role: str = "", action: str = "", actor: str = ""):  # type: ignore
    """Admin: query the audit log, newest first.

    Args:
        token: admin token; must equal the ``LE_ADMIN_TOKEN`` env var.
        limit: maximum items returned, clamped to 1..2000.
        role: exact-match filter on the record's ``role`` (empty = no filter).
        action: prefix filter on the record's ``action`` (empty = no filter).
        actor: exact-match filter on ``actor``, compared stripped/lowercased.

    Returns:
        403 JSONResponse on bad token; 500 with the error text if the log
        cannot be read; otherwise ``n`` (items returned), ``n_total`` (items
        matching the filters) and ``items``.
    """
    import hmac
    import os

    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing cannot leak token prefixes.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    if not _AUDIT_LOG_PATH.exists():
        return {"ok": True, "n": 0, "n_total": 0, "items": []}

    cap = max(1, min(limit, 2000))
    wanted_actor = actor.strip().lower()
    items = []
    try:
        with open(_AUDIT_LOG_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                except ValueError:
                    # Malformed line (json.JSONDecodeError is a ValueError): skip it.
                    continue
                if not isinstance(rec, dict):
                    # A stray non-object line must not fail the whole listing.
                    continue
                if role and rec.get("role") != role:
                    continue
                # Coerce so a null or non-string action cannot raise on startswith.
                if action and not str(rec.get("action") or "").startswith(action):
                    continue
                if actor and rec.get("actor") != wanted_actor:
                    continue
                items.append(rec)
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})

    items.sort(key=lambda r: -(r.get("ts") or 0))
    return {
        "ok": True,
        "n": min(len(items), cap),
        "n_total": len(items),
        "items": items[:cap],
    }
def _collect_backup_snapshot() -> Dict[str, Any]:
    """Gather every existing file listed in ``_BACKUP_FILE_PATHS`` into one dict.

    Each readable file becomes ``{path, content, size, mtime}``. Paths that
    do not exist are skipped; a file that fails to read is reported on stdout
    and skipped.

    Returns:
        Snapshot metadata (version, timestamp, ISO time, file count, total
        bytes) plus the ``files`` list.
    """
    import time

    entries = []
    byte_total = 0
    for rel_path in _BACKUP_FILE_PATHS:
        candidate = _wa_pl.Path(rel_path)
        if not candidate.exists():
            continue
        try:
            text = candidate.read_text(encoding="utf-8")
            info = candidate.stat()
        except Exception as e:
            print(f"[backup] read FAIL {rel_path}: {e}")
            continue
        entries.append({
            "path": rel_path,
            "content": text,
            "size": info.st_size,
            "mtime": info.st_mtime,
        })
        byte_total += info.st_size

    snapshot = {"version": "1"}
    snapshot["snapshot_ts"] = time.time()
    snapshot["snapshot_iso"] = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime())
    snapshot["n_files"] = len(entries)
    snapshot["total_bytes"] = byte_total
    snapshot["files"] = entries
    return snapshot
@app.get("/v1/admin/backup/download")
def admin_backup_download(token: str = ""):  # type: ignore
    """v2.99.153 — Download a ZIP of all state files. Admin-token-gated.

    Builds the snapshot with ``_collect_backup_snapshot``, packs it with
    ``_build_backup_zip``, writes an audit event, and returns the archive as
    an attachment named ``legal-eye-backup-<YYYYmmdd-HHMMSS>.zip``.

    Args:
        token: admin token; must equal the ``LE_ADMIN_TOKEN`` env var.

    Returns:
        403 JSONResponse on bad token; 500 JSONResponse with the error type
        and text on failure; otherwise an ``application/zip`` Response.
    """
    import hmac
    import os
    import time
    from fastapi.responses import Response

    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison: this endpoint hands out the whole user
    # database, so the token check must not leak prefixes through timing.
    if not admin_token or not hmac.compare_digest(
        (token or "").encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    try:
        snap = _collect_backup_snapshot()
        blob = _build_backup_zip(snap)
        fname = f"legal-eye-backup-{time.strftime('%Y%m%d-%H%M%S')}.zip"
        _audit_log(actor="admin", role="admin", action="backup_download",
                   target="(snapshot)", meta={"n_files": snap["n_files"], "size": len(blob)})
        return Response(
            content=blob,
            media_type="application/zip",
            headers={"Content-Disposition": f'attachment; filename="{fname}"'},
        )
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": f"{type(e).__name__}: {e}"})
Overwrites all listed state files.""" import os admin_token = os.environ.get("LE_ADMIN_TOKEN", "") if not admin_token or token != admin_token: return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"}) if confirm != "YES_REPLACE": return JSONResponse(status_code=400, content={ "ok": False, "reason": "confirm_required", "hint": "Pass ?confirm=YES_REPLACE to acknowledge file overwrite.", }) try: body = await request.body() if not body: return JSONResponse(status_code=400, content={"ok": False, "reason": "empty_body"}) import io, zipfile, json as _j with zipfile.ZipFile(io.BytesIO(body)) as zf: names = zf.namelist() if "manifest.json" not in names: return JSONResponse(status_code=400, content={"ok": False, "reason": "no_manifest"}) manifest = _j.loads(zf.read("manifest.json").decode("utf-8")) allowed = set(_BACKUP_FILE_PATHS) restored = [] skipped = [] for name in names: if name == "manifest.json": continue if name not in allowed: skipped.append(name); continue content = zf.read(name).decode("utf-8") p = _wa_pl.Path(name) p.parent.mkdir(parents=True, exist_ok=True) # Atomic write tmp = p.with_suffix(p.suffix + ".restore.tmp") tmp.write_text(content, encoding="utf-8") tmp.replace(p) restored.append({"path": name, "size": len(content)}) _audit_log(actor="admin", role="admin", action="backup_restore", target="(state)", meta={"n_restored": len(restored), "manifest_iso": manifest.get("snapshot_iso", "?")}) return { "ok": True, "n_restored": len(restored), "n_skipped": len(skipped), "manifest_iso": manifest.get("snapshot_iso", "?"), "restored": restored, "skipped": skipped, } except Exception as e: return JSONResponse(status_code=500, content={"ok": False, "error": f"{type(e).__name__}: {e}"}) def _push_backup_to_webhook(snapshot: Dict[str, Any], zip_bytes: bytes) -> bool: """POST to LE_BACKUP_WEBHOOK_URL. 
Returns True on 2xx response.""" import os, urllib.request, base64 url = os.environ.get("LE_BACKUP_WEBHOOK_URL", "").strip() if not url: return False try: payload = { "kind": "legal-eye-backup", "version": snapshot["version"], "snapshot_iso": snapshot["snapshot_iso"], "n_files": snapshot["n_files"], "total_bytes": snapshot["total_bytes"], "zip_base64": base64.b64encode(zip_bytes).decode("ascii"), } req = urllib.request.Request( url, data=json.dumps(payload).encode("utf-8"), headers={"Content-Type": "application/json"}, method="POST", ) with urllib.request.urlopen(req, timeout=30) as r: return 200 <= r.status < 300 except Exception as e: print(f"[backup] webhook FAIL: {type(e).__name__}: {e}") return False _BACKUP_STATE_PATH = _wa_pl.Path("tau_rag/runtime/admin_backup_state.json") def _start_auto_backup_thread() -> None: """v2.99.153 — Daemon. Every 30 min, if LE_BACKUP_WEBHOOK_URL is set and any state file has changed since last backup, POST a snapshot.""" import threading, time, hashlib def _loop(): time.sleep(120) # warmup — let everything boot last_hash = "" while True: try: import os if not os.environ.get("LE_BACKUP_WEBHOOK_URL", "").strip(): time.sleep(30 * 60); continue snap = _collect_backup_snapshot() # Cheap change detection h = hashlib.sha256() for f in snap["files"]: h.update(f["path"].encode()); h.update(f["content"].encode()) cur_hash = h.hexdigest() if cur_hash == last_hash: time.sleep(30 * 60); continue blob = _build_backup_zip(snap) ok = _push_backup_to_webhook(snap, blob) if ok: last_hash = cur_hash try: _BACKUP_STATE_PATH.parent.mkdir(parents=True, exist_ok=True) _BACKUP_STATE_PATH.write_text(json.dumps({ "last_backup_ts": snap["snapshot_ts"], "last_backup_iso": snap["snapshot_iso"], "n_files": snap["n_files"], "total_bytes": snap["total_bytes"], "sha256": cur_hash[:16], }, ensure_ascii=False, indent=2), encoding="utf-8") except Exception: pass print(f"[backup] auto-snapshot OK · {snap['n_files']} files · {snap['total_bytes']}B") time.sleep(30 * 
60) except Exception as e: print(f"[backup] loop FAIL: {type(e).__name__}: {e}") time.sleep(30 * 60) threading.Thread(target=_loop, daemon=True).start() # Start the daemon on import (alongside other startup threads) try: _start_auto_backup_thread() except Exception as _e: print(f"[backup] startup FAIL: {_e}") def _create_notification(email: str, kind: str, title: str, body: str = "", link: str = "", related_request_id: str = "") -> None: """Append a notification for `email`. Best-effort, never raises.""" import time, uuid if not email: return try: _NOTIFICATIONS_PATH.parent.mkdir(parents=True, exist_ok=True) rec = { "id": str(uuid.uuid4())[:12], "ts": time.time(), "iso": time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()), "email": email.strip().lower(), "kind": kind, # e.g. "status_change", "lawyer_offered", "deliverable_submitted" "title": title, "body": (body or "")[:600], "link": link, "related_request_id": related_request_id, "read": False, } with _NOTIFICATIONS_LOCK: with open(_NOTIFICATIONS_PATH, "a", encoding="utf-8") as f: f.write(json.dumps(rec, ensure_ascii=False) + "\n") except Exception as e: print(f"[notif] FAIL: {type(e).__name__}: {e}") @app.get("/v1/me/notifications") def me_notifications(request: Request, limit: int = 30, unread_only: bool = False): # type: ignore """List notifications for the signed-in user (works for both user and lawyer — discriminated only by their email).""" try: from ..auth import session_from_header sess = session_from_header(request.headers.get("authorization")) if not sess: return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"}) if not _NOTIFICATIONS_PATH.exists(): return {"ok": True, "n": 0, "n_unread": 0, "items": []} items = [] email = sess["email"].strip().lower() with open(_NOTIFICATIONS_PATH, "r", encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue try: rec = json.loads(line) except: continue if rec.get("email") == email: if unread_only and 
@app.post("/v1/me/notifications/mark")
def me_notifications_mark(body: _NotifMarkBody, request: Request):  # type: ignore
    """Mark the signed-in user's notifications as read or unread.

    The user is identified from the ``Authorization`` header session. When
    ``body.ids`` is None or empty, every notification addressed to the user
    is updated; otherwise only the listed ids are. Only records whose
    ``email`` matches the session email are ever touched.

    Returns:
        ``{"ok": True, "n_changed": <int>}`` on success; a 401 JSON response
        when there is no valid session; a 500 JSON response on unexpected
        errors.
    """
    try:
        from ..auth import session_from_header

        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})
        if not _NOTIFICATIONS_PATH.exists():
            return {"ok": True, "n_changed": 0}

        email = sess["email"].strip().lower()
        target_ids = set(body.ids or [])
        n_changed = 0
        with _NOTIFICATIONS_LOCK:
            items = []
            with open(_NOTIFICATIONS_PATH, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        items.append(json.loads(line))
                    except ValueError:
                        # Malformed JSON line (JSONDecodeError is a ValueError).
                        continue

            for rec in items:
                # A valid-JSON line that is not an object (e.g. a list) has no
                # .get(); skip it instead of failing the whole request.
                if not isinstance(rec, dict) or rec.get("email") != email:
                    continue
                if target_ids and rec.get("id") not in target_ids:
                    continue
                if rec.get("read") != body.read:
                    rec["read"] = body.read
                    n_changed += 1

            # Rewrite only when something changed; the write goes to a temp
            # file that then replaces the store in one step.
            if n_changed:
                tmp = _NOTIFICATIONS_PATH.with_suffix(".jsonl.tmp")
                with open(tmp, "w", encoding="utf-8") as f:
                    for rec in items:
                        f.write(json.dumps(rec, ensure_ascii=False) + "\n")
                tmp.replace(_NOTIFICATIONS_PATH)
        return {"ok": True, "n_changed": n_changed}
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
contact: str = Form(...), name: str = Form(""), contact_method: str = Form("either"), domain: str = Form(""), triage_category: str = Form(""), risk_level: str = Form(""), urgent: str = Form("false"), notes: str = Form(""), ): """v2.99.124 — Like /submit but with a single file attachment. Accepts PDF / DOCX / DOC / PNG / JPG / JPEG / TXT / RTF up to 10 MB. Saves under runtime/document_uploads/{request_id}/{safe_filename}. Creates the same lawyer-request record with extra fields: has_attachment: true attachment_filename, attachment_size, attachment_mime Then fires the same notification flow. """ import time, uuid try: q = (question or "").strip() c = (contact or "").strip() if len(q) < 4 or len(c) < 4: return JSONResponse(status_code=400, content={ "ok": False, "reason": "missing_required_fields", }) # Validate file orig_name = (file.filename or "upload").strip() # Get extension (lowercased) ext = "" if "." in orig_name: ext = "." + orig_name.rsplit(".", 1)[-1].lower() if ext not in _DOC_ALLOWED_EXT: return JSONResponse(status_code=400, content={ "ok": False, "reason": "unsupported_filetype", "allowed": sorted(_DOC_ALLOWED_EXT), }) # Read body & size-check content = await file.read() if len(content) > _DOC_MAX_BYTES: return JSONResponse(status_code=413, content={ "ok": False, "reason": "file_too_large", "max_bytes": _DOC_MAX_BYTES, }) if len(content) == 0: return JSONResponse(status_code=400, content={ "ok": False, "reason": "empty_file", }) # Build record + save file req_id = str(uuid.uuid4()) safe_name = _re.sub(r"[^\w\-.()]+", "_", orig_name)[:120] or ("upload" + ext) upload_dir = _DOC_UPLOAD_DIR / req_id upload_dir.mkdir(parents=True, exist_ok=True) file_path = upload_dir / safe_name with open(file_path, "wb") as f: f.write(content) # v2.99.130 — Optional user attribution signed_in_user = None try: from ..auth import session_from_header sess = session_from_header(request.headers.get("authorization")) if sess: signed_in_user = {"id": sess["user_id"], "email": 
sess["email"]} except Exception: pass record = { "id": req_id, "ts": time.time(), "iso": time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()), "question": q, "name": (name or "").strip(), "contact": c, "contact_method": contact_method or "either", "domain": domain or "", "triage_category": triage_category or "", "risk_level": risk_level or "", "urgent": str(urgent).lower() in ("true", "1", "yes"), "notes": (notes or "").strip(), "has_attachment": True, "attachment_filename": safe_name, "attachment_size": len(content), "attachment_mime": file.content_type or "application/octet-stream", "status": "new", "user_id": signed_in_user["id"] if signed_in_user else None, "user_email": signed_in_user["email"] if signed_in_user else None, } _LAWYER_REQUEST_PATH.parent.mkdir(parents=True, exist_ok=True) with _LAWYER_REQUEST_LOCK: with open(_LAWYER_REQUEST_PATH, "a", encoding="utf-8") as f: f.write(json.dumps(record, ensure_ascii=False) + "\n") _notify_lead_async(record) # v2.99.144 — audit _audit_log( actor=signed_in_user["email"] if signed_in_user else "anonymous", role="user" if signed_in_user else "anonymous", action="request_submitted", target=record["id"], meta={"domain": record["domain"], "urgent": record["urgent"], "has_attachment": True, "filename": safe_name, "size": len(content)}, ) print(f"[lawyer-request] new #{req_id[:8]} (📎 {safe_name}, {len(content)}B) " f"domain={record['domain']} urgent={record['urgent']}") return { "ok": True, "id": req_id, "filename": safe_name, "size": len(content), "expected_sla": "24 hours" if not record["urgent"] else "4 hours", "message": "המסמך התקבל. 
@app.get("/v1/lawyer-request/{request_id}/attachment")
def lawyer_request_get_attachment(request_id: str, token: str = ""):  # type: ignore
    """Admin: download the file attached to a lawyer request.

    Args:
        request_id: Id of the request record to look up in the JSONL store.
        token: Admin token; must equal the ``LE_ADMIN_TOKEN`` env var.

    Returns:
        A file response with the stored MIME type and filename, or a JSON
        error: 403 (bad/missing token), 404 (no store, no such record, record
        without attachment, or file missing on disk), 500 (store unreadable).
    """
    import hmac
    import os

    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing does not leak how much of
    # the token matched. Bytes are used because compare_digest rejects
    # non-ASCII str arguments.
    if not admin_token or not hmac.compare_digest(
        token.encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})

    # Find record
    if not _LAWYER_REQUEST_PATH.exists():
        return JSONResponse(status_code=404, content={"ok": False, "reason": "no_records"})
    record = None
    try:
        with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    r = json.loads(line)
                    if r.get("id") == request_id:
                        record = r
                        break
                except Exception:
                    # Skip malformed or non-object lines.
                    continue
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})

    if not record or not record.get("has_attachment"):
        return JSONResponse(status_code=404, content={"ok": False, "reason": "no_attachment"})

    fname = record.get("attachment_filename", "upload")
    path = _DOC_UPLOAD_DIR / request_id / fname
    # is_file() rather than exists(): an empty filename would resolve to the
    # upload directory itself, which cannot be served as a file.
    if not path.is_file():
        return JSONResponse(status_code=404, content={"ok": False, "reason": "file_missing_on_disk"})

    from fastapi.responses import FileResponse as _FileResponse
    return _FileResponse(
        path,
        media_type=record.get("attachment_mime") or "application/octet-stream",
        filename=fname,
    )
resolution) admin_note = record.get("dispute_resolution_note", "") rid_short = (record.get("id") or "?")[:8] # In-app bell for both if record.get("user_email"): _create_notification( record["user_email"], kind="dispute_resolved", title=f"⚖ המחלוקת נפתרה: {label_he}", body=(admin_note or "")[:200], link="/my-matters/", related_request_id=record.get("id", ""), ) if record.get("assigned_lawyer_email"): _create_notification( record["assigned_lawyer_email"], kind="dispute_resolved", title=f"⚖ המחלוקת נפתרה: {label_he}", body=(admin_note or "")[:200], link="/lawyer-dashboard/", related_request_id=record.get("id", ""), ) def _worker(): subject = f"legal-eye · המחלוקת נפתרה ({label_he})" title = f"⚖ המחלוקת נפתרה: {label_he}" # User email if record.get("user_email"): if resolution == "user": user_body = ( f"שלום,\n\nהמחלוקת שפתחת על הפנייה ב-legal-eye נפתרה - לטובתך.\n\n" f"החלטת הצוות:\n{admin_note or '(ראה אזור אישי)'}\n\n" f"(במודל תשלומים עתידי — סכום מוחזר יוחזר אליך)\n\nמזהה: {rid_short}" ) elif resolution == "lawyer": user_body = ( f"שלום,\n\nהמחלוקת שפתחת על הפנייה ב-legal-eye נסקרה.\nלאחר בדיקה — הצוות החליט לטובת עורך הדין.\n\n" f"החלטת הצוות:\n{admin_note or '(ראה אזור אישי)'}\n\nאם יש לך שאלות — ניתן לפתוח פנייה חדשה.\n\nמזהה: {rid_short}" ) elif resolution == "partial": user_body = ( f"שלום,\n\nהמחלוקת שפתחת על הפנייה ב-legal-eye נפתרה בפתרון משולב.\n\n" f"החלטת הצוות:\n{admin_note or '(ראה אזור אישי)'}\n\nמזהה: {rid_short}" ) else: user_body = ( f"שלום,\n\nלאחר בדיקת המחלוקת — הפנייה שלך תשויך לעו\"ד אחר.\nלא נדרשת פעולה ממך. 
@app.post("/v1/lawyer-request/{request_id}/resolve-dispute")
def lawyer_request_resolve_dispute(request_id: str, body: _DisputeResolutionBody, token: str = ""):  # type: ignore
    """Admin: resolve an open dispute on a lawyer request.

    Records the resolution on the request, moves it to its next status
    (``re_route`` sends it back to ``new`` and unassigns the lawyer; every
    other resolution closes it as ``done``), rewrites the JSONL store, then
    notifies both parties and writes an audit entry.

    Args:
        request_id: Id of the disputed request.
        body: Resolution (``user`` | ``lawyer`` | ``partial`` | ``re_route``)
            and an optional admin note.
        token: Admin token; must equal the ``LE_ADMIN_TOKEN`` env var.

    Returns:
        ``{"ok": True, "request_id", "resolution", "new_status"}`` on success,
        or a JSON error: 403 (forbidden), 400 (invalid resolution / request
        not in ``disputed`` status), 404 (no store / unknown request).
    """
    import hmac
    import os
    import time

    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison; bytes because compare_digest rejects
    # non-ASCII str arguments.
    if not admin_token or not hmac.compare_digest(
        token.encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    if body.resolution not in ("user", "lawyer", "partial", "re_route"):
        return JSONResponse(status_code=400, content={"ok": False, "reason": "invalid_resolution"})
    if not _LAWYER_REQUEST_PATH.exists():
        return JSONResponse(status_code=404, content={"ok": False, "reason": "no_records"})

    with _LAWYER_REQUEST_LOCK:
        items = []
        with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    items.append(json.loads(line))
                except ValueError:
                    # Malformed JSON line (JSONDecodeError is a ValueError).
                    continue

        # Non-object JSON lines have no .get(); ignore them when searching.
        target = next(
            (rec for rec in items if isinstance(rec, dict) and rec.get("id") == request_id),
            None,
        )
        if not target:
            return JSONResponse(status_code=404, content={"ok": False, "reason": "request_not_found"})
        if target.get("status") != "disputed":
            return JSONResponse(status_code=400, content={"ok": False, "reason": "not_disputed"})

        now_ts = time.time()
        now_iso = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime())
        target["dispute_resolution"] = body.resolution
        target["dispute_resolution_note"] = (body.note or "").strip()
        target["dispute_resolution_ts"] = now_ts
        target["dispute_resolution_iso"] = now_iso
        target["dispute_resolved_by"] = "admin"

        # Status transition
        notify_record = target
        if body.resolution == "re_route":
            # Keep a copy that still names the lawyer: the notifier reads
            # assigned_lawyer_email/name, and clearing them first would mean
            # the re-routed lawyer is never told the request was moved.
            notify_record = dict(target)
            # Send back to admin queue, unassign current lawyer
            target["status"] = "new"
            target["assigned_lawyer_id"] = None
            target["assigned_lawyer_name"] = None
            target["assigned_lawyer_email"] = None
            target["lawyer_response"] = None
        else:
            # All other resolutions close the request
            target["status"] = "done"
        target["updated_ts"] = now_ts
        target["updated_iso"] = now_iso

        # Write to a temp file, then swap it in to replace the store.
        tmp = _LAWYER_REQUEST_PATH.with_suffix(".jsonl.tmp")
        with open(tmp, "w", encoding="utf-8") as f:
            for rec in items:
                f.write(json.dumps(rec, ensure_ascii=False) + "\n")
        tmp.replace(_LAWYER_REQUEST_PATH)

    # Notify
    _notify_dispute_resolution_async(notify_record)
    # Audit
    _audit_log(
        actor="admin", role="admin",
        action="dispute_resolved",
        target=request_id,
        meta={"resolution": body.resolution, "note": (body.note or "")[:200]},
    )
    return {"ok": True, "request_id": request_id, "resolution": body.resolution,
            "new_status": target["status"]}
not target: return JSONResponse(status_code=404, content={"ok": False, "reason": "request_not_found"}) # Determine recipients recipients = [] if body.recipient in ("user", "both") and target.get("user_email"): recipients.append(("user", target["user_email"])) if body.recipient in ("lawyer", "both") and target.get("assigned_lawyer_email"): recipients.append(("lawyer", target["assigned_lawyer_email"])) if not recipients: return JSONResponse(status_code=400, content={"ok": False, "reason": "no_valid_recipient_emails"}) # Send + collect outcomes outcomes = [] for role, addr in recipients: sent = _send_admin_message_sync( addr, f"[legal-eye] {subj}", f"{msg_body}\n\n---\nמזהה פנייה: {request_id[:8]}\n— legal-eye Admin", ) outcomes.append({"role": role, "email": addr, "sent": sent}) # Log onto record log_entry = { "ts": time.time(), "iso": time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()), "recipient": body.recipient, "outcomes": outcomes, "subject": subj, "body": msg_body, } target.setdefault("admin_messages", []).append(log_entry) # Rewrite tmp = _LAWYER_REQUEST_PATH.with_suffix(".jsonl.tmp") with open(tmp, "w", encoding="utf-8") as f: for rec in items: f.write(json.dumps(rec, ensure_ascii=False) + "\n") tmp.replace(_LAWYER_REQUEST_PATH) # v2.99.144 — audit _audit_log( actor="admin", role="admin", action="admin_message_sent", target=request_id, meta={ "recipient": body.recipient, "subject": subj[:120], "n_sent": sum(1 for o in outcomes if o["sent"]), "n_failed": sum(1 for o in outcomes if not o["sent"]), }, ) return { "ok": True, "outcomes": outcomes, "n_sent": sum(1 for o in outcomes if o["sent"]), "n_failed": sum(1 for o in outcomes if not o["sent"]), } class _LawyerRequestUpdateBody(BaseModel): # type: ignore id: str status: str # new | contacted | done | spam note: Optional[str] = "" @app.post("/v1/lawyer-request/update") def lawyer_request_update(req: _LawyerRequestUpdateBody, token: str = ""): # type: ignore """v2.99.123 — Admin: update status of a waitlist 
record. Allowed statuses: new / contacted / done / spam. JSONL is rewritten in-place; lock prevents concurrent writes.""" import os, time admin_token = os.environ.get("LE_ADMIN_TOKEN", "") if not admin_token or token != admin_token: return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"}) allowed = {"new", "contacted", "done", "spam"} if req.status not in allowed: return JSONResponse(status_code=400, content={"ok": False, "reason": "invalid_status"}) if not _LAWYER_REQUEST_PATH.exists(): return JSONResponse(status_code=404, content={"ok": False, "reason": "no_records"}) with _LAWYER_REQUEST_LOCK: items = [] try: with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue try: items.append(json.loads(line)) except Exception: continue except Exception as e: return JSONResponse(status_code=500, content={"ok": False, "error": str(e)}) # Find & update found = False old_status = "" updated_record = None for rec in items: if rec.get("id") == req.id: old_status = rec.get("status", "new") rec["status"] = req.status rec["updated_ts"] = time.time() rec["updated_iso"] = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()) if req.note: rec["admin_note"] = req.note found = True updated_record = rec break if not found: return JSONResponse(status_code=404, content={"ok": False, "reason": "id_not_found"}) # Rewrite atomically try: tmp = _LAWYER_REQUEST_PATH.with_suffix(".jsonl.tmp") with open(tmp, "w", encoding="utf-8") as f: for rec in items: f.write(json.dumps(rec, ensure_ascii=False) + "\n") tmp.replace(_LAWYER_REQUEST_PATH) except Exception as e: return JSONResponse(status_code=500, content={"ok": False, "error": str(e)}) # v2.99.133 — Notify the user (async, non-blocking) if their status # changed to "contacted" or "done" and they have an email on record if updated_record: _notify_user_status_change_async(updated_record, old_status, req.status) # v2.99.144 — audit _audit_log( actor="admin", 
@app.get("/v1/lawyer-request/list")
def lawyer_request_list(token: str = ""):  # type: ignore
    """Admin: list every record in the lawyer-request JSONL store.

    Args:
        token: Admin token; must equal the ``LE_ADMIN_TOKEN`` env var.

    Returns:
        ``{"ok": True, "n": <count>, "items": [...]}`` (empty when the store
        does not exist yet), a 403 JSON response for a bad/missing token, or
        a 500 JSON response when the store cannot be read. Blank and
        unparseable lines are skipped.
    """
    import hmac
    import os

    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing does not reveal how much
    # of the token matched. Bytes are used because compare_digest rejects
    # non-ASCII str arguments.
    if not admin_token or not hmac.compare_digest(
        token.encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})

    if not _LAWYER_REQUEST_PATH.exists():
        return {"ok": True, "n": 0, "items": []}

    items = []
    try:
        with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    items.append(json.loads(line))
                except Exception:
                    continue
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
    return {"ok": True, "n": len(items), "items": items}
# ────────────────────────────────────────────────────────────────────── _LAWYER_APP_PATH = _wa_pl.Path("tau_rag/runtime/lawyer_applications.jsonl") _LAWYER_APP_LOCK = _wa_th.Lock() def _notify_lawyer_application_async(record: dict) -> None: """Same webhook + email channels as lead notify, but framed for a lawyer joining the pool (different prefix in summaries).""" import threading, os def _worker(): # Webhook try: url = os.environ.get("LE_LEAD_WEBHOOK_URL", "").strip() if url: import urllib.request summary = ( f"⚖ עו\"ד חדש מבקש להצטרף ל-legal-eye\n" f"שם: {record.get('full_name', '?')}\n" f"רישיון: {record.get('license_number', '?')}\n" f"אימייל: {record.get('email', '?')}\n" f"טלפון: {record.get('phone', '?')}\n" f"תחומים: {', '.join(record.get('domains', []))}\n" f"ותק: {record.get('years_experience', '?')} שנים\n" f"עיר: {record.get('city', '?')}\n" f"מזהה: {record.get('id', '?')[:8]}" ) payload = {"text": summary, "summary": summary, "record": record, "_type": "lawyer_application"} req = urllib.request.Request( url, data=json.dumps(payload).encode("utf-8"), headers={"Content-Type": "application/json"}, method="POST", ) try: with urllib.request.urlopen(req, timeout=10) as r: print(f"[lawyer-app-notify] webhook OK ({r.status})") except Exception as e: print(f"[lawyer-app-notify] webhook FAIL: {type(e).__name__}: {e}") except Exception as e: print(f"[lawyer-app-notify] outer FAIL: {e}") # Email try: to_addr = os.environ.get("LE_LEAD_EMAIL_TO", "").strip() if to_addr: body = ( f"עו\"ד חדש מבקש להצטרף ל-legal-eye.\n\n" f"מזהה: {record.get('id')}\n" f"זמן: {record.get('iso')}\n\n" f"שם מלא: {record.get('full_name', '?')}\n" f"מס׳ רישיון: {record.get('license_number', '?')}\n" f"אימייל: {record.get('email', '?')}\n" f"טלפון: {record.get('phone', '?')}\n" f"עיר: {record.get('city', '?')} (עבודה מרחוק: {'כן' if record.get('remote_ok') else 'לא'})\n\n" f"תחומי התמחות: {', '.join(record.get('domains', []))}\n" f"ותק: {record.get('years_experience', '?')} שנים\n" f"שפות: 
@app.post("/v1/lawyer-onboarding/submit")
def lawyer_onboarding_submit(req: _LawyerApplicationBody):  # type: ignore
    """v2.99.125 — Lawyer joins the pool. No Auth — admin verifies manually
    against Bar Association registry.
    v2.99.150 — If LE_BETA_GATED=1, requires a valid invite code.

    Validates the application, rejects duplicates (same license number and
    email), appends the record to the applications JSONL store, consumes the
    invite code when beta gating is on, then fires the notification and
    audit hooks.

    Returns:
        ``{"ok": True, "id", "message"}`` on success, or a JSON error:
        400 (invite / terms / missing or invalid field), 409 (already
        applied), 500 (unexpected failure).
    """
    import os, time, uuid
    try:
        beta_gated = os.environ.get("LE_BETA_GATED", "").strip() in ("1", "true", "yes")

        # Beta gating
        if beta_gated:
            inv = _validate_invite_code(req.invite_code or "")
            if not inv.get("ok"):
                return JSONResponse(status_code=400, content={
                    "ok": False,
                    "reason": f"invite_{inv.get('reason','invalid')}",
                })
        if not req.accept_terms:
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "terms_not_accepted",
            })

        # Required fields
        for field, val in [
            ("full_name", req.full_name),
            ("license_number", req.license_number),
            ("email", req.email),
            ("phone", req.phone),
        ]:
            if not (val or "").strip():
                return JSONResponse(status_code=400, content={
                    "ok": False, "reason": f"missing_{field}",
                })
        if not req.domains:
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "missing_domains",
            })

        # Validate email shape
        em = req.email.strip()
        if "@" not in em or "." not in em.split("@")[-1]:
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "invalid_email",
            })

        # Validate license: keep digits only; 3 to 8 digits are accepted.
        lic = _re.sub(r"\D+", "", req.license_number)
        if len(lic) < 3 or len(lic) > 8:
            return JSONResponse(status_code=400, content={
                "ok": False, "reason": "invalid_license_number",
            })

        record = {
            "id": str(uuid.uuid4()),
            "ts": time.time(),
            "iso": time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()),
            "full_name": req.full_name.strip(),
            "license_number": lic,
            "email": em,
            "phone": req.phone.strip(),
            "domains": [d.strip() for d in req.domains if d.strip()],
            "years_experience": int(req.years_experience or 0),
            "languages": (req.languages or ["hebrew"]),
            "city": (req.city or "").strip(),
            "remote_ok": bool(req.remote_ok),
            "bio": (req.bio or "").strip(),
            "motivation": (req.motivation or "").strip(),
            "status": "submitted",
        }
        # Attach the invite code BEFORE persisting so the stored record
        # carries it too, not just the in-memory copy passed to the notifier.
        if beta_gated and req.invite_code:
            record["invite_code"] = req.invite_code.strip().upper()

        _LAWYER_APP_PATH.parent.mkdir(parents=True, exist_ok=True)

        # The duplicate check and the append share one critical section so
        # two concurrent identical submissions cannot both pass the check.
        with _LAWYER_APP_LOCK:
            # Check duplicate (same license + email = duplicate)
            existing = []
            if _LAWYER_APP_PATH.exists():
                try:
                    with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
                        for line in f:
                            try:
                                existing.append(json.loads(line))
                            except ValueError:
                                continue
                except (OSError, ValueError):
                    # Best-effort: an unreadable store must not block signup.
                    pass
            for prev in existing:
                if not isinstance(prev, dict):
                    continue
                if prev.get("license_number") == lic and prev.get("email") == em:
                    return JSONResponse(status_code=409, content={
                        "ok": False, "reason": "already_applied",
                        "id": prev.get("id"),
                        "status": prev.get("status"),
                    })

            # Append
            with open(_LAWYER_APP_PATH, "a", encoding="utf-8") as f:
                f.write(json.dumps(record, ensure_ascii=False) + "\n")

        # v2.99.150 — consume invite if beta-gated (only after a successful write)
        if beta_gated and req.invite_code:
            _consume_invite_code(req.invite_code, em)

        _notify_lawyer_application_async(record)
        # v2.99.144 — audit
        _audit_log(
            actor=em, role="lawyer",
            action="lawyer_application_submitted",
            target=record["id"],
            meta={"license": lic, "domains": record["domains"],
                  "years": record["years_experience"]},
        )
        print(f"[lawyer-app] new #{record['id'][:8]} {record['full_name']} "
              f"lic={lic} domains={record['domains']}")
        return {
            "ok": True,
            "id": record["id"],
            "message": "בקשתך התקבלה. נוודא את פרטי הרישיון מול לשכת עורכי הדין ונחזור אליך תוך 5 ימי עסקים.",
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}",
        })
Keeps Hebrew letters, replaces spaces with hyphens, drops everything else.""" import re as _re n = (name or "").strip() # Replace any whitespace/punct with hyphen, keep Hebrew + Latin + digits n = _re.sub(r'[\s_]+', '-', n) n = _re.sub(r'[^֐-׿a-zA-Z0-9\-]', '', n) n = _re.sub(r'-+', '-', n).strip('-') return n or "lawyer" def _lawyer_public_slug(rec: dict) -> str: """Return canonical slug for a lawyer record: name-slug + id-prefix.""" return f"{_slugify_he(rec.get('full_name', ''))}-{(rec.get('id', '') or '')[:8]}" def _lawyer_review_stats(lawyer_id: str) -> dict: """v2.99.189 — Compute review aggregation from lawyer_requests.jsonl. Returns {avg, count} for this lawyer's approved requests with ratings.""" if not lawyer_id or not _LAWYER_REQUEST_PATH.exists(): return {"avg": None, "count": 0} total = 0 n = 0 try: with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue try: rec = json.loads(line) except: continue if rec.get("assigned_lawyer_id") != lawyer_id: continue rating = rec.get("review_rating") if rating and isinstance(rating, (int, float)) and 1 <= rating <= 5: total += rating n += 1 except Exception: pass if n == 0: return {"avg": None, "count": 0} return {"avg": round(total / n, 1), "count": n} def _lawyer_public_view(rec: dict) -> dict: """Return only public-safe fields from a lawyer record.""" review_stats = _lawyer_review_stats(rec.get("id", "")) # v2.99.189 # v2.99.204 — opt-in WhatsApp link. Normalize to E.164-ish (digits only, # Israeli prefixes 0X → 972X). Only exposed if lawyer toggled opt-in. 
wa_phone = "" if rec.get("whatsapp_public_optin"): raw = (rec.get("phone") or "").strip() digits = "".join(c for c in raw if c.isdigit()) if digits.startswith("972"): wa_phone = digits elif digits.startswith("0") and len(digits) >= 9: wa_phone = "972" + digits[1:] elif len(digits) >= 9: wa_phone = digits return { "id": rec.get("id", ""), "slug": _lawyer_public_slug(rec), "full_name": rec.get("full_name", ""), "license_number": rec.get("license_number", ""), "domains": rec.get("domains", []) or [], "years_experience": rec.get("years_experience", 0), "languages": rec.get("languages", []) or [], "city": rec.get("city", ""), "remote_ok": bool(rec.get("remote_ok", False)), "bio": rec.get("bio", ""), "verified": (rec.get("status") == "verified"), "joined_iso": rec.get("iso", ""), "photo_url": rec.get("photo_url", ""), # v2.99.178 "review_avg": review_stats["avg"], # v2.99.189 "review_count": review_stats["count"], "whatsapp_phone": wa_phone, # v2.99.204 } @app.get("/sitemap.xml") def sitemap_xml(): # type: ignore """v2.99.157 — Dynamic sitemap. Static public URLs + one URL per verified lawyer profile. 
Served via Vercel rewrite from legal-eye.1bigfam.com/sitemap.xml.""" from fastapi.responses import Response import time base = "https://legal-eye.1bigfam.com" today = time.strftime("%Y-%m-%d") # Static URLs — homepage + marketing + content static_urls = [ (f"{base}/", today, "weekly", "1.0"), (f"{base}/landing", today, "weekly", "0.9"), (f"{base}/eval", today, "weekly", "0.9"), (f"{base}/compare", "2026-05-12", "monthly", "0.8"), (f"{base}/founder", "2026-05-12", "monthly", "0.7"), (f"{base}/doctrines", "2026-05-13", "monthly", "0.8"), (f"{base}/blog", "2026-05-13", "weekly", "0.8"), (f"{base}/blog/legal-ai-pitfalls", "2026-05-13", "monthly", "0.7"), (f"{base}/blog/how-to-read-legal-ai-eval", "2026-05-13", "monthly", "0.7"), (f"{base}/blog/rag-101", "2026-05-13", "monthly", "0.7"), (f"{base}/blog/850-docs-no-change","2026-05-13", "monthly", "0.7"), (f"{base}/blog/rental-contract-mistakes","2026-05-25","monthly","0.8"), (f"{base}/blog/severance-rights-after-dismissal","2026-05-26","monthly","0.8"), (f"{base}/blog/consumer-rights-israel","2026-05-27","monthly","0.8"), (f"{base}/blog/statute-of-limitations-israel","2026-05-28","monthly","0.85"), (f"{base}/blog/what-to-do-when-sued","2026-05-29","monthly","0.85"), (f"{base}/docs/api", "2026-05-13", "monthly", "0.6"), (f"{base}/for-lawyers/", today, "weekly", "0.9"), (f"{base}/lawyers/", today, "daily", "0.9"), (f"{base}/faq/", today, "weekly", "0.8"), (f"{base}/about/", today, "monthly", "0.7"), (f"{base}/contact/", today, "monthly", "0.7"), (f"{base}/pricing/", today, "weekly", "0.8"), (f"{base}/press/", today, "monthly", "0.6"), (f"{base}/status/", today, "daily", "0.5"), (f"{base}/changelog/", today, "weekly", "0.6"), (f"{base}/glossary/", today, "monthly", "0.8"), (f"{base}/calculator/", today, "monthly", "0.85"), (f"{base}/how-it-works/", today, "monthly", "0.9"), (f"{base}/reviews/", today, "weekly", "0.75"), (f"{base}/templates/", today, "monthly", "0.9"), (f"{base}/quiz/", today, "monthly", "0.85"), 
(f"{base}/precedents/", today, "monthly", "0.85"), (f"{base}/lawyer-cost-guide/", today, "monthly", "0.9"), (f"{base}/timeline/", today, "monthly", "0.8"), (f"{base}/start/", today, "monthly", "0.95"), (f"{base}/israeli-laws/", today, "monthly", "0.85"), (f"{base}/our-corpus/", today, "monthly", "0.9"), (f"{base}/roadmap/", today, "monthly", "0.8"), (f"{base}/security/", today, "monthly", "0.75"), (f"{base}/en/", today, "monthly", "0.7"), (f"{base}/methodology/", today, "monthly", "0.85"), (f"{base}/contribute/", today, "monthly", "0.75"), (f"{base}/now/", today, "weekly", "0.8"), (f"{base}/quick-reference/", today, "monthly", "0.85"), (f"{base}/api/", today, "monthly", "0.85"), (f"{base}/api/playground/", today, "monthly", "0.8"), (f"{base}/verify-citation/", today, "monthly", "0.85"), (f"{base}/try/", today, "weekly", "0.9"), (f"{base}/case-brief/", today, "monthly", "0.8"), (f"{base}/deadline-calc/", today, "monthly", "0.9"), (f"{base}/tools/", today, "weekly", "0.95"), (f"{base}/redline/", today, "monthly", "0.85"), (f"{base}/sitemap/", today, "weekly", "0.8"), (f"{base}/poster/", today, "monthly", "0.7"), (f"{base}/learn/", today, "weekly", "0.9"), (f"{base}/learn/day-1/", today, "monthly", "0.75"), (f"{base}/learn/day-2/", today, "monthly", "0.75"), (f"{base}/learn/day-3/", today, "monthly", "0.75"), (f"{base}/learn/day-4/", today, "monthly", "0.75"), (f"{base}/learn/day-5/", today, "monthly", "0.75"), (f"{base}/learn/day-6/", today, "monthly", "0.75"), (f"{base}/learn/day-7/", today, "monthly", "0.75"), (f"{base}/poster-citations/", today, "monthly", "0.7"), (f"{base}/posters/", today, "weekly", "0.85"), (f"{base}/share/", today, "monthly", "0.7"), (f"{base}/poster-doctrines/", today, "monthly", "0.7"), (f"{base}/qr/", today, "monthly", "0.75"), (f"{base}/cases/", today, "weekly", "0.85"), (f"{base}/cases/raviv-bayit-yules/", today, "monthly", "0.8"), (f"{base}/cases/roker-salomon/", today, "monthly", "0.8"), (f"{base}/cases/paz-gas-gazit-hadarom/", today, 
"monthly", "0.8"), (f"{base}/cases/bank-mizrahi-migdal/", today, "monthly", "0.85"), (f"{base}/cases/alice-miller-defense/", today, "monthly", "0.85"), (f"{base}/cases/yedioth-kraus/", today, "monthly", "0.85"), (f"{base}/cases/aes-systems-saar/", today, "monthly", "0.85"), (f"{base}/cases/vaknin-bet-shemesh/", today, "monthly", "0.85"), (f"{base}/blog/non-compete-hi-tech-israel/", today, "monthly", "0.8"), (f"{base}/blog/negligence-claim-israel/", today, "monthly", "0.8"), (f"{base}/perf/", today, "monthly", "0.7"), (f"{base}/limitations-check/", today, "monthly", "0.85"), (f"{base}/components/", today, "monthly", "0.7"), (f"{base}/og-test/", today, "monthly", "0.6"), (f"{base}/anonymize/", today, "monthly", "0.85"), (f"{base}/numbers/", today, "weekly", "0.85"), (f"{base}/keyboard/", today, "monthly", "0.65"), (f"{base}/badge/", today, "monthly", "0.7"), (f"{base}/lawyer-faq/", today, "monthly", "0.85"), (f"{base}/ai-glossary/", today, "monthly", "0.8"), (f"{base}/lawyers/onboard.html", "2026-05-21", "monthly", "0.6"), # v2.99.200 — Domain landing pages (SEO funnel per-area) (f"{base}/lawyers/employment/", today, "weekly", "0.85"), (f"{base}/lawyers/contracts/", today, "weekly", "0.85"), (f"{base}/lawyers/consumer/", today, "weekly", "0.85"), (f"{base}/lawyers/tenancy/", today, "weekly", "0.85"), (f"{base}/legal/tos.html", "2026-05-12", "yearly", "0.3"), (f"{base}/legal/privacy.html", "2026-05-12", "yearly", "0.3"), ] # Dynamic — one URL per verified lawyer lawyer_urls = [] if _LAWYER_APP_PATH.exists(): try: with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f: for line in f: line = line.strip() if not line: continue try: rec = json.loads(line) except: continue if rec.get("status") != "verified": continue slug = _lawyer_public_slug(rec) iso = (rec.get("iso", "") or "")[:10] or today from urllib.parse import quote lawyer_urls.append( (f"{base}/lawyer-profile/?slug={quote(slug)}", iso, "monthly", "0.7") ) except Exception as e: print(f"[sitemap] lawyer enum 
@app.get("/v1/lawyer/profile")
def lawyer_profile(id: str = "", slug: str = ""):  # type: ignore
    """v2.99.155 — Public lawyer profile. Returns only public-safe fields.

    Lookup by either the full record id (``?id=``) or the public slug
    (``?slug=<name-slug>-<id-prefix>``). As a fallback, a slug whose last
    hyphen-separated part is 8 characters long is matched against the start
    of the record id, so a slug keeps resolving after a name change.

    An exact id/slug match always wins over the id-prefix fallback,
    regardless of record order in the file.

    Responses: 400 when neither parameter is given, 404 when no record
    matches or the matched record is not ``verified``, 500 on read failure,
    otherwise ``{"ok": True, "lawyer": <public view>}``.
    """
    if not (id or slug):
        return JSONResponse(status_code=400,
                            content={"ok": False, "reason": "id_or_slug_required"})
    if not _LAWYER_APP_PATH.exists():
        return JSONResponse(status_code=404,
                            content={"ok": False, "reason": "not_found"})

    # Loop-invariant: the 8-char id prefix carried at the end of the slug.
    tail = slug.split("-")[-1] if "-" in slug else ""
    if len(tail) != 8:
        tail = ""

    exact = None
    by_prefix = None
    try:
        with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                except ValueError:  # malformed JSONL line — skip it
                    continue
                if not isinstance(rec, dict):
                    continue
                if (id and rec.get("id") == id) or (
                    slug and _lawyer_public_slug(rec) == slug
                ):
                    exact = rec
                    break
                # Remember only the first prefix hit, but keep scanning in
                # case a later record is the exact match.
                if tail and by_prefix is None and str(rec.get("id") or "").startswith(tail):
                    by_prefix = rec
    except Exception as e:
        return JSONResponse(status_code=500,
                            content={"ok": False, "error": f"{type(e).__name__}: {e}"})

    target = exact if exact is not None else by_prefix
    if not target:
        return JSONResponse(status_code=404,
                            content={"ok": False, "reason": "not_found"})
    if target.get("status") != "verified":
        return JSONResponse(status_code=404, content={
            "ok": False,
            "reason": "not_verified",
            "hint": "Profile is only public after admin verification.",
        })
    return {"ok": True, "lawyer": _lawyer_public_view(target)}
@app.get("/v1/public/reviews")
def public_reviews(limit: int = 12):  # type: ignore
    """v2.99.212 — Public reviews feed.

    Returns the most recent client reviews (rating + text truncated to 280
    characters), newest first by ``review_iso`` (falling back to ``iso``),
    together with the overall count and average of all valid ratings.

    The reviewer is anonymized to their first name followed by ``****``
    (or a generic "anonymous client" label when no name is stored).
    NOTE(review): an earlier docstring said "first letter only" — confirm
    which level of anonymization is intended.

    Lawyer name/slug/first domain are filled in only for lawyers whose
    application is ``verified``; otherwise they are empty. *limit* is
    clamped to 1..50. The response carries a 5-minute public cache header.
    """
    import json as _json
    from fastapi.responses import JSONResponse as _JR

    limit = max(1, min(int(limit), 50))

    def _iter_records(path):
        """Yield the dict records of a JSONL file, skipping blank/bad lines."""
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = _json.loads(line)
                except ValueError:
                    continue
                if isinstance(rec, dict):
                    yield rec

    # lawyer_id → the few public fields needed to link a review to a profile
    lawyer_map = {}
    if _LAWYER_APP_PATH.exists():
        try:
            for rec in _iter_records(_LAWYER_APP_PATH):
                if rec.get("status") != "verified":
                    continue
                lid = rec.get("id", "")
                if lid:
                    lawyer_map[lid] = {
                        "name": rec.get("full_name", ""),
                        "slug": _lawyer_public_slug(rec),
                        "domains": rec.get("domains", []) or [],
                    }
        except (OSError, UnicodeError) as e:
            # Best effort: reviews are still served, just without lawyer links.
            print(f"[public-reviews] lawyer map read FAIL: {e}")

    rated = []
    if _LAWYER_REQUEST_PATH.exists():
        try:
            for rec in _iter_records(_LAWYER_REQUEST_PATH):
                rating = rec.get("review_rating")
                # bool is an int subclass; a stored `true` is not a rating.
                if isinstance(rating, bool) or not isinstance(rating, (int, float)):
                    continue
                if 1 <= rating <= 5:
                    rated.append(rec)
        except (OSError, UnicodeError) as e:
            print(f"[public-reviews] request log read FAIL: {e}")

    total_count = len(rated)
    avg = round(sum(r["review_rating"] for r in rated) / total_count, 2) if rated else None

    def _when(rec):
        # str() keeps the sort safe if a record stored a non-string timestamp.
        return str(rec.get("review_iso") or rec.get("iso") or "")

    rated.sort(key=_when, reverse=True)

    out_items = []
    for rec in rated[:limit]:
        lawyer = lawyer_map.get(rec.get("assigned_lawyer_id", ""), {})
        requester_name = str(rec.get("name") or "").strip()
        if requester_name:
            anon = requester_name.split()[0] + " " + ("*" * 4)
        else:
            anon = "לקוח אנונימי"
        out_items.append({
            "rating": rec.get("review_rating"),
            "text": str(rec.get("review_text") or "").strip()[:280],
            "iso": _when(rec)[:10],
            "reviewer": anon,
            "lawyer_name": lawyer.get("name", ""),
            "lawyer_slug": lawyer.get("slug", ""),
            "lawyer_domain": (lawyer.get("domains") or [None])[0],
        })

    return _JR(
        content={
            "ok": True,
            "n": len(out_items),
            "total_count": total_count,
            "review_avg": avg,
            "items": out_items,
        },
        headers={
            "Cache-Control": "public, max-age=300, s-maxage=300",
            "Access-Control-Allow-Origin": "*",
        },
    )
@app.post("/v1/admin/lawyer-request/note")
def lawyer_request_note(req: _LawyerRequestNoteBody, token: str = ""):  # type: ignore
    """v2.99.196 — Admin: append a timestamped CRM note to a lawyer request.

    The note is appended to the record's ``notes_history`` list (created if
    missing or not a list) and the whole JSONL file is rewritten through a
    temp file + rename while ``_LAWYER_REQUEST_LOCK`` is held. Same pattern
    as /v1/admin/lawyer-application/note, but for requests.

    Responses: 403 bad/missing admin token, 400 note shorter than 2
    characters, 404 no file or unknown id, 500 on read/write failure,
    otherwise ``{"ok": True, "note": <entry>}``. Note text is capped at
    1000 characters.
    """
    import hmac
    import os
    import time
    import uuid

    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing does not leak the token.
    # Bytes are compared because compare_digest rejects non-ASCII str.
    if not admin_token or not hmac.compare_digest(
        token.encode("utf-8", "surrogatepass"),
        admin_token.encode("utf-8", "surrogatepass"),
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})

    note_text = (req.note or "").strip()
    if len(note_text) < 2:
        return JSONResponse(status_code=400, content={"ok": False, "reason": "note_empty"})
    if not _LAWYER_REQUEST_PATH.exists():
        return JSONResponse(status_code=404, content={"ok": False, "reason": "no_records"})

    note_entry = {
        "id": str(uuid.uuid4())[:8],
        "ts": time.time(),
        "iso": time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()),
        "text": note_text[:1000],
    }

    with _LAWYER_REQUEST_LOCK:
        items = []
        try:
            with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        items.append(json.loads(line))
                    except ValueError:
                        # NOTE: unparseable lines are not carried over to the
                        # rewritten file (unchanged from previous behavior).
                        continue
        except (OSError, UnicodeError) as e:
            return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})

        # Non-dict JSON lines are kept in the file but can never be a target.
        target = next(
            (r for r in items if isinstance(r, dict) and r.get("id") == req.id),
            None,
        )
        if target is None:
            return JSONResponse(status_code=404, content={"ok": False, "reason": "id_not_found"})

        if not isinstance(target.get("notes_history"), list):
            target["notes_history"] = []
        target["notes_history"].append(note_entry)

        # Temp file + rename, so a failed write leaves the live file intact.
        tmp = _LAWYER_REQUEST_PATH.with_suffix(".jsonl.tmp")
        try:
            with open(tmp, "w", encoding="utf-8") as f:
                f.writelines(json.dumps(rec, ensure_ascii=False) + "\n" for rec in items)
            tmp.replace(_LAWYER_REQUEST_PATH)
        except OSError as e:
            return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})

    _audit_log(actor="admin", role="admin", action="request_note_added",
               target=req.id,
               meta={"note_id": note_entry["id"], "len": len(note_text)})
    return {"ok": True, "note": note_entry}
@app.post("/v1/admin/lawyer-application/note")
def lawyer_application_note(req: _LawyerAppNoteBody, token: str = ""):  # type: ignore
    """v2.99.194 — Admin: append a timestamped note to a lawyer's record.

    Notes accumulate in the record's ``notes_history`` list (created if
    missing or not a list), so they persist across status changes — unlike
    ``admin_note``, which is overwritten. Useful for CRM-style outreach
    tracking. The JSONL file is rewritten through a temp file + rename
    while ``_LAWYER_APP_LOCK`` is held.

    Responses: 403 bad/missing admin token, 400 note shorter than 2
    characters, 404 no file or unknown id, 500 on read/write failure,
    otherwise ``{"ok": True, "note": <entry>}``. Note text is capped at
    1000 characters.
    """
    import hmac
    import os
    import time
    import uuid

    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing does not leak the token.
    # Bytes are compared because compare_digest rejects non-ASCII str.
    if not admin_token or not hmac.compare_digest(
        token.encode("utf-8", "surrogatepass"),
        admin_token.encode("utf-8", "surrogatepass"),
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})

    note_text = (req.note or "").strip()
    if len(note_text) < 2:
        return JSONResponse(status_code=400, content={"ok": False, "reason": "note_empty"})
    if not _LAWYER_APP_PATH.exists():
        return JSONResponse(status_code=404, content={"ok": False, "reason": "no_records"})

    note_entry = {
        "id": str(uuid.uuid4())[:8],
        "ts": time.time(),
        "iso": time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime()),
        "text": note_text[:1000],
    }

    with _LAWYER_APP_LOCK:
        items = []
        try:
            with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        items.append(json.loads(line))
                    except ValueError:
                        # NOTE: unparseable lines are not carried over to the
                        # rewritten file (unchanged from previous behavior).
                        continue
        except (OSError, UnicodeError) as e:
            return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})

        # Non-dict JSON lines are kept in the file but can never be a target.
        target = next(
            (r for r in items if isinstance(r, dict) and r.get("id") == req.id),
            None,
        )
        if target is None:
            return JSONResponse(status_code=404, content={"ok": False, "reason": "id_not_found"})

        if not isinstance(target.get("notes_history"), list):
            target["notes_history"] = []
        target["notes_history"].append(note_entry)

        # Temp file + rename, so a failed write leaves the live file intact.
        tmp = _LAWYER_APP_PATH.with_suffix(".jsonl.tmp")
        try:
            with open(tmp, "w", encoding="utf-8") as f:
                f.writelines(json.dumps(rec, ensure_ascii=False) + "\n" for rec in items)
            tmp.replace(_LAWYER_APP_PATH)
        except OSError as e:
            return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})

    _audit_log(actor="admin", role="admin", action="lawyer_note_added",
               target=req.id,
               meta={"note_id": note_entry["id"], "len": len(note_text)})
    return {"ok": True, "note": note_entry}
@app.post("/v1/lawyer-onboarding/photo")
async def lawyer_photo_upload(request: Request, lawyer_id: str = ""):  # type: ignore
    """v2.99.178 — Upload/replace a lawyer's profile photo.

    Auth: admin via ``?token=``, OR a signed-in lawyer (session from the
    Authorization header) whose e-mail matches the record being updated.
    Accepts a multipart ``photo`` field; PNG/JPEG/WEBP only, max 4MB.
    The file is stored at ``_LAWYER_PHOTO_DIR/{lawyer_id}.{ext}`` and the
    record gets ``photo_url``, ``photo_ext`` and ``photo_updated_ts``.

    The record load, photo write and JSONL rewrite all happen under
    ``_LAWYER_APP_LOCK`` so a concurrent status/note update is not lost.

    Responses: 401 unauthenticated, 400 missing id/photo, oversized file,
    bad extension or non-image content, 403 when a lawyer targets someone
    else's record, 404 unknown lawyer, 500 on read/write failure.
    """
    import hmac
    import os
    import time

    # --- Auth: admin token, else a signed-in session ------------------
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    q_token = request.query_params.get("token", "")
    is_admin = bool(admin_token) and hmac.compare_digest(
        q_token.encode("utf-8", "surrogatepass"),
        admin_token.encode("utf-8", "surrogatepass"),
    )
    sess = None
    if not is_admin:
        try:
            from ..auth import session_from_header
            sess = session_from_header(request.headers.get("authorization"))
        except Exception:
            sess = None  # auth backend failure is treated as "not signed in"
    if not (is_admin or sess):
        return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})
    if not lawyer_id:
        return JSONResponse(status_code=400, content={"ok": False, "reason": "missing_lawyer_id"})

    # --- Read and validate the multipart file -------------------------
    try:
        form = await request.form()
        file = form.get("photo")
        if not file or not hasattr(file, "filename"):
            return JSONResponse(status_code=400, content={"ok": False, "reason": "missing_photo"})
        # Read one byte past the cap: enough to detect an oversized file
        # without pulling an arbitrarily large upload into memory.
        content = await file.read(_MAX_PHOTO_BYTES + 1)
        if len(content) > _MAX_PHOTO_BYTES:
            return JSONResponse(status_code=400, content={
                "ok": False,
                "reason": "file_too_large",
                "max_bytes": _MAX_PHOTO_BYTES,
            })
        fname = (file.filename or "").lower()
        ext = fname.rsplit(".", 1)[-1] if "." in fname else ""
        if ext not in _ALLOWED_PHOTO_EXTS:
            return JSONResponse(status_code=400, content={
                "ok": False,
                "reason": "invalid_extension",
                "allowed": sorted(_ALLOWED_PHOTO_EXTS),
            })
        # The extension is client-controlled; also require a real image
        # signature (JPEG / PNG / WEBP) so arbitrary bytes are not stored
        # and later served under an image content type.
        is_image = (
            content[:3] == b"\xff\xd8\xff"
            or content[:8] == b"\x89PNG\r\n\x1a\n"
            or (content[:4] == b"RIFF" and content[8:12] == b"WEBP")
        )
        if not is_image:
            return JSONResponse(status_code=400, content={
                "ok": False,
                "reason": "invalid_image",
            })
    except Exception as e:
        return JSONResponse(status_code=400, content={
            "ok": False,
            "error": f"{type(e).__name__}: {e}",
        })

    if not _LAWYER_APP_PATH.exists():
        return JSONResponse(status_code=404, content={"ok": False, "reason": "lawyer_not_found"})

    with _LAWYER_APP_LOCK:
        # --- Locate the lawyer record (read under the lock) ------------
        items = []
        try:
            with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        items.append(json.loads(line))
                    except ValueError:
                        continue
        except (OSError, UnicodeError) as e:
            return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})

        target = next(
            (r for r in items if isinstance(r, dict) and r.get("id") == lawyer_id),
            None,
        )
        if not target:
            return JSONResponse(status_code=404, content={"ok": False, "reason": "lawyer_not_found"})

        # Self-update path: the session e-mail must own the record.
        if not is_admin and sess:
            if (target.get("email", "") or "").lower() != (sess.get("email", "") or "").lower():
                return JSONResponse(status_code=403, content={"ok": False, "reason": "not_your_record"})

        # --- Save the photo: write new first, then drop stale ones -----
        _LAWYER_PHOTO_DIR.mkdir(parents=True, exist_ok=True)
        target_path = _LAWYER_PHOTO_DIR / f"{lawyer_id}.{ext}"
        staging = target_path.with_name(target_path.name + ".tmp")
        try:
            staging.write_bytes(content)
            staging.replace(target_path)
        except OSError as e:
            try:
                staging.unlink()
            except OSError:
                pass
            return JSONResponse(status_code=500, content={"ok": False, "error": f"write: {e}"})
        # Remove photos left over under a different extension so they
        # don't accumulate or shadow the new one.
        for old in _LAWYER_PHOTO_DIR.glob(f"{lawyer_id}.*"):
            if old == target_path:
                continue
            try:
                old.unlink()
            except OSError:
                pass

        # --- Update the record and rewrite the JSONL atomically --------
        photo_url = f"/v1/lawyer/photo/{lawyer_id}"
        target["photo_url"] = photo_url
        target["photo_ext"] = ext
        target["photo_updated_ts"] = time.time()
        tmp = _LAWYER_APP_PATH.with_suffix(".jsonl.tmp")
        try:
            with open(tmp, "w", encoding="utf-8") as f:
                f.writelines(json.dumps(r, ensure_ascii=False) + "\n" for r in items)
            tmp.replace(_LAWYER_APP_PATH)
        except OSError as e:
            return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})

    _audit_log(
        actor=target.get("email", "") if not is_admin else "admin",
        role="lawyer" if not is_admin else "admin",
        action="lawyer_photo_uploaded",
        target=lawyer_id,
        meta={"ext": ext, "size": len(content)},
    )
    return {"ok": True, "photo_url": photo_url, "size": len(content)}
# ──────────────────────────────────────────────────────────────────────
# v2.99.130 — Auth v0 (magic-link email)
# ──────────────────────────────────────────────────────────────────────
class _AuthRequestLinkBody(BaseModel):  # type: ignore
    # Address the magic link is requested for.
    email: str


def _send_magic_link_email(email: str, link: str) -> bool:
    """v2.99.151 — Send the sign-in link via the branded multipart helper.

    Returns whatever ``_send_email_branded`` reports (truthy on success)
    and logs the outcome either way.
    """
    body = (
        "שלום,\n\n"
        "ביקשת להיכנס ל-legal-eye. לחץ על הכפתור למטה כדי להמשיך.\n\n"
        "הקישור תקף ל-15 דקות.\n"
        "אם לא ביקשת להיכנס — אפשר פשוט להתעלם מהאימייל.\n\n"
        "אם הכפתור לא עובד, העתק את הקישור הבא לדפדפן:\n"
        f"{link}"
    )
    ok = _send_email_branded(
        email,
        "legal-eye · קישור כניסה",
        body,
        title="קישור כניסה ל-legal-eye",
        cta_label="היכנס לחשבון ←",
        cta_url=link,
    )
    if ok:
        print(f"[auth] magic link sent → {email}")
    else:
        print(f"[auth] magic link not sent (SMTP unconfigured?) → {email}")
    return ok


@app.post("/v1/auth/request-link")
def auth_request_link(req: _AuthRequestLinkBody, request: Request):  # type: ignore
    """Generate a magic sign-in link and e-mail it.

    The link host comes from ``LE_PUBLIC_URL`` (falling back to the public
    domain, because the client app is served from a different host than
    this API).

    Security: the link is a bearer credential for the requested address.
    It is returned in the response body (``dev_link``) ONLY when sending
    failed AND ``LE_AUTH_DEV_LINK`` is set to 1/true/yes. Without that
    opt-in, a failed or unconfigured SMTP setup would let any caller sign
    in as any e-mail address.

    Responses: 400 with the helper's result when the request is rejected,
    500 on unexpected failure, otherwise ``{"ok": True, "message": ...,
    "email_sent": bool}``.
    """
    import os

    try:
        from ..auth import request_magic_link

        result = request_magic_link(req.email)
        if not result["ok"]:
            return JSONResponse(status_code=400, content=result)

        public_host = (
            os.environ.get("LE_PUBLIC_URL", "").strip().rstrip("/")
            or "https://legal-eye.1bigfam.com"
        )
        link = f"{public_host}/?auth_token={result['token']}"
        sent = _send_magic_link_email(result["email"], link)

        # Always report success — don't leak which email is registered.
        resp = {
            "ok": True,
            "message": "אם האימייל תקין — שלחנו קישור כניסה. בדוק את תיבת הדואר (וגם ספאם).",
            "email_sent": sent,
        }
        if not sent:
            dev_link_allowed = os.environ.get("LE_AUTH_DEV_LINK", "").strip().lower() in (
                "1", "true", "yes",
            )
            if dev_link_allowed:
                # Local testing only: hand the link back to the caller.
                resp["dev_link"] = link
                resp["dev_note"] = "SMTP not configured — manual link returned for testing"
            else:
                print("[auth] magic link withheld from response "
                      "(set LE_AUTH_DEV_LINK=1 for local testing)")
        return resp
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False,
            "error": f"{type(e).__name__}: {e}",
        })


@app.get("/v1/auth/verify")
def auth_verify(token: str):  # type: ignore
    """Consume a magic token; on success return the helper's result.

    The result of ``verify_magic_link`` is returned as-is (400 when its
    ``ok`` is falsy). A successful sign-in is written to the audit log
    using ``result["user"]``'s email and id.
    """
    try:
        from ..auth import verify_magic_link

        result = verify_magic_link(token)
        if not result["ok"]:
            return JSONResponse(status_code=400, content=result)
        user = result["user"]
        # v2.99.144 — audit
        _audit_log(
            actor=user["email"],
            role="user",
            action="auth_signin",
            target=user["id"],
            meta={},
        )
        return result
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False,
            "error": f"{type(e).__name__}: {e}",
        })


@app.get("/v1/auth/me")
def auth_me(request: Request):  # type: ignore
    """Return the current user from ``Authorization: Bearer <token>``.

    401 when the header does not resolve to a session.
    """
    try:
        from ..auth import session_from_header

        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401,
                                content={"ok": False, "reason": "not_authenticated"})
        return {
            "ok": True,
            "user": {"id": sess["user_id"], "email": sess["email"]},
            "expires_ts": sess["expires_ts"],
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False,
            "error": f"{type(e).__name__}: {e}",
        })


@app.post("/v1/auth/logout")
def auth_logout(request: Request):  # type: ignore
    """Invalidate the current session.

    Succeeds even when there is no active session, in which case the
    response also carries ``already_logged_out: True``.
    """
    try:
        from ..auth import session_from_header, logout

        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return {"ok": True, "already_logged_out": True}
        logout(sess["session_token"])
        return {"ok": True}
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False,
            "error": f"{type(e).__name__}: {e}",
        })
@app.get("/v1/lawyer/me")
def lawyer_me(request: Request):  # type: ignore
    """v2.99.134 — Tell the signed-in user whether they are a lawyer applicant.

    Scans the lawyer-applications JSONL file for the first record whose
    ``email`` equals the session email.  On a match the response carries
    ``is_lawyer=True`` and the whole record under ``lawyer``; otherwise
    (including when the file does not exist) ``is_lawyer=False``.

    Responds 401 without a session and 500 on unexpected errors.
    """
    try:
        from ..auth import session_from_header

        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})
        email = sess["email"]
        if not _LAWYER_APP_PATH.exists():
            return {"ok": True, "is_lawyer": False, "email": email}
        with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
            for raw in f:
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    rec = json.loads(raw)
                except ValueError:
                    # json.JSONDecodeError is a ValueError; skip corrupt lines
                    # without also swallowing KeyboardInterrupt/SystemExit.
                    continue
                # Skip valid JSON that is not an object (it has no .get).
                if isinstance(rec, dict) and rec.get("email") == email:
                    return {
                        "ok": True,
                        "is_lawyer": True,
                        "email": email,
                        "lawyer": rec,
                    }
        return {"ok": True, "is_lawyer": False, "email": email}
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})


class _LawyerAvailabilityBody(BaseModel):  # type: ignore
    """Request body for the lawyer availability toggle."""

    status: str  # available | busy | unavailable


class _LawyerWhatsappBody(BaseModel):  # type: ignore
    """v2.99.204 — opt-in to publish WhatsApp link on public profile."""

    public: bool
@app.post("/v1/lawyer/me/availability")
def lawyer_set_availability(body: _LawyerAvailabilityBody, request: Request):  # type: ignore
    """v2.99.139 — Verified lawyer toggles their availability.

    ``body.status`` must be ``available``, ``busy`` or ``unavailable``.  The
    applications file is loaded, the first record matching the session email
    is updated, and the file is rewritten through a temp file while holding
    ``_LAWYER_APP_LOCK``.

    Responses: 401 no session, 400 ``invalid_status``, 404
    ``no_applications`` / ``not_a_lawyer``, 403 ``not_verified``, 500 on
    unexpected errors, otherwise ``{"ok": True, "status": ...}``.
    """
    import time

    try:
        from ..auth import session_from_header

        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})
        if body.status not in ("available", "busy", "unavailable"):
            return JSONResponse(status_code=400, content={"ok": False, "reason": "invalid_status"})
        if not _LAWYER_APP_PATH.exists():
            return JSONResponse(status_code=404, content={"ok": False, "reason": "no_applications"})

        with _LAWYER_APP_LOCK:
            # NOTE(review): lines that fail to parse are skipped here and are
            # therefore not written back by the rewrite below.
            items = []
            with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
                for raw in f:
                    raw = raw.strip()
                    if not raw:
                        continue
                    try:
                        items.append(json.loads(raw))
                    except ValueError:  # json.JSONDecodeError subclasses ValueError
                        continue

            # Non-object JSON lines are kept for the rewrite but cannot match.
            mine = next(
                (r for r in items if isinstance(r, dict) and r.get("email") == sess["email"]),
                None,
            )
            if mine is None:
                return JSONResponse(status_code=404, content={"ok": False, "reason": "not_a_lawyer"})
            if mine.get("status") != "verified":
                return JSONResponse(status_code=403, content={"ok": False, "reason": "not_verified"})

            # One clock read so the numeric and ISO stamps describe the same instant.
            now = time.time()
            mine["availability_status"] = body.status
            mine["availability_changed_ts"] = now
            mine["availability_changed_iso"] = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime(now))

            # Write to a sibling temp file, then swap it into place.
            tmp = _LAWYER_APP_PATH.with_suffix(".jsonl.tmp")
            with open(tmp, "w", encoding="utf-8") as f:
                f.writelines(json.dumps(rec, ensure_ascii=False) + "\n" for rec in items)
            tmp.replace(_LAWYER_APP_PATH)

        # v2.99.144 — audit
        _audit_log(
            actor=sess["email"],
            role="lawyer",
            action="availability_set",
            target=sess["email"],
            meta={"status": body.status},
        )
        return {"ok": True, "status": body.status}
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
@app.post("/v1/lawyer/me/whatsapp")
def lawyer_set_whatsapp_public(body: _LawyerWhatsappBody, request: Request):  # type: ignore
    """v2.99.204 — Verified lawyer opts in/out of showing WhatsApp publicly.

    Opting in requires a non-blank ``phone`` on the lawyer record.  The
    applications file is rewritten through a temp file while holding
    ``_LAWYER_APP_LOCK``.

    Responses: 401 no session, 404 ``no_applications`` / ``not_a_lawyer``,
    403 ``not_verified``, 400 ``no_phone_on_file``, 500 on unexpected
    errors, otherwise ``{"ok": True, "public": ...}``.
    """
    import time

    try:
        from ..auth import session_from_header

        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})
        if not _LAWYER_APP_PATH.exists():
            return JSONResponse(status_code=404, content={"ok": False, "reason": "no_applications"})

        want_public = bool(body.public)
        with _LAWYER_APP_LOCK:
            # NOTE(review): lines that fail to parse are skipped here and are
            # therefore not written back by the rewrite below.
            items = []
            with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
                for raw in f:
                    raw = raw.strip()
                    if not raw:
                        continue
                    try:
                        items.append(json.loads(raw))
                    except ValueError:  # json.JSONDecodeError subclasses ValueError
                        continue

            mine = next(
                (r for r in items if isinstance(r, dict) and r.get("email") == sess["email"]),
                None,
            )
            if mine is None:
                return JSONResponse(status_code=404, content={"ok": False, "reason": "not_a_lawyer"})
            if mine.get("status") != "verified":
                return JSONResponse(status_code=403, content={"ok": False, "reason": "not_verified"})
            if body.public and not (mine.get("phone") or "").strip():
                return JSONResponse(status_code=400, content={"ok": False, "reason": "no_phone_on_file"})

            mine["whatsapp_public_optin"] = want_public
            mine["whatsapp_changed_iso"] = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime())

            tmp = _LAWYER_APP_PATH.with_suffix(".jsonl.tmp")
            with open(tmp, "w", encoding="utf-8") as f:
                f.writelines(json.dumps(rec, ensure_ascii=False) + "\n" for rec in items)
            tmp.replace(_LAWYER_APP_PATH)

        _audit_log(
            actor=sess["email"],
            role="lawyer",
            action="whatsapp_public_set",
            target=sess["email"],
            meta={"public": want_public},
        )
        return {"ok": True, "public": want_public}
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})


@app.get("/v1/lawyer/me/stats")
def lawyer_me_stats(request: Request):  # type: ignore
    """v2.99.146 — KPIs for the signed-in lawyer.

    Delegates to ``stats_for_lawyer``; a result without a truthy ``ok`` is
    returned with status 403.
    """
    try:
        from ..auth import session_from_header

        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})

        from ..matching import stats_for_lawyer

        result = stats_for_lawyer(sess["email"])
        if not result.get("ok"):
            return JSONResponse(status_code=403, content=result)
        return result
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})


@app.get("/v1/lawyer/me/requests")
def lawyer_me_requests(request: Request):  # type: ignore
    """v2.99.135 — Requests assigned to this lawyer (any response state).

    The caller must appear in the applications file (else 403
    ``not_a_lawyer``).  Unverified lawyers get an empty list together with
    their current status.  For requests the lawyer has not accepted,
    ``contact``, ``user_email`` and ``name`` are masked before returning.
    """
    try:
        from ..auth import session_from_header

        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})

        # Verify they're a verified lawyer
        if not _LAWYER_APP_PATH.exists():
            return {"ok": True, "n": 0, "items": [], "reason": "no_applications"}
        lawyer = None
        with open(_LAWYER_APP_PATH, "r", encoding="utf-8") as f:
            for raw in f:
                try:
                    rec = json.loads(raw.strip())
                except ValueError:  # blank or corrupt line
                    continue
                # Valid JSON that is not an object has no .get; skip it
                # instead of failing the whole request.
                if isinstance(rec, dict) and rec.get("email") == sess["email"]:
                    lawyer = rec
                    break
        if not lawyer:
            return JSONResponse(status_code=403, content={"ok": False, "reason": "not_a_lawyer"})
        if lawyer.get("status") != "verified":
            return {
                "ok": True,
                "n": 0,
                "items": [],
                "reason": "lawyer_not_verified",
                "your_status": lawyer.get("status", "submitted"),
            }

        from ..matching import requests_for_lawyer

        items = requests_for_lawyer(sess["email"])
        # The lawyer does not see user contact details until they accept.
        for it in items:
            if it.get("lawyer_response") != "accepted":
                it["contact"] = "***"
                it["user_email"] = None
                it["name"] = ""
        return {"ok": True, "n": len(items), "items": items}
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
# ── Admin: matching ──
@app.get("/v1/lawyer-request/{request_id}/match-candidates")
def lawyer_request_match_candidates(request_id: str, token: str = "", top: int = 3):  # type: ignore
    """Admin: score verified lawyers as candidates for this request.

    ``token`` must equal the ``LE_ADMIN_TOKEN`` environment variable; when
    that variable is unset or the token differs the response is 403
    ``forbidden``.  Otherwise returns ``match_candidates(request_id,
    top=top)``, or a 500 if that call raises.
    """
    import hmac
    import os

    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing does not reveal how much
    # of a guessed token is correct.  Bytes are compared because
    # compare_digest rejects non-ASCII str arguments.
    if not admin_token or not hmac.compare_digest(
        token.encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    try:
        from ..matching import match_candidates

        return match_candidates(request_id, top=top)
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
def _notify_lawyer_offered_async(record: dict, lawyer_email: str, lawyer_name: str, n_total: int) -> None:
    """v2.99.141 — Tell a lawyer a request was OFFERED to them among others.

    Creates the in-app notification synchronously, then sends the branded
    email from a daemon thread.  The email shows the domain, urgency, the
    candidate count and at most the first 300 characters of the question.
    Does nothing when ``lawyer_email`` is empty.

    A missing or ``None`` ``question`` / ``id`` is tolerated; an exception in
    the worker would otherwise end the thread before the email is sent.
    """
    import threading

    if not lawyer_email:
        return

    # Hebrew label for the domain; unknown domains fall back to the raw
    # value.  Computed once and shared by the notification and the email.
    raw_domain = record.get("domain", "")
    domain_he = {
        "employment_law": "דיני עבודה",
        "contracts": "דיני חוזים",
        "consumer": "דיני צרכנות",
        "tenancy": "שכירות / מקרקעין",
    }.get(raw_domain, raw_domain)

    # v2.99.143 — in-app notification
    _create_notification(
        lawyer_email,
        kind="lawyer_offered",
        title=f"⚡ הצעה תחרותית · {n_total} עו\"ד",
        body=f"תחום: {domain_he or '(לא סווג)'} · ראשון לקבל זוכה",
        link="/lawyer-dashboard/",
        related_request_id=record.get("id", ""),
    )

    def _worker():
        question = record.get("question") or ""
        q_short = question[:300]
        ellipsis = "..." if len(question) > 300 else ""
        urgent = bool(record.get("urgent"))
        rid = str(record.get("id") or "?")[:8]
        body = (
            f"שלום עו\"ד {lawyer_name},\n\n"
            f"פנייה חדשה הוצעה לך ב-legal-eye — יחד עם {n_total - 1} עו\"ד נוספים.\n"
            f"⚡ ראשון לקבל זוכה. האחרים מקבלים הודעת ביטול.\n\n"
            f"תחום: {domain_he or '(לא סווג)'}\n"
            f"דחיפות: {'⚠ דחוף' if urgent else 'רגיל'}\n"
            f"מועמדים: {n_total} עו\"ד (כולל אותך)\n\n"
            f"השאלה של הלקוח:\n"
            f"\"{q_short}{ellipsis}\"\n\n"
            f"אם הפנייה לא מתאימה — לחץ \"דחה\" וההצעה תופנה לאחרים.\n\n"
            f"מזהה: {rid}"
        )
        subject = (
            f"[legal-eye] {'⚠ פנייה דחופה' if urgent else 'פנייה חדשה'} · {domain_he or '?'} · "
            f"({n_total} מועמדים) · {rid}"
        )
        ok = _send_email_branded(
            lawyer_email,
            subject,
            body,
            title=f"⚡ הצעה תחרותית · {n_total} עו\"ד",
            cta_label="קבל את הפנייה ←",
            cta_url="https://legal-eye.1bigfam.com/lawyer-dashboard/",
        )
        if ok:
            print(f"[lawyer-offered] sent → {lawyer_email}")

    threading.Thread(target=_worker, daemon=True).start()
def _notify_user_lawyer_declined_async(record: dict, kind: str) -> None:
    """v2.99.145 — Notify the user when a lawyer declines.

    kind='all_declined' — every candidate in a multi-offer race declined.
    Any other value (callers pass 'direct') — the 1-1 assigned lawyer
    declined.

    Creates the in-app notification immediately and sends the email from a
    daemon thread.  Does nothing when the record has no ``user_email``.
    """
    import threading

    if not record.get("user_email"):
        return

    everyone_declined = kind == "all_declined"

    # In-app first (always)
    _create_notification(
        record["user_email"],
        kind="lawyer_declined",
        title=(
            "מחפשים עו\"ד אחר עבורך"
            if everyone_declined
            else "השיוך התעדכן — מחפשים עו\"ד אחר"
        ),
        body=(
            "כל העו\"ד שעמדו לרשותך לא היו זמינים. אנחנו כעת מחפשים נוספים — נחזור אליך בקרוב."
            if everyone_declined
            else "עו\"ד שהיינו בקשר איתו לא יכול לקחת את הפנייה. אנחנו כעת מחפשים חלופה."
        ),
        link="/my-matters/",
        related_request_id=record.get("id", ""),
    )

    def _send():
        short_id = record.get('id', '?')[:8]
        if everyone_declined:
            mail_subject = "legal-eye · מחפשים עו\"ד אחר עבורך"
            mail_title = "מחפשים עו\"ד אחר עבורך"
            mail_body = (
                f"שלום,\n\nכל העו\"ד שעמדו לרשותך בפנייה ב-legal-eye לא היו זמינים כרגע.\n"
                f"אנחנו כעת מחפשים עו\"ד אחרים — נחזור אליך תוך 24 שעות.\n\n"
                f"לא נדרשת ממך פעולה.\n\nמזהה: {short_id}"
            )
        else:
            mail_subject = "legal-eye · השיוך התעדכן — מחפשים עו\"ד אחר"
            mail_title = "השיוך התעדכן"
            mail_body = (
                f"שלום,\n\nעו\"ד שהיינו בקשר איתו על הפנייה שלך לא יכול לקחת אותה.\n"
                f"אנחנו כעת מחפשים עו\"ד מתאים אחר — נחזור אליך תוך 24 שעות.\n\n"
                f"לא נדרשת ממך פעולה. תהליך תקין בעת התאמה.\n\nמזהה: {short_id}"
            )
        delivered = _send_email_branded(
            record["user_email"],
            mail_subject,
            mail_body,
            title=mail_title,
            cta_label="לאזור האישי ←",
            cta_url="https://legal-eye.1bigfam.com/my-matters/",
        )
        if delivered:
            print(f"[user-decline-notify] {kind} → {record['user_email']}")

    threading.Thread(target=_send, daemon=True).start()
def _notify_lawyer_offer_cancelled_async(record: dict, lawyer_email: str, lawyer_name: str, winner_name: str) -> None:
    """v2.99.141 — Notify a lawyer that they lost the race.

    Creates the in-app notification immediately and sends the email from a
    daemon thread.  Does nothing when ``lawyer_email`` is empty.
    ``winner_name`` is accepted for interface compatibility but is not
    included in either message.
    """
    import threading

    if not lawyer_email:
        return

    # v2.99.143 — in-app notification
    _create_notification(
        lawyer_email,
        kind="offer_cancelled",
        title="ההצעה בוטלה — שובצה לעו\"ד אחר",
        body="לא נדרשת פעולה. הצוות יציע לך פניות נוספות בקרוב.",
        link="/lawyer-dashboard/",
        related_request_id=record.get("id", ""),
    )

    def _worker():
        # Tolerate a missing/None id instead of raising inside the thread.
        rid = str(record.get("id") or "?")[:8]
        body = (
            f"שלום עו\"ד {lawyer_name},\n\n"
            f"הפנייה שהוצעה לך (מזהה {rid}) שובצה כבר לעו\"ד אחר.\n"
            f"אין צורך לפעול — הצוות יציע לך פניות נוספות בקרוב."
        )
        ok = _send_email_branded(
            lawyer_email,
            "legal-eye · ההצעה בוטלה — שובצה לעו״ד אחר",
            body,
            title="ההצעה בוטלה — שובצה לעו\"ד אחר",
            cta_label="לדשבורד ←",
            cta_url="https://legal-eye.1bigfam.com/lawyer-dashboard/",
        )
        if ok:
            print(f"[lawyer-cancelled] sent → {lawyer_email}")

    threading.Thread(target=_worker, daemon=True).start()


class _OfferBody(BaseModel):  # type: ignore
    """Request body listing the lawyers a request is offered to."""

    lawyer_ids: List[str]


@app.post("/v1/lawyer-request/{request_id}/offer-to-lawyers")
def lawyer_request_offer_to_lawyers(request_id: str, body: _OfferBody, token: str = ""):  # type: ignore
    """Admin: offer the same request to N lawyers; first to accept wins.

    ``token`` must equal ``LE_ADMIN_TOKEN`` (403 ``forbidden`` otherwise).
    A result from ``offer_to_lawyers`` without a truthy ``ok`` is returned
    as a 400.  On success every lawyer in ``result["lawyers"]`` is notified
    and the action is written to the audit log.
    """
    import hmac
    import os

    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison; bytes because compare_digest rejects
    # non-ASCII str arguments.
    if not admin_token or not hmac.compare_digest(
        token.encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    try:
        from ..matching import offer_to_lawyers

        result = offer_to_lawyers(request_id, body.lawyer_ids)
        if not result.get("ok"):
            return JSONResponse(status_code=400, content=result)

        # Fire offer emails to all.  The request record is looked up once
        # rather than re-reading the JSONL file for every lawyer.
        lawyers = result.get("lawyers", [])
        if lawyers:
            record = next(
                (r for r in _read_jsonl(_LAWYER_REQUEST_PATH) if r.get("id") == request_id),
                None,
            )
            if record is not None:
                for lw in lawyers:
                    _notify_lawyer_offered_async(record, lw["email"], lw["name"], result["n_offered"])

        # v2.99.144 — audit
        _audit_log(
            actor="admin",
            role="admin",
            action="lawyers_offered",
            target=request_id,
            meta={"n_offered": result["n_offered"], "lawyer_ids": body.lawyer_ids},
        )
        return result
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})


class _AssignBody(BaseModel):  # type: ignore
    """Request body naming the lawyer a request is assigned to."""

    lawyer_id: str
@app.post("/v1/lawyer-request/{request_id}/assign")
def lawyer_request_assign(request_id: str, body: _AssignBody, token: str = ""):  # type: ignore
    """Admin: assign a request to a specific lawyer.

    ``token`` must equal ``LE_ADMIN_TOKEN`` (403 ``forbidden`` otherwise).
    A result from ``assign_lawyer`` without a truthy ``ok`` is returned as a
    400.  On success the assigned lawyer is always notified; the user is
    notified only when the result carries a ``user_email`` and the previous
    status was ``new``.  The action is written to the audit log.
    """
    import hmac
    import os

    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison; bytes because compare_digest rejects
    # non-ASCII str arguments.
    if not admin_token or not hmac.compare_digest(
        token.encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    try:
        from ..matching import assign_lawyer

        result = assign_lawyer(request_id, body.lawyer_id)
        if not result.get("ok"):
            return JSONResponse(status_code=400, content=result)

        # Notify user (status change to "contacted") + lawyer (new assignment)
        record = next(
            (r for r in _read_jsonl(_LAWYER_REQUEST_PATH) if r.get("id") == request_id),
            None,
        )
        if record is not None:
            # User: only if status actually transitioned new→contacted
            if result.get("user_email") and result.get("old_status") == "new":
                _notify_user_status_change_async(record, "new", "contacted")
            # Lawyer: every time an assignment is made
            _notify_lawyer_assigned_async(
                record,
                record.get("assigned_lawyer_email", ""),
                record.get("assigned_lawyer_name", ""),
            )

        # v2.99.144 — audit
        _audit_log(
            actor="admin",
            role="admin",
            action="lawyer_assigned",
            target=request_id,
            meta={"lawyer_id": body.lawyer_id, "lawyer_name": result.get("lawyer_name", "")},
        )
        return result
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})


# ──────────────────────────────────────────────────────────────────────
# v2.99.137 — Deliverable lifecycle. After a lawyer accepts an assigned
# request, they submit a deliverable (title + body text + optional file).
# Status: contacted → submitted_for_approval → done | disputed
# ("done" is set both on user approval and on the 72h auto-release.)
# ──────────────────────────────────────────────────────────────────────
def _notify_auto_release_async(record: dict) -> None:
    """v2.99.138 — Tell both parties that a deliverable was auto-released.

    For each of ``user_email`` and ``assigned_lawyer_email`` present on the
    record, an in-app notification is created immediately and a branded
    email is sent from a daemon thread.  Does nothing when neither address
    is set.
    """
    import threading

    if not record.get("user_email") and not record.get("assigned_lawyer_email"):
        return

    # v2.99.143 — in-app notifications
    if record.get("user_email"):
        _create_notification(
            record["user_email"],
            kind="auto_release",
            title="⏱ הפנייה נסגרה אוטומטית (72 שעות)",
            body="חלפו 72 שעות מאז הגשת התוצר. לא נדרשת פעולה.",
            link="/my-matters/",
            related_request_id=record.get("id", ""),
        )
    if record.get("assigned_lawyer_email"):
        _create_notification(
            record["assigned_lawyer_email"],
            kind="auto_release",
            title="⏱ התוצר שלך שוחרר אוטומטית",
            body="הלקוח לא הגיב תוך 72 שעות.",
            link="/lawyer-dashboard/",
            related_request_id=record.get("id", ""),
        )

    def _send_emails():
        short_id = record.get("id", "?")[:8]

        if record.get("user_email"):
            user_text = (
                f"שלום,\n\n"
                f"חלפו 72 שעות מאז שעו\"ד {record.get('assigned_lawyer_name','')} הגיש לך את התוצר ב-legal-eye. "
                f"מאחר שלא הגעת לאישור או למחלוקת, הפנייה נסגרה אוטומטית.\n\n"
                f"אם יש לך שאלות המשך — אפשר לפתוח פנייה חדשה.\n\nמזהה: {short_id}"
            )
            _send_email_branded(
                record["user_email"],
                "legal-eye · הפנייה נסגרה אוטומטית (72 שעות)",
                user_text,
                title="⏱ הפנייה נסגרה אוטומטית (72 שעות)",
                cta_label="לאזור האישי ←",
                cta_url="https://legal-eye.1bigfam.com/my-matters/",
            )

        if record.get("assigned_lawyer_email"):
            lawyer_text = (
                f"שלום עו\"ד {record.get('assigned_lawyer_name','')},\n\n"
                f"חלפו 72 שעות מאז שהגשת את התוצר על פנייה {short_id}.\n"
                f"הלקוח לא הגיב — הפנייה נסגרה אוטומטית (auto-release).\n\n"
                f"(תשלום ישוחרר אוטומטית בעתיד הקרוב, כשמודול התשלומים יושק)\n\nמזהה: {short_id}"
            )
            _send_email_branded(
                record["assigned_lawyer_email"],
                "legal-eye · התוצר שלך שוחרר אוטומטית",
                lawyer_text,
                title="⏱ התוצר שלך שוחרר אוטומטית",
                cta_label="לדשבורד ←",
                cta_url="https://legal-eye.1bigfam.com/lawyer-dashboard/",
            )

    threading.Thread(target=_send_emails, daemon=True).start()
def _scan_auto_release() -> int:
    """Scan lawyer_requests.jsonl and auto-approve stale deliverables.

    A record is released when:
      - status == submitted_for_approval
      - deliverable_ts is at or before (now - 72h)

    Released records get ``status="done"``, ``auto_approved=True`` and fresh
    approval/updated timestamps; the file is rewritten through a temp file
    while holding ``_LAWYER_REQUEST_LOCK``.  Notifications and audit entries
    are emitted after the lock is released.

    One malformed record no longer aborts the whole scan: non-object JSON
    lines and records whose ``deliverable_ts`` is not numeric are left
    untouched and skipped.

    Returns the count of records auto-released; 0 when the file is missing
    or the scan fails (the failure is printed).
    """
    import time

    if not _LAWYER_REQUEST_PATH.exists():
        return 0
    cutoff = time.time() - (72 * 60 * 60)
    released = []  # (record, deliverable_ts as float)
    try:
        with _LAWYER_REQUEST_LOCK:
            # NOTE(review): lines that fail to parse are skipped here and are
            # therefore not written back when the file is rewritten.
            items = []
            with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f:
                for raw in f:
                    raw = raw.strip()
                    if not raw:
                        continue
                    try:
                        items.append(json.loads(raw))
                    except ValueError:  # json.JSONDecodeError subclasses ValueError
                        continue

            for rec in items:
                if not isinstance(rec, dict):
                    continue
                if rec.get("status") != "submitted_for_approval":
                    continue
                if not rec.get("deliverable_ts"):
                    continue
                try:
                    submitted_ts = float(rec["deliverable_ts"])
                except (TypeError, ValueError):
                    continue
                if submitted_ts > cutoff:
                    continue
                # Auto-release; one clock read keeps all four stamps consistent.
                now = time.time()
                iso = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime(now))
                rec["status"] = "done"
                rec["auto_approved"] = True
                rec["approval_ts"] = now
                rec["approval_iso"] = iso
                rec["updated_ts"] = now
                rec["updated_iso"] = iso
                released.append((rec, submitted_ts))

            if released:
                tmp = _LAWYER_REQUEST_PATH.with_suffix(".jsonl.tmp")
                with open(tmp, "w", encoding="utf-8") as f:
                    f.writelines(json.dumps(rec, ensure_ascii=False) + "\n" for rec in items)
                tmp.replace(_LAWYER_REQUEST_PATH)

        # Notify outside the lock
        for rec, submitted_ts in released:
            _notify_auto_release_async(rec)
            # v2.99.144 — audit
            _audit_log(
                actor="system",
                role="system",
                action="auto_release",
                target=rec.get("id", ""),
                meta={"deliverable_age_hours": round((time.time() - submitted_ts) / 3600, 1)},
            )
        n = len(released)
        if n:
            print(f"[auto-release] {n} record(s) auto-approved after 72h")
        return n
    except Exception as e:
        print(f"[auto-release] scan FAIL: {type(e).__name__}: {e}")
        return 0
def _start_auto_release_thread() -> None:
    """Start the daemon thread that runs ``_scan_auto_release`` periodically.

    The thread waits 60 seconds, then scans every 30 minutes.  An exception
    from a scan is printed and the loop continues.
    """
    import threading
    import time

    def _loop():
        # Initial delay 60s — let app finish booting
        time.sleep(60)
        while True:
            try:
                _scan_auto_release()
            except Exception as e:
                print(f"[auto-release] loop FAIL: {e}")
            time.sleep(30 * 60)  # 30 min

    t = threading.Thread(target=_loop, daemon=True, name="auto-release")
    t.start()
    print("[auto-release] background thread started (30-min interval)")


# Kick off on module import
_start_auto_release_thread()


@app.post("/v1/admin/auto-release-now")
def admin_auto_release_now(token: str = ""):  # type: ignore
    """Admin: trigger an auto-release scan manually (for testing).

    ``token`` must equal ``LE_ADMIN_TOKEN`` (403 ``forbidden`` otherwise).
    Returns the number of records released by the scan.
    """
    import hmac
    import os

    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison; bytes because compare_digest rejects
    # non-ASCII str arguments.
    if not admin_token or not hmac.compare_digest(
        token.encode("utf-8"), admin_token.encode("utf-8")
    ):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    n = _scan_auto_release()
    return {"ok": True, "auto_released": n}


def _notify_user_deliverable_submitted_async(record: dict, lawyer_name: str) -> None:
    """Notify the user that their lawyer submitted a deliverable.

    Creates the in-app notification immediately and sends the email from a
    daemon thread.  Does nothing when the record has no ``user_email``.
    """
    import threading

    if not record.get("user_email"):
        return

    # v2.99.143 — in-app notification
    _create_notification(
        record["user_email"],
        kind="deliverable_submitted",
        title=f"⚖ עו\"ד {lawyer_name} הגיש לך תוצר",
        body="בדוק ואשר/ערער תוך 72 שעות, או שחרור אוטומטי.",
        link="/my-matters/",
        related_request_id=record.get("id", ""),
    )

    def _worker():
        # Tolerate a missing/None id instead of raising inside the thread.
        rid = str(record.get("id") or "?")[:8]
        body = (
            f"שלום,\n\n"
            f"עו\"ד {lawyer_name} סיים את העבודה על הפנייה שלך ב-legal-eye.\n\n"
            f"בדוק את התשובה ואשר תוך 72 שעות, או פתח מחלוקת אם יש בעיה.\n"
            f"לאחר 72 שעות ללא תגובה — התשלום ישוחרר אוטומטית.\n\n"
            f"מזהה: {rid}"
        )
        ok = _send_email_branded(
            record["user_email"],
            "legal-eye · התשובה המשפטית שלך מוכנה",
            body,
            title=f"⚖ עו\"ד {lawyer_name} הגיש לך תוצר",
            cta_label="קרא את התשובה ←",
            cta_url="https://legal-eye.1bigfam.com/my-matters/",
        )
        if ok:
            print(f"[deliverable-notify] sent → {record['user_email']}")

    threading.Thread(target=_worker, daemon=True).start()


def _notify_lawyer_user_action_async(record: dict, action: str, reason: str = "") -> None:
    """Notify the lawyer that the user approved or disputed the deliverable.

    ``action == "approve"`` selects the approval wording; any other value is
    treated as a dispute and includes ``reason`` (truncated to 200
    characters in the in-app notification).  Does nothing when the record
    has no ``assigned_lawyer_email``.
    """
    import threading

    if not record.get("assigned_lawyer_email"):
        return

    approved = action == "approve"

    # v2.99.143 — in-app notification
    if approved:
        _create_notification(
            record["assigned_lawyer_email"],
            kind="deliverable_approved",
            title="✓ הלקוח אישר את התוצר שלך",
            body="הפנייה נסגרה.",
            link="/lawyer-dashboard/",
            related_request_id=record.get("id", ""),
        )
    else:
        _create_notification(
            record["assigned_lawyer_email"],
            kind="deliverable_disputed",
            title="⚠ הלקוח פתח מחלוקת",
            body=(reason or "(לא צוין)")[:200],
            link="/lawyer-dashboard/",
            related_request_id=record.get("id", ""),
        )

    def _worker():
        lawyer_name = record.get("assigned_lawyer_name", "")
        # Tolerate a missing/None id instead of raising inside the thread.
        rid = str(record.get("id") or "?")[:8]
        if approved:
            subject = "legal-eye · התוצר שלך אושר ✓"
            title = "✓ הלקוח אישר את התוצר שלך"
            body = (
                f"שלום עו\"ד {lawyer_name},\n\n"
                f"הלקוח אישר את התוצר שהגשת. הפנייה נסגרה.\n\n"
                f"מזהה: {rid}\n\n"
                f"(תשלום ישוחרר אוטומטית בעתיד הקרוב, כשמודול התשלומים יושק)"
            )
        else:
            subject = "legal-eye · ⚠ מחלוקת על התוצר"
            title = "⚠ הלקוח פתח מחלוקת"
            body = (
                f"שלום עו\"ד {lawyer_name},\n\n"
                f"הלקוח פתח מחלוקת על התוצר שהגשת. הצוות יבדוק ויחזור אליך.\n\n"
                f"סיבה מהלקוח:\n{reason or '(לא צוין)'}\n\n"
                f"מזהה: {rid}"
            )
        ok = _send_email_branded(
            record["assigned_lawyer_email"],
            subject,
            body,
            title=title,
            cta_label="לדשבורד ←",
            cta_url="https://legal-eye.1bigfam.com/lawyer-dashboard/",
        )
        if ok:
            print(f"[lawyer-action-notify] {action} → {record['assigned_lawyer_email']}")

    threading.Thread(target=_worker, daemon=True).start()
@app.post("/v1/lawyer/me/request/submit-deliverable")
async def lawyer_submit_deliverable(  # type: ignore
    request: Request,
    request_id: str = Form(...),
    title: str = Form(...),
    body: str = Form(...),
    file: Optional[UploadFile] = File(None),
):
    """Lawyer submits the deliverable for an accepted request.

    Requires a session whose email equals the record's
    ``assigned_lawyer_email``, a record with ``lawyer_response ==
    "accepted"`` and no deliverable already awaiting approval.  An optional
    attachment must have an extension in ``_DOC_ALLOWED_EXT`` and be at most
    ``_DOC_MAX_BYTES``; a non-empty upload is stored under
    ``_DELIVERABLE_DIR / request_id``.

    Responses: 401 no session, 400 ``title_or_body_too_short`` /
    ``request_not_accepted_yet`` / ``deliverable_already_submitted`` /
    ``unsupported_filetype``, 403 ``not_assigned_to_you``, 404
    ``no_records`` / ``request_not_found``, 413 ``file_too_large``, 500 on
    unexpected errors.

    The upload is read BEFORE ``_LAWYER_REQUEST_LOCK`` is taken, so the lock
    is never held across an ``await``.  Holding a blocking lock while this
    coroutine is suspended would let a second request block the event-loop
    thread waiting on that lock.  Validation responses keep their original
    order.
    """
    import time

    try:
        from ..auth import session_from_header

        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})
        if not title.strip() or len(body.strip()) < 10:
            return JSONResponse(status_code=400, content={"ok": False, "reason": "title_or_body_too_short"})
        # Find request + verify it's assigned to this lawyer
        if not _LAWYER_REQUEST_PATH.exists():
            return JSONResponse(status_code=404, content={"ok": False, "reason": "no_records"})

        # Pre-read the attachment.  Bytes are only read when the extension
        # is acceptable; the rejection itself is reported further down.
        has_upload = file is not None and bool(file.filename)
        orig_name = ""
        ext = ""
        content = b""
        if has_upload:
            orig_name = (file.filename or "deliverable").strip()
            ext = ("." + orig_name.rsplit(".", 1)[-1].lower()) if "." in orig_name else ""
            if ext in _DOC_ALLOWED_EXT:
                content = await file.read()

        with _LAWYER_REQUEST_LOCK:
            # NOTE(review): lines that fail to parse are skipped here and are
            # therefore not written back by the rewrite below.
            items = []
            with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f:
                for raw in f:
                    raw = raw.strip()
                    if not raw:
                        continue
                    try:
                        items.append(json.loads(raw))
                    except ValueError:  # json.JSONDecodeError subclasses ValueError
                        continue

            target = next(
                (r for r in items if isinstance(r, dict) and r.get("id") == request_id),
                None,
            )
            if not target:
                return JSONResponse(status_code=404, content={"ok": False, "reason": "request_not_found"})
            if target.get("assigned_lawyer_email") != sess["email"]:
                return JSONResponse(status_code=403, content={"ok": False, "reason": "not_assigned_to_you"})
            if target.get("lawyer_response") != "accepted":
                return JSONResponse(status_code=400, content={"ok": False, "reason": "request_not_accepted_yet"})
            if target.get("status") == "submitted_for_approval":
                return JSONResponse(status_code=400, content={"ok": False, "reason": "deliverable_already_submitted"})

            # Optional file
            deliverable_filename = None
            deliverable_size = None
            deliverable_mime = None
            if has_upload:
                if ext not in _DOC_ALLOWED_EXT:
                    return JSONResponse(status_code=400, content={
                        "ok": False,
                        "reason": "unsupported_filetype",
                        "allowed": sorted(_DOC_ALLOWED_EXT),
                    })
                if len(content) > _DOC_MAX_BYTES:
                    return JSONResponse(status_code=413, content={"ok": False, "reason": "file_too_large"})
                if content:
                    # Keep only word characters, "-", ".", "(" and ")" so the
                    # stored name cannot contain path separators.
                    safe_name = _re.sub(r"[^\w\-.()]+", "_", orig_name)[:120] or ("deliverable" + ext)
                    deliverable_dir = _DELIVERABLE_DIR / request_id
                    deliverable_dir.mkdir(parents=True, exist_ok=True)
                    with open(deliverable_dir / safe_name, "wb") as f:
                        f.write(content)
                    deliverable_filename = safe_name
                    deliverable_size = len(content)
                    deliverable_mime = file.content_type or "application/octet-stream"

            # Save deliverable fields onto the record (one clock read so all
            # stamps describe the same instant).
            now = time.time()
            iso = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime(now))
            target["deliverable_title"] = title.strip()
            target["deliverable_body"] = body.strip()
            target["deliverable_ts"] = now
            target["deliverable_iso"] = iso
            target["deliverable_filename"] = deliverable_filename
            target["deliverable_size"] = deliverable_size
            target["deliverable_mime"] = deliverable_mime
            target["status"] = "submitted_for_approval"
            target["updated_ts"] = now
            target["updated_iso"] = iso

            # Rewrite through a temp file, then swap it into place.
            tmp = _LAWYER_REQUEST_PATH.with_suffix(".jsonl.tmp")
            with open(tmp, "w", encoding="utf-8") as f:
                f.writelines(json.dumps(rec, ensure_ascii=False) + "\n" for rec in items)
            tmp.replace(_LAWYER_REQUEST_PATH)

        # Notify user (async)
        _notify_user_deliverable_submitted_async(target, target.get("assigned_lawyer_name", ""))
        # v2.99.144 — audit
        _audit_log(
            actor=sess["email"],
            role="lawyer",
            action="deliverable_submitted",
            target=request_id,
            meta={"title": title.strip()[:120], "has_attachment": bool(deliverable_filename)},
        )
        return {
            "ok": True,
            "request_id": request_id,
            "status": "submitted_for_approval",
            "has_attachment": bool(deliverable_filename),
            "attachment_size": deliverable_size,
            "message": "התוצר הוגש. הלקוח קיבל אימייל ויאשר/יערער תוך 72 שעות.",
        }
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False,
            "error": f"{type(e).__name__}: {e}",
        })


@app.get("/v1/user/me/request/{request_id}/deliverable-file")
def user_get_deliverable_file(request_id: str, request: Request):  # type: ignore
    """User downloads their lawyer's deliverable file.

    Access is gated by ownership: the record must match both ``request_id``
    and the session's ``user_id``.  Responds 401 without a session and 404
    when there are no records, no owned record, no file name on the record,
    or the file is missing on disk.
    """
    try:
        from ..auth import session_from_header

        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})
        if not _LAWYER_REQUEST_PATH.exists():
            return JSONResponse(status_code=404, content={"ok": False, "reason": "no_records"})

        target = None
        with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f:
            for raw in f:
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    rec = json.loads(raw)
                except ValueError:  # json.JSONDecodeError subclasses ValueError
                    continue
                if (
                    isinstance(rec, dict)
                    and rec.get("id") == request_id
                    and rec.get("user_id") == sess["user_id"]
                ):
                    target = rec
                    break
        if not target:
            return JSONResponse(status_code=404, content={"ok": False, "reason": "not_found_or_not_yours"})

        fname = target.get("deliverable_filename")
        if not fname:
            return JSONResponse(status_code=404, content={"ok": False, "reason": "no_file"})
        path = _DELIVERABLE_DIR / request_id / fname
        if not path.exists():
            return JSONResponse(status_code=404, content={"ok": False, "reason": "file_missing_on_disk"})

        from fastapi.responses import FileResponse as _FileResponse

        return _FileResponse(
            path,
            media_type=target.get("deliverable_mime") or "application/octet-stream",
            filename=fname,
        )
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})


class _UserApprovalBody(BaseModel):  # type: ignore
    """Request body for approving or disputing a deliverable."""

    request_id: str
    action: str  # approve | dispute
    reason: Optional[str] = ""
    rating: Optional[int] = None  # v2.99.189 — 1-5 stars (approval only)
    review: Optional[str] = ""  # v2.99.189 — optional text review
def user_approve_or_dispute(body: _UserApprovalBody, request: Request):  # type: ignore
    """Record the signed-in user's verdict on a lawyer's deliverable.

    The request store is a JSONL file that is read, mutated in memory and
    rewritten through a temp file + rename, all under
    ``_LAWYER_REQUEST_LOCK``.

    Args:
        body: ``request_id`` plus ``action`` ("approve" or "dispute").
            ``reason`` is stored for disputes; ``rating`` (1-5) and
            ``review`` are stored for approvals only.
        request: Incoming request; its ``authorization`` header is used to
            resolve the session.

    Returns:
        ``{"ok": True, "request_id", "new_status"}`` on success, otherwise a
        ``JSONResponse``: 401 (no session), 400 (bad action / nothing
        pending), 403 (request belongs to someone else), 404 (unknown
        request or no request store yet), 500 (unexpected failure).
    """
    import time
    try:
        from ..auth import session_from_header
        sess = session_from_header(request.headers.get("authorization"))
        if not sess:
            return JSONResponse(status_code=401, content={"ok": False, "reason": "not_authenticated"})
        if body.action not in ("approve", "dispute"):
            return JSONResponse(status_code=400, content={"ok": False, "reason": "invalid_action"})
        approving = body.action == "approve"

        with _LAWYER_REQUEST_LOCK:
            # No store on disk means no request can match: answer 404 instead
            # of letting open() raise and surface as a 500 with a file path.
            if not _LAWYER_REQUEST_PATH.exists():
                return JSONResponse(status_code=404, content={"ok": False, "reason": "request_not_found"})

            items = []
            with open(_LAWYER_REQUEST_PATH, "r", encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        items.append(json.loads(line))
                    except ValueError:
                        # NOTE: unparseable lines are skipped here and are
                        # therefore dropped when the file is rewritten below.
                        continue

            # Rows that are valid JSON but not objects cannot be a request.
            target = next(
                (rec for rec in items
                 if isinstance(rec, dict) and rec.get("id") == body.request_id),
                None,
            )
            if target is None:
                return JSONResponse(status_code=404, content={"ok": False, "reason": "request_not_found"})
            if target.get("user_id") != sess["user_id"]:
                return JSONResponse(status_code=403, content={"ok": False, "reason": "not_your_request"})
            if target.get("status") != "submitted_for_approval":
                return JSONResponse(status_code=400, content={"ok": False, "reason": "no_pending_deliverable"})

            # One clock reading so every *_ts / *_iso field of this update agrees.
            now = time.time()
            now_iso = time.strftime("%Y-%m-%dT%H:%M:%S%z", time.localtime(now))

            target["status"] = "done" if approving else "disputed"
            target["updated_ts"] = now
            target["updated_iso"] = now_iso
            if approving:
                target["approval_ts"] = now
                target["approval_iso"] = now_iso
                # v2.99.189 — optional rating + review; an out-of-range
                # rating is ignored rather than rejected.
                if body.rating is not None:
                    try:
                        stars = int(body.rating)
                    except (TypeError, ValueError):
                        stars = None
                    if stars is not None and 1 <= stars <= 5:
                        target["review_rating"] = stars
                        target["review_text"] = (body.review or "")[:600]
                        target["review_ts"] = now
            else:
                target["dispute_reason"] = (body.reason or "").strip()
                target["dispute_ts"] = now
                target["dispute_iso"] = now_iso

            # Write to a sibling temp file, then rename over the original.
            tmp = _LAWYER_REQUEST_PATH.with_suffix(".jsonl.tmp")
            with open(tmp, "w", encoding="utf-8") as f:
                f.writelines(json.dumps(rec, ensure_ascii=False) + "\n" for rec in items)
            tmp.replace(_LAWYER_REQUEST_PATH)

        # Notify lawyer
        _notify_lawyer_user_action_async(target, body.action, body.reason or "")
        # v2.99.144 — audit
        _audit_log(
            actor=sess["email"], role="user",
            action=f"deliverable_{body.action}",
            target=body.request_id,
            meta={"reason": (body.reason or "")[:200] if body.action == "dispute" else None},
        )
        return {"ok": True, "request_id": body.request_id, "new_status": target["status"]}
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
def _read_jsonl(path):
    """Read a JSONL file and return its parseable records as a list.

    Best-effort by design: a missing or unreadable file yields ``[]`` (or the
    records read so far), and blank or malformed lines are skipped.

    Args:
        path: Location of the JSONL file (anything ``open()`` accepts).

    Returns:
        The decoded JSON values, in file order.
    """
    import json as _j
    records = []
    try:
        # EAFP: open directly instead of exists()-then-open, which races with
        # concurrent rewrites of the file.
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    records.append(_j.loads(line))
                except (ValueError, RecursionError):
                    # json.JSONDecodeError is a ValueError. A bare `except:`
                    # here would also swallow KeyboardInterrupt/SystemExit.
                    continue
    except (OSError, ValueError):
        # Missing/unreadable file or undecodable bytes: return what we have.
        pass
    return records
@app.get("/v1/auth/users")
def auth_users_list(token: str = ""):  # type: ignore
    """Admin: list all users.

    Args:
        token: Admin token supplied by the caller; must equal the
            ``LE_ADMIN_TOKEN`` environment variable.

    Returns:
        ``{"ok": True, "n", "items"}`` with the result of ``list_users()``.
        403 when ``LE_ADMIN_TOKEN`` is unset or the token does not match;
        500 when listing fails.
    """
    import hmac
    import os
    admin_token = os.environ.get("LE_ADMIN_TOKEN", "")
    # Constant-time comparison so response timing does not leak how much of a
    # guessed token is correct. Compare bytes: compare_digest() rejects
    # non-ASCII str arguments.
    supplied = (token or "").encode("utf-8")
    if not admin_token or not hmac.compare_digest(supplied, admin_token.encode("utf-8")):
        return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"})
    try:
        from ..auth import list_users
        users = list_users()
        return {"ok": True, "n": len(users), "items": users}
    except Exception as e:
        return JSONResponse(status_code=500, content={"ok": False, "error": str(e)})
""" try: from ..triage import classify_question result = classify_question(req.text or "") _log_triage_async(req.text or "", result) return result except Exception as e: return JSONResponse(status_code=500, content={ "ok": False, "error": f"{type(e).__name__}: {e}", }) @app.get("/v1/triage/log") def triage_log(token: str = "", limit: int = 200): # type: ignore """Admin: review triage classifications for accuracy.""" import os admin_token = os.environ.get("LE_ADMIN_TOKEN", "") if not admin_token or token != admin_token: return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"}) if not _TRIAGE_LOG_PATH.exists(): return {"ok": True, "n": 0, "items": []} items = [] try: with open(_TRIAGE_LOG_PATH, "r", encoding="utf-8") as f: for line in f: line = line.strip() if line: try: items.append(json.loads(line)) except: continue except Exception as e: return JSONResponse(status_code=500, content={"ok": False, "error": str(e)}) # Return newest first, capped items.reverse() capped = items[:max(1, min(limit, 2000))] return {"ok": True, "n_total": len(items), "n_returned": len(capped), "items": capped} class _TriageReviewBody(BaseModel): # type: ignore id: str verdict: str # correct | wrong | flag expected_category: Optional[str] = "" note: Optional[str] = "" @app.post("/v1/triage/log/review") def triage_log_review(req: _TriageReviewBody, token: str = ""): # type: ignore """Admin: mark a triage log entry as correct/wrong with optional expected category + note.""" import os, time admin_token = os.environ.get("LE_ADMIN_TOKEN", "") if not admin_token or token != admin_token: return JSONResponse(status_code=403, content={"ok": False, "reason": "forbidden"}) allowed = {"correct", "wrong", "flag"} if req.verdict not in allowed: return JSONResponse(status_code=400, content={"ok": False, "reason": "invalid_verdict"}) if not _TRIAGE_LOG_PATH.exists(): return JSONResponse(status_code=404, content={"ok": False, "reason": "no_records"}) with _TRIAGE_LOG_LOCK: items = 
@app.get("/v1/law/citers")
def law_citers(name: str, top: int = 30):  # type: ignore
    """v2.99.109 — Judgments in the corpus that cite this law.

    Resolves ``name`` to its canonical title through the alias file, then
    collects every doc id listed in the citation network's ``cited_by`` under
    the canonical title, any alias of it, or its year-stripped short form.

    Args:
        name: Law name or alias as typed by the user.
        top: Maximum number of citers to return (at least one is returned
            when any exist).

    Returns:
        ``{"ok": True, "name", "canonical", "n_total", "n_returned",
        "lookup_terms", "items"}`` where each item carries ``doc_id``,
        ``citation``, ``court``, ``verdict_dt``, ``snippet`` and ``relation``.
        Items are ordered newest first; undated items come last.
        400 for an empty name, 500 on unexpected failure.
    """
    try:
        nm = (name or "").strip()
        # Reject bad input before the pipeline / citation network are touched.
        if not nm:
            return JSONResponse(status_code=400, content={"ok": False, "reason": "empty_name"})

        from ..pipeline import get_pipeline
        from ..citation_network import get_or_build
        pipe = get_pipeline()
        cn = get_or_build(pipe)

        # Build the set of strings to look up in cited_by:
        # canonical, all aliases pointing TO canonical, plus the input.
        lookup_strs = {nm}
        alias_map = {}
        try:
            with open(_LAW_ALIAS_FILE, "r", encoding="utf-8") as f:
                loaded = json.load(f)
            if isinstance(loaded, dict):
                alias_map = loaded
        except (OSError, ValueError):
            # Alias file is optional; fall back to the raw name.
            pass
        # If input is an alias, jump to canonical and pick up other aliases for it
        canonical = alias_map.get(nm, nm)
        lookup_strs.add(canonical)
        lookup_strs.update(short for short, canon in alias_map.items() if canon == canonical)
        # Also add year-stripped short of canonical
        _year_re = _re.compile(r",\s+ה?תש[א-ת][\"׳״]?[א-ת]?[-־–—]\d{4}\s*$")
        short_canon = _year_re.sub("", canonical).strip()
        if short_canon != canonical:
            lookup_strs.add(short_canon)

        # Collect citer doc_ids
        seen_ids = set()
        for s in lookup_strs:
            seen_ids.update(cn.cited_by.get(s, []) or [])
        # Also scan cited_by keys for variants of the short form followed by
        # a comma (e.g. the same law listed with a differently formatted year).
        prefix_form = short_canon + ","
        for k in cn.cited_by:
            if k == short_canon or k.startswith(prefix_form):
                seen_ids.update(cn.cited_by[k] or [])

        # Index only the documents we need (Document uses .id; some pipeline
        # variants have .doc_id — handle both).
        docs = (getattr(pipe, "_indexed_docs", None)
                or getattr(pipe, "_docs", None) or [])
        doc_by_id = {}
        for d in docs:
            did = getattr(d, "doc_id", None) or getattr(d, "id", None)
            if did and did in seen_ids:
                doc_by_id[did] = d

        # v2.99.112 — try SHORTEST forms first (the long canonical title with
        # its year suffix rarely appears verbatim). Computed once; the text
        # tie-break keeps the probe order deterministic.
        probe_order = sorted(lookup_strs, key=lambda s: (len(s), s))

        out = []
        # v2.99.111 — filter to judgments only (skip law/statute/wiki cross-refs
        # that get indexed under similar IDs)
        NON_JUDGMENT_PREFIXES = ("heb_law/", "heb_statute/", "heb_wikilaw/", "heb_kolzchut/")
        for did in seen_ids:
            if did.startswith(NON_JUDGMENT_PREFIXES):
                continue
            d = doc_by_id.get(did)
            if not d:
                continue
            md = getattr(d, "metadata", None) or {}
            # Skip if metadata flags this as a statute/law doc
            if md.get("kind") in ("statute", "law", "wikipedia", "kolzchut"):
                continue
            # v2.99.113 — d.text may be empty (lazy); use pipe.get_text fallback
            text = getattr(d, "text", "") or ""
            if not text and hasattr(pipe, "get_text"):
                try:
                    text = pipe.get_text(did) or ""
                except Exception:
                    text = ""
            cite = md.get("citation") or did
            dt = (md.get("verdict_dt") or "")[:10]

            snippet = ""
            matched_str = ""
            for s in probe_order:
                idx = text.find(s)
                if idx >= 0:
                    matched_str = s
                    start = max(0, idx - 80)
                    end = min(len(text), idx + len(s) + 120)
                    snippet = ("…" if start > 0 else "") + text[start:end] + ("…" if end < len(text) else "")
                    break
            # v2.99.116 — law-specific sentiment (apply/interpret/limit/expand)
            rel = _classify_law_citation_context(text, matched_str) if matched_str else "apply"
            out.append({
                "doc_id": did,
                "citation": cite,
                "court": md.get("court", ""),
                "verdict_dt": dt,
                "snippet": snippet,
                "relation": rel,
            })

        # Sort newest first on the full date (not just the year); entries
        # without a leading 4-digit year go last. The doc_id pre-sort makes
        # ties reproducible, since `seen_ids` is a set with no stable order.
        def _recency_key(r):
            dt = r.get("verdict_dt") or ""
            return dt if dt[:4].isdigit() else ""

        out.sort(key=lambda r: str(r["doc_id"]))
        out.sort(key=_recency_key, reverse=True)
        n_total = len(out)
        out = out[:max(1, top)]
        return {
            "ok": True,
            "name": nm,
            "canonical": canonical,
            "n_total": n_total,
            "n_returned": len(out),
            "lookup_terms": sorted(lookup_strs),
            "items": out,
        }
    except Exception as e:
        return JSONResponse(status_code=500,
                            content={"ok": False, "error": f"{type(e).__name__}: {e}"})
def _law_search_snippet(text, q_norm, q_lower):
    """Return a ±60-char snippet around the first match of the query, or None.

    An exact match is preferred; a lower-cased match is the fallback.
    """
    idx = text.find(q_norm)
    if idx < 0:
        idx = text.lower().find(q_lower)
    if idx < 0:
        return None
    start = max(0, idx - 60)
    end = min(len(text), idx + len(q_norm) + 60)
    return ("…" if start > 0 else "") + text[start:end] + ("…" if end < len(text) else "")


@app.get("/v1/laws/search")
def laws_search(q: str, top: int = 30):  # type: ignore
    """v2.99.107 — Full-text search across all cached laws.

    Walks every ``*.json`` file in ``_LAW_CACHE_DIR`` (except
    ``aliases.json``) and returns the paragraphs containing the query, in
    file-name order, stopping as soon as ``top`` hits are collected.

    Args:
        q: Search text; must be at least 2 characters after stripping.
        top: Maximum number of hits (values below 1 behave like 1).

    Returns:
        ``{"ok": True, "n", "n_searched", "n_laws", "query", "items"}`` where
        each item has ``law_name``, ``section``, ``para_num``, ``snippet`` and
        ``text`` (first 400 chars). ``{"ok": False, "reason":
        "query_too_short"}`` for a too-short query.
    """
    q_norm = (q or "").strip()
    if not q_norm or len(q_norm) < 2:
        return {"ok": False, "reason": "query_too_short"}
    if not _LAW_CACHE_DIR.exists():
        return {"ok": True, "n": 0, "items": [], "n_searched": 0}

    limit = max(1, top)
    q_lower = q_norm.lower()
    hits = []
    counts = {"paras": 0, "laws": 0}

    def _scan():
        """Fill `hits`, returning as soon as `limit` is reached."""
        for p in sorted(_LAW_CACHE_DIR.glob("*.json")):
            if p.name == "aliases.json":
                continue
            try:
                with open(p, "r", encoding="utf-8") as f:
                    d = json.load(f)
            except (OSError, ValueError):
                continue
            # A cache file whose top level is not an object cannot be a law.
            if not isinstance(d, dict):
                continue
            counts["laws"] += 1
            law_name = d.get("name") or "?"
            for s in (d.get("sections") or []):
                if not isinstance(s, dict):
                    continue
                sec_title = s.get("title") or ""
                for para in (s.get("paragraphs") or []):
                    if not isinstance(para, dict):
                        continue
                    counts["paras"] += 1
                    text = para.get("text") or ""
                    if not isinstance(text, str):
                        continue
                    snippet = _law_search_snippet(text, q_norm, q_lower)
                    if snippet is None:
                        continue
                    hits.append({
                        "law_name": law_name,
                        "section": sec_title,
                        "para_num": para.get("num"),
                        "snippet": snippet,
                        "text": text[:400],  # truncate for payload size
                    })
                    if len(hits) >= limit:
                        return

    _scan()
    return {
        "ok": True,
        "n": len(hits),
        "n_searched": counts["paras"],
        "n_laws": counts["laws"],
        "query": q_norm,
        "items": hits,
    }
search_url = ( "https://he.wikisource.org/w/api.php?action=query&format=json" "&list=search&srnamespace=0&srlimit=5&srsearch=" + _up.quote(q) ) req = _ur.Request(search_url, headers={ "User-Agent": "legal-eye-bot/1.0 (https://legal-eye.1bigfam.com)" }) with _ur.urlopen(req, timeout=8) as r: search_data = json.loads(r.read().decode("utf-8", errors="replace")) hits = (search_data.get("query") or {}).get("search") or [] if not hits: return JSONResponse(status_code=404, content={ "ok": False, "reason": "not_found_in_wikisource", "query": q, }) # Score hits: exact-prefix match wins def _score(h): t = h.get("title", "") if t.startswith(q): return 100 if q in t: return 50 return 0 hits.sort(key=_score, reverse=True) title = hits[0]["title"] # 2. Fetch wikitext (NOT extract — Israeli laws use custom # `{{ח:סעיף}}` templates that the extract API discards). wt_url = ( "https://he.wikisource.org/w/api.php?action=parse&format=json" "&prop=wikitext&page=" + _up.quote(title) ) req = _ur.Request(wt_url, headers={ "User-Agent": "legal-eye-bot/1.0 (https://legal-eye.1bigfam.com)" }) with _ur.urlopen(req, timeout=10) as r: wt_data = json.loads(r.read().decode("utf-8", errors="replace")) wt_obj = (wt_data.get("parse") or {}).get("wikitext") or {} wikitext = wt_obj.get("*") if isinstance(wt_obj, dict) else str(wt_obj) if not wikitext or len(wikitext) < 200: return JSONResponse(status_code=404, content={ "ok": False, "reason": "wikitext_empty", "title": title, }) # 3. Parse Hebrew-law templates → markdown-ish intermediate form. # See https://he.wikisource.org/wiki/תבנית:ח:סעיף for full template set. 
extract_lines = [] # First pass: drop unused templates / structural noise wt = wikitext # Drop ENTIRE templates we don't care about (note-boxes, formatting) for drop_tpl in ("ח:תיבה", "ח:מפריד", "ח:סוגר", "ח:סופר", "ח:התחלה", "ח:פתיח-התחלה", "ח:מאגר", "ח:סוף", "ח:פתיח-סוף"): wt = _r.sub(r"\{\{" + drop_tpl + r"[^{}]*?\}\}", "", wt) # Drop internal HTML (table-of-contents divs) wt = _r.sub(r"]*>", "", wt) wt = _r.sub(r"
", "", wt) wt = _r.sub(r"", "\n", wt) # Internal wiki-links: keep just the display text wt = _r.sub(r"\{\{ח:פנימי\|[^|}]*\|([^}]*)\}\}", r"\1", wt) wt = _r.sub(r"\{\{ח:פנימי\|([^}|]*)\}\}", r"\1", wt) # Plain [[link|text]] → text; [[link]] → link wt = _r.sub(r"\[\[[^|\]]*\|([^\]]*)\]\]", r"\1", wt) wt = _r.sub(r"\[\[([^\]]*)\]\]", r"\1", wt) # Italic / bold markers wt = _r.sub(r"'''([^']+)'''", r"\1", wt) wt = _r.sub(r"''([^']+)''", r"\1", wt) # Parse line by line sections = [] cur_title = None cur_kind = "preamble" cur_paras = [] pending_num = None # set by {{ח:סעיף|N|TITLE}}, consumed by next {{ח:ת}} pending_subnum = None # set by {{ח:תת|(M)}}, consumed by following text title_line = None def _flush_section(): nonlocal cur_paras, cur_title, cur_kind if cur_title is None and not cur_paras: return sections.append({ "title": cur_title, "kind": cur_kind, "paragraphs": [p for p in cur_paras if p.get("text")], }) cur_paras = [] for raw_line in wt.split("\n"): line = raw_line.strip() if not line: continue # {{ח:כותרת|TITLE}} — overall law title (use as canonical name) m = _r.match(r"\{\{ח:כותרת\|([^}]+)\}\}", line) if m: title_line = m.group(1).strip() continue # {{ח:קטע2|ANCHOR|TITLE}} — chapter header m = _r.match(r"\{\{ח:קטע2\|[^|]*\|([^}]+)\}\}", line) if m: _flush_section() cur_title = m.group(1).strip() cur_kind = "chapter" pending_num = None pending_subnum = None continue # {{ח:קטע3|ANCHOR|TITLE}} — sub-chapter header (סימן) m = _r.match(r"\{\{ח:קטע3\|[^|]*\|([^}]+)\}\}", line) if m: _flush_section() cur_title = m.group(1).strip() cur_kind = "subchapter" pending_num = None pending_subnum = None continue # {{ח:סעיף|N|TITLE}} — section header (numbered) m = _r.match(r"\{\{ח:סעיף\|([^|]+)\|([^}]+)\}\}", line) if m: pending_num = m.group(1).strip() # Add a "section title" line BEFORE first sub-paragraph cur_paras.append({ "num": pending_num + ".", "text": m.group(2).strip(), "is_section_title": True, }) pending_subnum = None continue # {{ח:תת|(MARK)}} — sub-paragraph 
marker on its own m = _r.match(r"\{\{ח:תת\|([^}]+)\}\}\s*(.*)", line) if m: pending_subnum = m.group(1).strip() rest = m.group(2).strip() if rest: cur_paras.append({ "num": pending_subnum, "text": _strip_remaining_templates(rest), }) pending_subnum = None continue # {{ח:ת}} TEXT — plain paragraph (in current section) m = _r.match(r"\{\{ח:ת\}\}\s*(.*)", line) if m: rest = _strip_remaining_templates(m.group(1).strip()) num = pending_subnum or pending_num if num: cur_paras.append({"num": num, "text": rest}) pending_subnum = None # consume pending_num = None # consume (only first ת after סעיף) else: cur_paras.append({"text": rest}) continue # Bare text (no template prefix) — append to last paragraph if any if line and not line.startswith("{{") and not line.startswith("}}"): cleaned = _strip_remaining_templates(line) if cleaned and cur_paras: cur_paras[-1]["text"] += " " + cleaned elif cleaned: cur_paras.append({"text": cleaned}) _flush_section() n_total_paras = sum(len(s.get("paragraphs") or []) for s in sections) if n_total_paras == 0: return JSONResponse(status_code=500, content={ "ok": False, "reason": "parse_failed_no_paragraphs", "title": title, "wikitext_len": len(wikitext), }) canonical_name = title_line or title response = { "ok": True, "kind": "law", "name": canonical_name, "source": "wikisource", "source_url": "https://he.wikisource.org/wiki/" + _up.quote(title.replace(" ", "_")), "n_sections": len(sections), "n_paragraphs": n_total_paras, "n_chars": len(wikitext), "sections": sections, } # v2.99.101 — persist to disk cache (under canonical title + alias) try: _law_cache_set(q, canonical_name, response) except Exception as e: print(f"[law-cache] save fail: {e}") return response except Exception as e: return JSONResponse(status_code=500, content={ "ok": False, "error": f"{type(e).__name__}: {e}" }) # v2.99.90 — Smart Insights: "Related cases" with sentiment detection # Given a cite (e.g., ע"א 4628/93), find all cases in our corpus that # mention it. 
# Relation vocabularies for case-to-case citations. Each pattern is searched
# in the text surrounding a citation to decide whether the citing judgment
# overturns, distinguishes or affirms the cited one.
_REL_OVERTURN_RX = _re.compile(
    r"(?:ביטל(?:נו|תי|ה)?|הפך(?:נו|תי|ה)?|לבטל\b|לסטות\s+מ|"
    r"אינ[הו]\s+(?:עוד\s+)?הלכה|ההלכה.{0,40}שונתה|התרחק(?:נו|תי|ה)?\s+מ|"
    r"דחי[נתה]\s+(?:את\s+)?(?:ההלכה|הקביעה)|מבוטל(?:ת)?\s+ב?[זה]?[אה]?ת\s+פסק)"
)
_REL_DISTINGUISH_RX = _re.compile(
    r"(?:אבחנ(?:נו|תי|תה)|להבחין|שונה\s+מ|נסיבות\s+שונות|"
    r"לא\s+חל\b|אינ[הו]\s+חל|נסיבות\s+הענין\s+שונות|המקרה\s+שונה|"
    r"להבחין\s+בין)"
)
_REL_AFFIRM_RX = _re.compile(
    r"(?:אימצ(?:נו|תי|ה)|יישמ(?:נו|תי|ה)|חוזרת?|לפי\s+ההלכה|"
    r"כאמור\s+ב|בעקבות\s+הלכת|כפי\s+שנקבע|כפי\s+שהובהר|המבחן\s+שנקבע|"
    r"בהתאם\s+ל(?:הלכה|פסק)|מאשר(?:ת)?)"
)


def _classify_citation_context(text: str, cite: str) -> str:
    """Classify how ``text`` treats ``cite`` at its first occurrence.

    The window examined is 150 characters before the citation through 150
    characters after its end. The strongest relation found wins, in the
    order overturn → distinguish → affirm.

    Returns:
        "overturn", "distinguish", "affirm", or "neutral" (also returned for
        empty input or when ``cite`` does not occur in ``text``).
    """
    pos = text.find(cite) if (text and cite) else -1
    if pos < 0:
        return "neutral"
    window = text[max(0, pos - 150): pos + len(cite) + 150]
    # Precedence is encoded by tuple order.
    ranked = (
        ("overturn", _REL_OVERTURN_RX),
        ("distinguish", _REL_DISTINGUISH_RX),
        ("affirm", _REL_AFFIRM_RX),
    )
    for label, pattern in ranked:
        if pattern.search(window):
            return label
    return "neutral"
# Relation vocabularies for citations of a LAW (as opposed to a case): the
# text around a statute reference is checked for signs that the court
# interprets, limits or expands it.
_LAW_REL_INTERPRET_RX = _re.compile(
    r"(?:מפרש(?:ים|ת|ים\s+את)?|פרשנות(?:\s+ה?סעיף|\s+ה?חוק)?|תכלית(?:\s+ה?סעיף|\s+ה?חוק|\s+החקיקה)?|"
    r"המשמעות\s+של|כיצד\s+יש\s+ל[הת]ב[יו]ן|לפרש\s+את\s+ה?סעיף)"
)
_LAW_REL_LIMIT_RX = _re.compile(
    r"(?:אינו\s+חל\s+על|לא\s+חל\s+(?:כאן|בענייננו|במקרה)|מצומצם\s+ל|"
    r"אינ[הו]\s+פוגע[תים]?\s+ב|לא\s+חל[הת]?\s+על|הוראת\s+ה?סעיף\s+אינה\s+חלה|"
    r"חרי?ג\s+ל(?:סעיף|חוק)|מוצא\s+מתחולת)"
)
_LAW_REL_EXPAND_RX = _re.compile(
    r"(?:חל\s+גם\s+על|כולל\s+(?:גם|כל)|מורחב[תים]?\s+ל|הרחבת\s+ה?(?:תחולה|הסעיף)|"
    r"באופן\s+רחב|פרשנות\s+מרחיבה|חל\s+על\s+כל)"
)


def _classify_law_citation_context(text: str, match_str: str) -> str:
    """Classify how ``text`` uses a law at the first occurrence of ``match_str``.

    The window examined is 150 characters before the match through 200
    characters after its end. Patterns are tried in the order
    limit → expand → interpret.

    Returns:
        "limit", "expand", "interpret", or "apply" (the default, also
        returned for empty input or when ``match_str`` is not in ``text``).
    """
    pos = text.find(match_str) if (text and match_str) else -1
    if pos < 0:
        return "apply"
    window = text[max(0, pos - 150): pos + len(match_str) + 200]
    # Precedence is encoded by tuple order.
    ranked = (
        ("limit", _LAW_REL_LIMIT_RX),
        ("expand", _LAW_REL_EXPAND_RX),
        ("interpret", _LAW_REL_INTERPRET_RX),
    )
    for label, pattern in ranked:
        if pattern.search(window):
            return label
    return "apply"
Returns: cited_by: cases that cite this one (sorted by recency where possible) Each tagged with relation: 'overturn'|'distinguish'|'affirm'|'neutral' cites: cases this one cites (basic list) n_in: total cases that cite this one n_out: total cites this case makes """ try: from ..pipeline import get_pipeline from ..citation_network import get_or_build pipe = get_pipeline() cn = get_or_build(pipe) raw = (cite or "").strip() if not raw: return JSONResponse(status_code=400, content={ "ok": False, "reason": "empty_cite", }) # Strip surrounding text → canonical cite prefix m = _JR_CASE_RX.search(raw) prefix = m.group(1).strip() if m else raw # Find docs that mention this cite citers_ids = cn.cited_by.get(prefix, []) or [] # Reverse: docs this cite-id cites cite_doc = cn.doc_for_citation.get(prefix) cited_by_us = (cn.cites.get(cite_doc) or []) if cite_doc else [] # Resolve metadata for each citer # v2.99.520 fix: Document.id (not .doc_id — that's Chunk's attribute). # Earlier code crashed with AttributeError on every call. 
# Quote-glyph normalisation tables used when probing the citation index.
_VC_TO_HEBREW_PUNCT = str.maketrans({'"': "\u05f4", "'": "\u05f3"})
_VC_TO_ASCII_PUNCT = str.maketrans({
    "\u05f4": '"', "\u05f3": "'",   # Hebrew gershayim / geresh
    "\u201c": '"', "\u201d": '"',   # typographic double quotes
    "\u2018": "'", "\u2019": "'",   # typographic single quotes
})


def _verify_citation_candidates(prefix):
    """Return spelling variants of a citation to try against the index.

    The first entries reproduce the original lookup precedence (as-is,
    apostrophe → geresh, whitespace removed). Variants that swap ASCII
    quotes with Hebrew or typographic ones are appended afterwards, so they
    can only add matches and never change which earlier match wins.
    """
    variants = [
        prefix,
        prefix.replace("'", "\u05f3"),
        _re.sub(r"\s+", "", prefix),
        prefix.translate(_VC_TO_HEBREW_PUNCT),
        prefix.translate(_VC_TO_ASCII_PUNCT),
    ]
    # Dedupe while preserving order.
    return list(dict.fromkeys(variants))


@app.post("/v1/verify_citations")
def verify_citations(body: _VerifyCitationsRequest):  # type: ignore
    """Day 54 — bulk citation existence check against the local corpus.

    Only the first 100 entries of ``body.cites`` are processed. Each cite is
    reduced to its case-citation prefix via ``_JR_CASE_RX`` (or used as-is
    when the pattern does not match) and looked up in the citation network's
    ``doc_for_citation`` index under several quote/whitespace variants.

    Returns:
        ``{"results": [...]}`` with one entry per processed cite:
        ``status="verified_corpus"`` plus ``doc_id`` when found,
        ``status="not_found_locally"`` when not, ``status="empty"`` for a
        blank cite. 500 with an error body on unexpected failure.
    """
    try:
        from ..pipeline import get_pipeline
        from ..citation_network import get_or_build
        pipe = get_pipeline()
        cn = get_or_build(pipe)
        results = []
        for raw_cite in (body.cites or [])[:100]:  # cap at 100 per request
            raw = (raw_cite or "").strip()
            if not raw:
                results.append({"cite": raw_cite, "status": "empty"})
                continue
            m = _JR_CASE_RX.search(raw)
            prefix = m.group(1).strip() if m else raw
            doc_id = None
            for candidate in _verify_citation_candidates(prefix):
                doc_id = cn.doc_for_citation.get(candidate)
                if doc_id:
                    break
            if doc_id:
                results.append({
                    "cite": raw_cite,
                    "prefix": prefix,
                    "status": "verified_corpus",
                    "doc_id": doc_id,
                })
            else:
                results.append({
                    "cite": raw_cite,
                    "prefix": prefix,
                    "status": "not_found_locally",
                })
        return {"results": results}
    except Exception as e:
        return JSONResponse(status_code=500, content={
            "ok": False, "error": f"{type(e).__name__}: {e}"
        })
"[פסק דין עליון]") court = None court_match = _re.search( r"\[(פסק\s*דין\s*(?:עליון|מחוזי|השלום|הארצי\s*לעבודה|אזורי\s*לעבודה)[^\]]*)\]", raw) if court_match: court = court_match.group(1) # Split body into paragraphs paragraphs = [] for chunk in _JR_PARA_RX.split(cleaned): t = chunk.strip() if not t: continue # Pull a leading paragraph number/letter if present num_match = _re.match(r"^((?:\d+\.|[א-י][.)])\s)(.+)", t, _re.DOTALL) if num_match: paragraphs.append({ "num": num_match.group(1).strip(), "text": num_match.group(2).strip(), }) else: paragraphs.append({"num": None, "text": t}) # Extract unique citations across the cleaned body citations = [] seen = set() for m in _JR_CASE_RX.finditer(cleaned): c = m.group(1).strip() if c not in seen: seen.add(c) citations.append(c) # Build "open in" links via Google. # - Nevo: site-restricted (their pages DO contain citation text) # - "Free" path: plain Google search (NO site restriction). # court.gov.il uses opaque file IDs so site:court.gov.il # returns zero results. Plain Google reliably surfaces # pador.co.il + other free sources where the citation IS # indexed. 
def _nevo_url(cite): from urllib.parse import quote q = f'site:nevo.co.il "{cite}"' return f"https://www.google.com/search?q={quote(q)}" def _courts_url(cite): from urllib.parse import quote q = f'"{cite}"' return f"https://www.google.com/search?q={quote(q)}" return { "ok": True, "doc_id": doc_id, "citation": citation, "court": court, "n_paragraphs": len(paragraphs), "n_citations": len(citations), "n_chars": len(cleaned), "paragraphs": paragraphs[:200], # cap — anything more is suspicious "citations": citations[:60], "links": { "nevo": _nevo_url(citation) if citation else None, "courts": _courts_url(citation) if citation else None, }, } except Exception as e: return JSONResponse(status_code=500, content={ "ok": False, "error": f"{type(e).__name__}: {e}" }) class _CitationGraphRequest(BaseModel): # type: ignore doc_ids: List[str] = [] max_nodes: int = 7 @app.post("/v1/citation_graph") def citation_graph_subgraph(body: _CitationGraphRequest): # type: ignore """Day 49 Phase 2 — return a subgraph showing which cases in the provided list cite which others. Input: doc_ids — list of corpus doc_ids OR raw case-citation strings (e.g. "ע\"א 4011/97"). max_nodes caps the result. Output: { nodes: [{id, label, kind}], edges: [{from, to}] } Used by the topic-dossier's citation-graph mini-SVG to visualize how the cases in the result relate. """ try: from ..pipeline import get_pipeline from ..citation_network import get_or_build pipe = get_pipeline() cn = get_or_build(pipe) # Normalize input: dedupe + cap ids = list(dict.fromkeys(body.doc_ids))[: body.max_nodes] if not ids: return {"ok": True, "nodes": [], "edges": []} # Resolve each input id → doc_id in the network. # Many inputs come as citation strings ("ע\"א 4011/97") not doc_ids; # use cn.doc_for_citation to map citation→doc_id. 
resolved = {} for i in ids: doc_id = cn.doc_for_citation.get(i) or ( i if i in cn.cites else None) if doc_id: resolved[i] = doc_id # Build nodes (use the input string as label for readability) nodes = [{"id": i, "label": i, "kind": "case"} for i in ids] # Build edges: for each resolved doc, check its outgoing cites; # add an edge if the target is also in our input set. # Use both doc_id and citation-text matching since the inputs # might be a mix. inputs_set = set(ids) edges = [] for src_input, src_doc in resolved.items(): for c in cn.cites.get(src_doc, []): cite_text = c.get("text", "") # Target is in inputs by raw citation match, OR by # resolved doc-id match. tgt_input = None if cite_text in inputs_set: tgt_input = cite_text else: resolved_target = cn.doc_for_citation.get(cite_text) if resolved_target: for other_input, other_doc in resolved.items(): if other_doc == resolved_target and other_input != src_input: tgt_input = other_input break if tgt_input and tgt_input != src_input: edges.append({"from": src_input, "to": tgt_input}) return {"ok": True, "nodes": nodes, "edges": edges} except Exception as e: return JSONResponse(status_code=500, content={ "ok": False, "error": f"{type(e).__name__}: {e}" })