"""Citation coverage checker. Simple, deterministic: counts how many sentences in the answer reference a source marker like [1], [2] etc. For production swap with a claim-level alignment pass over spans. """ from __future__ import annotations import re from typing import List from ..core.types import Query, Retrieved, VerificationResult, VerificationAlert CITE_RE = re.compile(r"\[(\d+)\]") SENT_RE = re.compile(r"[.!?…]+|\n+") class CitationChecker: name = "citations" def __init__(self, min_coverage: float = 0.8) -> None: self.min_coverage = min_coverage def verify_answer(self, answer: str, sources: List[Retrieved]) -> VerificationResult: if not answer.strip(): return VerificationResult(passed=False, citation_coverage=0.0) sentences = [s.strip() for s in SENT_RE.split(answer) if s.strip()] if not sentences: return VerificationResult(passed=False, citation_coverage=0.0) # v2.x — skip disclaimer/footer sentences from citation-coverage # math. These are meta-content (e.g. "אין באמור ייעוץ משפטי...") # that intentionally carry no citation and shouldn't drag coverage # below the pass threshold. _SKIP_MARKERS = ( "אין באמור ייעוץ משפטי", # Hebrew legal disclaimer "disclaimer:", # generic English disclaimer "— — —", # visual separator before disclaimer "—\u00a0—\u00a0—", # NBSP variant ) def _is_meta(s: str) -> bool: low = s.lower() return any(m in s or m in low for m in _SKIP_MARKERS) countable = [s for s in sentences if not _is_meta(s)] if not countable: return VerificationResult(passed=True, citation_coverage=1.0) cited = sum(1 for s in countable if CITE_RE.search(s)) coverage = cited / len(countable) passed = coverage >= self.min_coverage alerts: List[VerificationAlert] = [] if not passed: alerts.append(VerificationAlert( type="low_citation_coverage", risk="MEDIUM", impact=f"Only {coverage:.0%} of sentences are cited " f"(minimum {self.min_coverage:.0%}).", detail={"sentences": len(sentences), "cited": cited}, )) return VerificationResult( passed=passed, alerts=alerts, citation_coverage=coverage, faithfulness=0.0, structure_match=True, ) def verify(self, query: Query, answer: str, context: List[Retrieved]) -> VerificationResult: return self.verify_answer(answer, context)