"""Faithfulness / NLI check. Adapter around an optional NLI model (HuggingFace pipeline). Falls back to a Jaccard overlap between answer tokens and the union of source tokens. """ from __future__ import annotations import re from typing import Any, List, Optional from ..core.types import Query, Retrieved, VerificationResult, VerificationAlert _TOKEN_RE = re.compile(r"\w+", re.UNICODE) def _tokens(s: str) -> set: return {t.lower() for t in _TOKEN_RE.findall(s)} class FaithfulnessChecker: name = "faithfulness" def __init__( self, nli_pipeline: Optional[Any] = None, min_score: float = 0.5, ) -> None: self.nli = nli_pipeline self.min_score = min_score def _jaccard(self, answer: str, sources: List[Retrieved]) -> float: a = _tokens(answer) if not a: return 0.0 s = set() for r in sources: s |= _tokens(r.chunk.text) if not s: return 0.0 return len(a & s) / len(a | s) def verify_answer(self, answer: str, sources: List[Retrieved]) -> VerificationResult: score = 0.0 if self.nli is not None: try: premises = " ".join(r.chunk.text for r in sources[:5]) out = self.nli(f"{premises}{answer}") # typical NLI pipeline output: {label, score} label = out[0].get("label", "").lower() if isinstance(out, list) else "" raw = float(out[0].get("score", 0.0)) if isinstance(out, list) else 0.0 score = raw if "entail" in label else 1 - raw except Exception: score = self._jaccard(answer, sources) else: score = self._jaccard(answer, sources) passed = score >= self.min_score alerts: List[VerificationAlert] = [] if not passed: alerts.append(VerificationAlert( type="low_faithfulness", risk="MEDIUM", impact=f"Answer may not be fully supported (score={score:.2f}).", )) return VerificationResult( passed=passed, alerts=alerts, citation_coverage=0.0, faithfulness=score, structure_match=True, ) def verify(self, query: Query, answer: str, context: List[Retrieved]) -> VerificationResult: return self.verify_answer(answer, context)