#!/bin/bash
#
# One-shot commit for the v2.0 Hebrew RAG production milestone.
# 24/24 regression tests pass.
#
# What this script does, in order:
#   1. cd to the parent directory of the directory containing this script.
#   2. Abort early if the release tag already exists.
#   3. Stage a fixed list of files and print the staged diff summary.
#   4. Create one commit with the release message below.
#   5. Create the annotated release tag and print the resulting commit/tags.
#
# Takes no arguments. Exits non-zero as soon as any step fails.

set -euo pipefail

# Annotated tag created for this milestone, and its message.
readonly TAG_NAME="heb-rag-v2.0"
readonly TAG_MESSAGE="Hebrew legal RAG — 24/24 regression tests, production-ready retrieval"

# Files that make up the milestone, relative to the directory we cd into.
readonly -a RELEASE_FILES=(
  pipeline.py
  api/fastapi_app.py
  generate/extractive.py
  middleware/adversarial_prompt.py
  static/index.html
  verify/citations.py
  verify/structural_nli.py
  scripts/deploy_hebrew_rag.sh
  scripts/test_hebrew_rag.sh
  scripts/commit_v2.sh
  HEBREW_RAG_RUNBOOK.md
  PRODUCTION_GAP_ANALYSIS.md
)

# All paths above are resolved from one level above this script's directory.
cd "$(dirname "${BASH_SOURCE[0]}")/.."

# Pre-flight: `git tag -a` refuses to overwrite an existing tag. Checking
# here, before anything is staged or committed, avoids ending up with a new
# commit but no tag when the script is run a second time.
if git rev-parse -q --verify "refs/tags/${TAG_NAME}" >/dev/null; then
  echo "error: tag '${TAG_NAME}' already exists; nothing was staged or committed." >&2
  exit 1
fi

echo "▸ Staging code changes..."
git add "${RELEASE_FILES[@]}"

echo "▸ Current diff summary:"
git diff --cached --stat

echo ""
echo "▸ Creating commit..."
# The message is fed on stdin (-F -) instead of nesting the heredoc inside
# "$(cat ...)": the body contains an apostrophe and double quotes, and a
# plain quoted heredoc passes them through without any extra shell parsing.
# NOTE(review): the Co-Authored-By trailer has no <email> part — confirm
# that this is intended.
git commit -F - <<'MSG'
Hebrew RAG v2.0 — 24/24 regression tests pass

Pipeline (retrieval correctness):
- Symmetric language filter: Hebrew→Hebrew-only, English→drops Hebrew,
  empty-fallback returns no results (no more language leakage into EN queries).
- "סעיף N" intent boost (+5.0 for statute, -0.5 for caselaw) + section sidecar
  that injects the actual §N statute from _indexed_docs when BM25 buries it
  behind shorter noisy chunks.
- key_terms exact-match boost for Hebrew topical queries (gated to short
  queries without caselaw intent markers).
- Global post-fusion filter pass + citation sidecar: honors query.filters
  across ALL retrievers (dense/gematria/hilbert don't natively filter).
- "הלכת X" landmark sidecar: exact key_terms match → force-injection from
  supplement, guards landmark rulings against bulk caselaw referencing them
  by name.

Data:
- heb_law_supplement.jsonl: added [הלכת X] markers + key_terms for all 5
  landmark rulings (apropim, bait_yules, kal_binyan, klinor, mivtahim).

API:
- /v1/suggestions (public, read-only) — frontend no longer hits the
  admin-gated /v1/admin/query_stats, eliminates 401 console noise.

Frontend:
- Chat history (localStorage, up to 50 conversations, search, replay)
- Export to Markdown / Word (.doc HTML) / HTML / PDF (print) / Clipboard
- Header buttons: 🕒 היסטוריה, + חדש, 📤 ייצוא

Ops scripts:
- deploy_hebrew_rag.sh restart → auto-detects empty index and re-uploads
  JSONL automatically (no more stale-index confusion)
- test_hebrew_rag.sh: robust JSON-body builder via python3 json.dumps,
  no more set -e + quote-escaping silent failures. 24 tests total.

Verifiers:
- Skip disclaimer sentences from citation coverage / structural NLI
  scoring (avoid false-positive unsupported_claim alerts).

Adversarial prompt detection:
- 7 new Hebrew patterns (SEVERITY_CRITICAL) for "שכח/התעלם/תתעלם" +
  SYSTEM:/USER:/ASSISTANT: prefixes + mixed EN-HE injection chains.

Co-Authored-By: Claude Opus 4.7 (1M context)
MSG

echo ""
echo "▸ Tagging..."
git tag -a "${TAG_NAME}" -m "${TAG_MESSAGE}"

echo ""
echo "✅ Done. Commit + tag created."
# Echo each verification command, then run it, so the output is labelled.
echo " git log -1 --oneline"
git log -1 --oneline
echo ""
echo " git tag -l 'heb-rag-*'"
git tag -l 'heb-rag-*'