Spaces:

voidful
/

RefCheck

Running

App Files Files Community

RefCheck / src /space_service.py

voidful

Add submission safety gate to reports

a82f053 verified about 16 hours ago

raw

history blame contribute delete

21.3 kB

	"""
	Non-interactive RefCheck workflow for Hugging Face Spaces.
	"""
	from __future__ import annotations

	import copy
	import tempfile
	from dataclasses import dataclass, field
	from functools import lru_cache
	from pathlib import Path
	from typing import Any
	from concurrent.futures import ThreadPoolExecutor, as_completed

	from main import (
	apply_fix,
	get_default_workflow,
	validate_entry,
	)
	from src.comparator import EntryReport, MetadataComparator
	from src.fetcher import (
	ArxivFetcher,
	CrossRefFetcher,
	DBLPFetcher,
	OpenAlexFetcher,
	ScholarFetcher,
	SemanticScholarFetcher,
	)
	from src.local_db import LocalConferenceDB
	from src.parser import BibEntry, BibParser
	from src.sanitizer import BibSanitizer, SanitizeFix


	@dataclass
	class RefCheckOptions:
	    """Settings that steer a non-interactive RefCheck run."""

	    # True: entries with a non-matching best result and no candidates are
	    # dropped from the output. False: they are queued for manual review.
	    remove_unverified: bool = True
	    # Copied onto the ``enabled`` flag of the workflow step named
	    # "google_scholar".
	    enable_google_scholar: bool = False
	    # Requested size of the validation thread pool.
	    max_workers: int = 4


	@dataclass
	class RefCheckResult:
	    """Summary counters, working data and output artifacts of one Space run."""

	    # Stem of the uploaded file; reused when naming the exported .bib file.
	    source_stem: str = "references"
	    # Number of entries parsed from the upload / currently in ``entries``.
	    total_input: int = 0
	    total_output: int = 0
	    # Tallies from re-verifying the exported bibliography.
	    verified: int = 0
	    issues: int = 0
	    not_found: int = 0
	    # Working bibliography that gets exported.
	    entries: list[BibEntry] = field(default_factory=list)
	    # Unresolved entries: dicts with "entry_key", "entry", "best_result"
	    # and "candidates".
	    review_items: list[dict[str, Any]] = field(default_factory=list)
	    # Entry key -> human-readable descriptions of applied changes.
	    fixed_details: dict[str, list[str]] = field(default_factory=dict)
	    # (entry key, title, reason) for every entry dropped from the output.
	    removed_details: list[tuple[str, str, str]] = field(default_factory=list)
	    # Report-friendly payloads rebuilt from ``review_items``.
	    review_details: list[dict[str, Any]] = field(default_factory=list)
	    # Duplicate groups from the sanitizer; the report renders each item as
	    # title -> list of entry keys.
	    duplicate_details: dict[str, list[str]] = field(default_factory=dict)
	    # Entry key -> sanitizer fixes applied before validation.
	    sanitize_fixes: dict[str, list[SanitizeFix]] = field(default_factory=dict)
	    # Local conference DB statistics.
	    local_matches: int = 0
	    local_db_loaded: bool = False
	    # Downloadable artifacts and the rendered report text.
	    fixed_bib_path: str = ""
	    report_path: str = ""
	    report_markdown: str = ""


	def run_refcheck_file(file_path: str | Path, options: RefCheckOptions | None = None) -> RefCheckResult:
	    """Validate and fix an uploaded BibTeX file without interactive prompts.

	    Pipeline: parse the file, sanitize entries, detect duplicates, consult the
	    local conference DB, validate every entry against the configured fetchers,
	    then apply one action per entry:

	    * ``fix``    - best result is a match with fetched data; apply it.
	    * ``review`` - no match, but candidates exist; queue for manual review.
	    * ``remove`` - no match and no candidates; drop the entry, or queue it for
	      review when ``options.remove_unverified`` is False.
	    * ``keep``   - validation produced no result; leave the entry untouched.

	    Args:
	        file_path: Path of the uploaded ``.bib`` file.
	        options: Run settings; defaults to ``RefCheckOptions()``.

	    Returns:
	        The populated ``RefCheckResult`` (already passed through
	        ``finalize_result`` unless the input contained no entries).
	    """
	    opts = options or RefCheckOptions()
	    source_path = Path(file_path)
	    parser = BibParser()
	    entries = parser.parse_file(str(source_path))
	    result = RefCheckResult(source_stem=source_path.stem or "references", total_input=len(entries))

	    # Nothing to validate: still emit an (empty) bibliography and a report.
	    if not entries:
	        result.report_markdown = "## RefCheck Report\n\nNo BibTeX entries were found."
	        result.report_path = _write_report(result.report_markdown)
	        result.fixed_bib_path = _write_bib(parser, [], result.source_stem)
	        return result

	    sanitizer = BibSanitizer()
	    result.sanitize_fixes = sanitizer.sanitize_all(entries)
	    _record_sanitize_fixes(result.fixed_details, result.sanitize_fixes)
	    result.duplicate_details = sanitizer.find_duplicates(entries)

	    result.local_db_loaded, api_entries, result.local_matches = _apply_local_db(entries, result.fixed_details)

	    fetchers = _build_fetchers()
	    workflow = get_default_workflow()
	    for step in workflow.steps:
	        if step.name == "google_scholar":
	            step.enabled = opts.enable_google_scholar

	    comparator = MetadataComparator()
	    analysis = _analyze_entries(api_entries, workflow, fetchers, comparator, opts.max_workers)

	    # Decide one action per entry key before mutating anything.
	    actions: dict[str, tuple[str, Any, list[Any]]] = {}
	    for entry, best_result, candidates in analysis:
	        if not best_result:
	            decision = ("keep", None, [])
	        elif best_result.is_match and best_result.fetched_data:
	            decision = ("fix", best_result, candidates)
	        elif candidates:
	            decision = ("review", best_result, candidates)
	        else:
	            decision = ("remove", best_result, candidates)
	        actions[entry.key] = decision

	    kept_entries: list[BibEntry] = []
	    for entry in entries:
	        action, best_result, candidates = actions.get(entry.key, ("keep", None, []))

	        # Only an unverified entry with removal enabled leaves the output.
	        if action == "remove" and opts.remove_unverified:
	            result.removed_details.append((entry.key, entry.title, "No matching metadata found in any source"))
	            continue

	        if action == "fix":
	            changes = apply_fix(entry, best_result.fetched_data, all_candidates=candidates)
	            if changes:
	                result.fixed_details.setdefault(entry.key, []).extend(changes)
	        elif action in ("review", "remove"):
	            result.review_items.append(_review_item(entry, best_result, candidates))

	        kept_entries.append(entry)

	    result.entries = kept_entries
	    return finalize_result(result, opts)


	def finalize_result(result: RefCheckResult, options: RefCheckOptions | None = None) -> RefCheckResult:
	    """Write current entries, re-verify them, and refresh downloadable artifacts.

	    The working bibliography is saved to a fresh temporary file, parsed back,
	    and every parsed entry is validated again so the counters describe what
	    the user will actually download.

	    Args:
	        result: Run state to finalize; updated in place.
	        options: Run settings; defaults to ``RefCheckOptions()``.

	    Returns:
	        The same ``result`` object with counters, paths and report refreshed.
	    """
	    opts = options or RefCheckOptions()
	    parser = BibParser()
	    fetchers = _build_fetchers()
	    workflow = get_default_workflow()
	    for step in workflow.steps:
	        if step.name == "google_scholar":
	            step.enabled = opts.enable_google_scholar

	    comparator = MetadataComparator()
	    result.review_details = [_review_payload_from_item(item) for item in result.review_items]
	    result.total_output = len(result.entries)
	    result.fixed_bib_path = _write_bib(parser, result.entries, result.source_stem)

	    reparsed_entries = parser.parse_file(result.fixed_bib_path)
	    verification_reports = _verify_entries(
	        reparsed_entries,
	        workflow,
	        fetchers,
	        comparator,
	        opts.max_workers,
	    )

	    # NOTE(review): a report without a comparison is counted in none of the
	    # three tallies below; it only surfaces through verified != total_output.
	    verified = issues = not_found = 0
	    for report in verification_reports:
	        comparison = report.comparison
	        if not comparison:
	            continue
	        if comparison.is_match:
	            verified += 1
	        if comparison.has_issues:
	            issues += 1
	        if not comparison.is_match and not comparison.has_issues:
	            not_found += 1
	    result.verified = verified
	    result.issues = issues
	    result.not_found = not_found

	    result.report_markdown = _build_report(result, verification_reports)
	    result.report_path = _write_report(result.report_markdown)
	    return result


	def preview_review_action(
	    result: RefCheckResult | None,
	    review_index: int,
	    action: str,
	    candidate_index: int | None = None,
	    options: RefCheckOptions | None = None,
	) -> str:
	    """Preview and test a manual review action without mutating the session.

	    Args:
	        result: Current run state; may be ``None`` when nothing has run yet.
	        review_index: Position of the unresolved item in ``result.review_items``.
	        action: ``"keep"``, ``"remove"`` or ``"candidate"``.
	        candidate_index: Position of the chosen candidate (``"candidate"`` only).
	        options: Run settings; defaults to ``RefCheckOptions()``.

	    Returns:
	        Markdown describing the outcome, or a short message explaining why the
	        action cannot be previewed.
	    """
	    if not result or not result.review_items:
	        return "No unresolved entries are available."
	    if not 0 <= review_index < len(result.review_items):
	        return "Select an unresolved entry first."

	    opts = options or RefCheckOptions()
	    item = result.review_items[review_index]
	    entry = _find_entry(result.entries, item["entry_key"])
	    if not entry:
	        return "The selected entry is no longer in the working bibliography."

	    if action == "keep":
	        return _entry_preview_markdown(entry, "Keep original entry", ["No metadata changes will be applied."])
	    if action == "remove":
	        return _entry_preview_markdown(entry, "Remove entry", ["This entry will be removed from the exported BibTeX."])
	    if action != "candidate":
	        return "Select a candidate, keep, or remove action."

	    candidates = item.get("candidates", [])
	    if candidate_index is None or not 0 <= candidate_index < len(candidates):
	        return "Select a candidate first."

	    candidate = candidates[candidate_index]
	    if not _candidate_exact_match(candidate):
	        blocked_lines = [
	            "This candidate is not an exact title/author/year match, so RefCheck will not auto-apply it.",
	            f"Candidate source: {candidate.source}",
	            f"Candidate confidence: {candidate.confidence:.2f}",
	            *_candidate_issue_lines(candidate),
	        ]
	        return _entry_preview_markdown(entry, "Candidate blocked", blocked_lines)

	    # Work on a deep copy so the preview never touches the real entry.
	    temp_entry = copy.deepcopy(entry)
	    changes = apply_fix(temp_entry, candidate.fetched_data, allow_optional_updates=True)
	    if not changes:
	        changes = ["No field-level changes are needed for this candidate."]

	    fetchers = _build_fetchers()
	    workflow = get_default_workflow()
	    for step in workflow.steps:
	        if step.name == "google_scholar":
	            step.enabled = opts.enable_google_scholar
	    comparator = MetadataComparator()

	    # Re-validate the patched copy to show how it would verify after applying.
	    best_result, _ = validate_entry(temp_entry, workflow, fetchers, comparator)
	    test_lines = [
	        f"Candidate source: {candidate.source}",
	        f"Candidate confidence before apply: {candidate.confidence:.2f}",
	    ]
	    if best_result:
	        test_lines += [
	            f"Verification source after apply: {best_result.source}",
	            f"Verification confidence after apply: {best_result.confidence:.2f}",
	            f"Verified after apply: {'yes' if best_result.is_match else 'no'}",
	        ]
	        if best_result.issues:
	            test_lines.append(f"Remaining issues: {'; '.join(best_result.issues)}")

	    return _entry_preview_markdown(temp_entry, "Candidate test", changes + test_lines)


	def apply_review_action(
	    result: RefCheckResult | None,
	    review_index: int,
	    action: str,
	    candidate_index: int | None = None,
	    options: RefCheckOptions | None = None,
	) -> RefCheckResult:
	    """Apply a manual review action to the working bibliography.

	    Args:
	        result: Current run state.
	        review_index: Position of the unresolved item in ``result.review_items``.
	        action: ``"candidate"``, ``"remove"`` or ``"keep"``.
	        candidate_index: Position of the chosen candidate (``"candidate"`` only).
	        options: Run settings; defaults to ``RefCheckOptions()``.

	    Returns:
	        The finalized result with the review item resolved.

	    Raises:
	        ValueError: If there is nothing to review, an index is out of range,
	            the entry is gone, the candidate is not an exact match, or the
	            action is unknown.
	    """
	    if not result or not result.review_items:
	        raise ValueError("No unresolved entries are available.")
	    if not 0 <= review_index < len(result.review_items):
	        raise ValueError("Select an unresolved entry first.")

	    opts = options or RefCheckOptions()
	    item = result.review_items[review_index]
	    entry = _find_entry(result.entries, item["entry_key"])
	    if not entry:
	        raise ValueError("The selected entry is no longer in the working bibliography.")

	    if action == "candidate":
	        candidates = item.get("candidates", [])
	        if candidate_index is None or not 0 <= candidate_index < len(candidates):
	            raise ValueError("Select a candidate first.")
	        candidate = candidates[candidate_index]
	        if not _candidate_exact_match(candidate):
	            raise ValueError(
	                "Selected candidate is not an exact title/author/year match; RefCheck will not auto-overwrite core metadata."
	            )
	        # Copy the returned change list: other call sites treat the return
	        # value as possibly empty/falsy, and appending to it in place would
	        # also mutate an object owned by apply_fix.
	        changes = list(apply_fix(entry, candidate.fetched_data, allow_optional_updates=True) or [])
	        changes.append(f"Resolved manually with candidate from {candidate.source}.")
	        result.fixed_details.setdefault(entry.key, []).extend(changes)
	    elif action == "remove":
	        result.entries = [existing for existing in result.entries if existing.key != entry.key]
	        result.removed_details.append((entry.key, entry.title, "Removed during manual review"))
	    elif action == "keep":
	        result.fixed_details.setdefault(entry.key, []).append("Marked as manually reviewed; kept original entry.")
	    else:
	        raise ValueError("Select a candidate, keep, or remove action.")

	    # The item is resolved only after the action succeeded.
	    del result.review_items[review_index]
	    return finalize_result(result, opts)


	def _find_entry(entries: list[BibEntry], key: str) -> BibEntry | None:
	    """Return the first entry whose ``key`` equals ``key``, or ``None``."""
	    return next((candidate for candidate in entries if candidate.key == key), None)


	def _candidate_exact_match(candidate: Any) -> bool:
	    """Tell whether a candidate is safe to apply automatically.

	    Requires a truthy candidate whose ``is_match``, ``title_match``,
	    ``author_match`` and ``year_match`` flags are all truthy and whose
	    ``author_initial_conflict`` flag is falsy. Missing attributes count as
	    False.
	    """
	    if not candidate:
	        return False
	    required_flags = ("is_match", "title_match", "author_match", "year_match")
	    if not all(getattr(candidate, flag, False) for flag in required_flags):
	        return False
	    return not getattr(candidate, "author_initial_conflict", False)


	def _candidate_issue_lines(candidate: Any) -> list[str]:
	    """List the reasons a candidate is blocked, each prefixed "Blocking issue: ".

	    Starts from the candidate's own ``issues`` and adds one line per failed
	    title/author/year flag. Duplicates are dropped, first occurrence wins.
	    """
	    problems = [*(getattr(candidate, "issues", []) or [])]

	    if not getattr(candidate, "title_match", False):
	        problems.append("Title is not an exact-enough match")
	    if not getattr(candidate, "author_match", False):
	        problems.append("Authors are not an exact-enough match")
	    if not getattr(candidate, "year_match", False):
	        year_in_bib = getattr(candidate, "bib_year", "") or "[missing]"
	        year_in_candidate = getattr(candidate, "fetched_year", "") or "[missing]"
	        problems.append(f"Year mismatch: bib={year_in_bib}, candidate={year_in_candidate}")

	    # dict.fromkeys de-duplicates while keeping insertion order.
	    unique_problems = dict.fromkeys(problems)
	    return [f"Blocking issue: {problem}" for problem in unique_problems]


	def _entry_preview_markdown(entry: BibEntry, title: str, lines: list[str]) -> str:
	    """Render an entry preview: heading, key/title/authors/year, then bullets.

	    Empty title, author or year values are shown as ``[missing]``. Sections
	    are separated by blank lines; ``lines`` become one bullet each.
	    """
	    bullet_list = "\n".join([f"- {line}" for line in lines])
	    sections = [
	        f"### {title}",
	        f"Key: `{entry.key}`",
	        f"Title: {entry.title or '[missing]'}",
	        f"Authors: {entry.author or '[missing]'}",
	        f"Year: {entry.year or '[missing]'}",
	        bullet_list,
	    ]
	    return "\n\n".join(sections)


	def _build_fetchers() -> dict[str, Any]:
	    """Create one fresh fetcher per metadata source, keyed by source name."""
	    # Order is kept as-is: instances are constructed top to bottom.
	    factories = (
	        ("arxiv", ArxivFetcher),
	        ("crossref", CrossRefFetcher),
	        ("scholar", ScholarFetcher),
	        ("semantic", SemanticScholarFetcher),
	        ("openalex", OpenAlexFetcher),
	        ("dblp", DBLPFetcher),
	    )
	    return {source_name: factory() for source_name, factory in factories}


	def _analyze_entries(
	    entries: list[BibEntry],
	    workflow: Any,
	    fetchers: dict[str, Any],
	    comparator: MetadataComparator,
	    max_workers: int,
	) -> list[tuple[BibEntry, Any, list[Any]]]:
	    """Run ``validate_entry`` for every entry on a thread pool.

	    Args:
	        entries: Entries to validate.
	        workflow: Workflow object forwarded to ``validate_entry``.
	        fetchers: Source name -> fetcher mapping forwarded to ``validate_entry``.
	        comparator: Comparator forwarded to ``validate_entry``.
	        max_workers: Requested pool size; clamped to ``1..len(entries)``.

	    Returns:
	        One ``(entry, best_result, candidates)`` tuple per input entry, in the
	        same order as ``entries`` so that reports built from the result are
	        reproducible between runs. A validation that raises is logged and
	        recorded as ``(entry, None, [])`` instead of aborting the batch.
	    """
	    if not entries:
	        return []

	    import logging  # function-scope import keeps the file's import block untouched

	    log = logging.getLogger(__name__)
	    worker_count = min(max(1, max_workers), len(entries))
	    analysis: list[tuple[BibEntry, Any, list[Any]]] = []
	    with ThreadPoolExecutor(max_workers=worker_count) as executor:
	        # Submit everything first so the work still runs concurrently, then
	        # collect in submission order to get a deterministic result list.
	        futures = [
	            executor.submit(validate_entry, entry, workflow, fetchers, comparator)
	            for entry in entries
	        ]
	        for entry, future in zip(entries, futures):
	            try:
	                best_result, candidates = future.result()
	            except Exception:
	                # Best effort: one failing entry must not sink the whole run,
	                # but the failure should at least be visible in the logs.
	                log.exception("Validation failed for entry %r", getattr(entry, "key", entry))
	                best_result, candidates = None, []
	            analysis.append((entry, best_result, candidates))
	    return analysis


	def _verify_entries(
	    entries: list[BibEntry],
	    workflow: Any,
	    fetchers: dict[str, Any],
	    comparator: MetadataComparator,
	    max_workers: int,
	) -> list[EntryReport]:
	    """Validate ``entries`` and wrap each best result in an ``EntryReport``.

	    Candidate lists produced by the analysis are discarded here.
	    """
	    analysis = _analyze_entries(entries, workflow, fetchers, comparator, max_workers)
	    return [
	        EntryReport(entry=entry, comparison=best_result)
	        for entry, best_result, _candidates in analysis
	    ]


	def _record_sanitize_fixes(
	    fixed_details: dict[str, list[str]],
	    sanitize_fixes: dict[str, list[SanitizeFix]],
	) -> None:
	    """Append each sanitizer fix description to ``fixed_details`` in place.

	    Every key of ``sanitize_fixes`` ends up in ``fixed_details``, even when
	    its fix list is empty.
	    """
	    for entry_key, fixes in sanitize_fixes.items():
	        descriptions = fixed_details.setdefault(entry_key, [])
	        descriptions.extend(fix.description for fix in fixes)


	def _apply_local_db(
	    entries: list[BibEntry],
	    fixed_details: dict[str, list[str]],
	) -> tuple[bool, list[BibEntry], int]:
	    """Count how many entry titles are known to the local conference DB.

	    Returns ``(db_loaded, entries, match_count)``. ``entries`` is handed back
	    unchanged and ``fixed_details`` is not used: matches are only counted,
	    nothing is written to the entries.
	    """
	    database = _load_local_db()
	    if not database.is_loaded:
	        return False, entries, 0

	    hits = sum(1 for entry in entries if database.lookup(entry.title))
	    return True, entries, hits


	@lru_cache(maxsize=1)
	def _load_local_db() -> LocalConferenceDB:
	    """Build and load the local conference DB; the instance is cached."""
	    # lru_cache(maxsize=1) on a zero-argument function: the body runs once
	    # and later calls return the same object.
	    database = LocalConferenceDB()
	    database.load()
	    return database


	def _review_item(entry: BibEntry, best_result: Any, candidates: list[Any]) -> dict[str, Any]:
	    """Bundle an unresolved entry with its candidates, best confidence first.

	    The input ``candidates`` list is left untouched; a sorted copy is stored.
	    """
	    ranked = list(candidates)
	    ranked.sort(key=lambda candidate: candidate.confidence, reverse=True)
	    review: dict[str, Any] = {}
	    review["entry_key"] = entry.key
	    review["entry"] = entry
	    review["best_result"] = best_result
	    review["candidates"] = ranked
	    return review


	def _review_payload_from_item(item: dict[str, Any]) -> dict[str, Any]:
	    """Turn a stored review item into its report payload.

	    ``item["entry"]`` is required; a missing best result or candidate list
	    falls back to ``None`` / ``[]``.
	    """
	    entry = item["entry"]
	    best_result = item.get("best_result")
	    candidates = item.get("candidates", [])
	    return _review_payload(entry, best_result, candidates)


	def _review_payload(entry: BibEntry, best_result: Any, candidates: list[Any]) -> dict[str, Any]:
	    """Build the report payload for one entry that needs manual review.

	    The reason is the best result's issues joined by "; ", or
	    "Ambiguous match" when there is no best result or it has no issues.
	    Only the first five candidates are included.
	    """
	    if best_result and best_result.issues:
	        reason = "; ".join(best_result.issues)
	    else:
	        reason = "Ambiguous match"

	    summaries = []
	    for candidate in candidates[:5]:
	        fetched = candidate.fetched_data
	        summaries.append(
	            {
	                "source": candidate.source,
	                "confidence": candidate.confidence,
	                "title": getattr(fetched, "title", ""),
	                "year": getattr(fetched, "year", ""),
	                "doi": getattr(fetched, "doi", ""),
	            }
	        )

	    return {
	        "key": entry.key,
	        "title": entry.title,
	        "reason": reason,
	        "candidates": summaries,
	    }


	def _write_bib(parser: BibParser, entries: list[BibEntry], original_stem: str) -> str:
	    """Save ``entries`` through ``parser`` into a new temp directory.

	    The file is named ``<stem>_refcheck_fixed.bib`` (stem defaults to
	    "references" when empty). Returns the file path as a string.
	    """
	    stem = original_stem or "references"
	    target = Path(tempfile.mkdtemp(prefix="refcheck_")) / f"{stem}_refcheck_fixed.bib"
	    target_str = str(target)
	    parser.save_entries(target_str, entries)
	    return target_str


	def _write_report(markdown: str) -> str:
	    """Write ``markdown`` as UTF-8 to ``refcheck_report.md`` in a new temp dir.

	    Returns the path of the written file as a string.
	    """
	    report_file = Path(tempfile.mkdtemp(prefix="refcheck_report_")) / "refcheck_report.md"
	    report_file.write_text(markdown, encoding="utf-8")
	    return str(report_file)


	def _build_report(result: RefCheckResult, reports: list[EntryReport]) -> str:
	    """Render the Markdown report for a finished run.

	    Sections, in order: Summary, Submission Safety Gate, then - only when
	    non-empty - Removed, Fixed, Duplicate Titles, Needs Review and
	    Verification Issues.

	    Args:
	        result: Run state supplying counters and detail collections.
	        reports: Re-verification reports; those with a comparison that is not
	            a match are listed under "Verification Issues".

	    Returns:
	        The report text, stripped and terminated by a single newline.
	    """
	    lines = [
	        "## RefCheck Report",
	        "",
	        "### Summary",
	        "",
	        f"- Input entries: {result.total_input}",
	        f"- Output entries: {result.total_output}",
	        f"- Verified after fix: {result.verified}",
	        f"- Remaining issues: {result.issues}",
	        f"- Not found after fix: {result.not_found}",
	        f"- Local DB loaded: {'yes' if result.local_db_loaded else 'no'}",
	        f"- Local DB matches: {result.local_matches}",
	        "",
	    ]

	    gate_status, gate_reasons = _submission_safety_gate(result)
	    lines += ["### Submission Safety Gate", "", f"- Status: {gate_status}"]
	    lines += [f"- {reason}" for reason in gate_reasons]
	    lines.append("")

	    if result.removed_details:
	        lines += ["### Removed", ""]
	        lines += [f"- `{key}`: {title} ({reason})" for key, title, reason in result.removed_details]
	        lines.append("")

	    if result.fixed_details:
	        lines += ["### Fixed", ""]
	        for key, changes in sorted(result.fixed_details.items()):
	            lines.append(f"- `{key}`")
	            lines += [f" - {change}" for change in changes]
	        lines.append("")

	    if result.duplicate_details:
	        lines += ["### Duplicate Titles", ""]
	        lines += [f"- `{', '.join(keys)}`: {title}" for title, keys in result.duplicate_details.items()]
	        lines.append("")

	    if result.review_details:
	        lines += ["### Needs Review", ""]
	        for item in result.review_details:
	            lines.append(f"- `{item['key']}`: {item['title']}")
	            lines.append(f" - Reason: {item['reason']}")
	            for candidate in item["candidates"]:
	                # Join only the non-empty parts, then add the nested-bullet
	                # prefix. Stripping the whole concatenated literal (as the
	                # previous code did) also removed the leading indent, so
	                # candidates were rendered at a different list level than
	                # the "Reason" line.
	                parts = [
	                    f"{candidate['source']} (confidence {candidate['confidence']:.2f})",
	                    candidate["title"],
	                    candidate["year"],
	                    candidate["doi"],
	                ]
	                detail = " ".join(str(part) for part in parts if part)
	                lines.append(f" - Candidate: {detail}")
	        lines.append("")

	    remaining = [
	        report
	        for report in reports
	        if report.comparison and not report.comparison.is_match
	    ]
	    if remaining:
	        lines += ["### Verification Issues", ""]
	        for report in remaining:
	            comparison = report.comparison
	            issues = "; ".join(comparison.issues) if comparison.issues else "Not matched"
	            lines.append(
	                f"- `{report.entry.key}` via {comparison.source} "
	                f"(confidence {comparison.confidence:.2f}): {issues}"
	            )
	        lines.append("")

	    return "\n".join(lines).strip() + "\n"


	def _submission_safety_gate(result: RefCheckResult) -> tuple[str, list[str]]:
	    """Decide whether the exported bibliography looks safe to submit.

	    Returns ``(status, reasons)``. Any reason starting with "FAIL" makes the
	    status a failure; "WARN" reasons alone still pass. With no reasons at
	    all, a single "PASS" line is returned instead.
	    """
	    reasons: list[str] = []

	    def note(condition: Any, message: str) -> None:
	        # Record the message only when its trigger is truthy.
	        if condition:
	            reasons.append(message)

	    note(
	        result.review_details,
	        f"FAIL: {len(result.review_details)} reference(s) still need manual review.",
	    )
	    note(
	        result.issues,
	        f"FAIL: {result.issues} reference(s) still have strict verification issues.",
	    )
	    note(
	        result.not_found,
	        f"FAIL: {result.not_found} reference(s) could not be found in configured sources.",
	    )
	    note(
	        result.removed_details,
	        f"FAIL: {len(result.removed_details)} reference(s) were removed; confirm the paper text no longer cites them.",
	    )
	    note(
	        result.total_output and result.verified != result.total_output,
	        f"FAIL: only {result.verified}/{result.total_output} output reference(s) are strictly verified.",
	    )
	    note(
	        result.duplicate_details,
	        f"WARN: {len(result.duplicate_details)} duplicate title group(s) should be checked.",
	    )

	    if any(reason.startswith("FAIL") for reason in reasons):
	        return "FAIL - do not submit yet", reasons
	    if not reasons:
	        reasons = ["PASS: no unresolved reference risks detected."]
	    return "PASS - all output references are strictly verified", reasons