Spaces:

voidful
/

RefCheck

Sleeping

App Files Files Community

voidful committed 3 days ago

Commit

a2ec1b5

verified ·

1 Parent(s): 740846d

Add manual review controls

Browse files

Files changed (2) hide show

app.py +226 -3
src/space_service.py +163 -13

app.py CHANGED Viewed

@@ -6,7 +6,13 @@ from typing import Any
 import gradio as gr
-from src.space_service import RefCheckOptions, run_refcheck_file
 def _uploaded_path(uploaded: Any) -> str | None:
@@ -44,8 +50,179 @@ def process_bib(
         return f"## RefCheck Report\n\nProcessing failed: `{exc}`", None, None
 with gr.Blocks(title="RefCheck") as demo:
     gr.Markdown("# RefCheck")
     with gr.Row():
         with gr.Column(scale=1):
@@ -55,8 +232,8 @@ with gr.Blocks(title="RefCheck") as demo:
                 type="filepath",
             )
             remove_unverified = gr.Checkbox(
-                label="Remove unverifiable entries",
-                value=True,
             )
             enable_google_scholar = gr.Checkbox(
                 label="Google Scholar fallback",
@@ -70,18 +247,64 @@ with gr.Blocks(title="RefCheck") as demo:
                 value=4,
             )
             run_button = gr.Button("Run RefCheck", variant="primary")
         with gr.Column(scale=2):
             report = gr.Markdown(label="Report")
             fixed_bib = gr.File(label="Fixed BibTeX")
             report_file = gr.File(label="Markdown report")
     run_button.click(
         fn=process_bib,
         inputs=[bib_file, remove_unverified, enable_google_scholar, max_workers],
         outputs=[report, fixed_bib, report_file],
         api_name="refcheck",
     )
 def launch() -> None:

 import gradio as gr
+from src.space_service import (
+    RefCheckOptions,
+    RefCheckResult,
+    apply_review_action,
+    preview_review_action,
+    run_refcheck_file,
+)
 def _uploaded_path(uploaded: Any) -> str | None:
         return f"## RefCheck Report\n\nProcessing failed: `{exc}`", None, None
+def process_bib_for_ui(
+    uploaded: Any,
+    remove_unverified: bool,
+    enable_google_scholar: bool,
+    max_workers: int,
+):
+    file_path = _uploaded_path(uploaded)
+    if not file_path:
+        report = "## RefCheck Report\n\nNo BibTeX file was uploaded."
+        return report, None, None, None, gr.update(choices=[], value=None), gr.update(choices=[], value=None), ""
+    try:
+        options = _options(remove_unverified, enable_google_scholar, max_workers)
+        result = run_refcheck_file(Path(file_path), options)
+        return _ui_outputs(result)
+    except Exception as exc:
+        report = f"## RefCheck Report\n\nProcessing failed: `{exc}`"
+        return report, None, None, None, gr.update(choices=[], value=None), gr.update(choices=[], value=None), ""
+def update_review_controls(review_label: str | None, result: RefCheckResult | None):
+    action_choices = _action_choices(result, review_label)
+    action_value = action_choices[0] if action_choices else None
+    preview = _review_summary(result, review_label)
+    return gr.update(choices=action_choices, value=action_value), preview
+def preview_selected_review(
+    review_label: str | None,
+    action_label: str | None,
+    result: RefCheckResult | None,
+    remove_unverified: bool,
+    enable_google_scholar: bool,
+    max_workers: int,
+) -> str:
+    review_index = _parse_review_index(review_label)
+    action, candidate_index = _parse_action(action_label)
+    options = _options(remove_unverified, enable_google_scholar, max_workers)
+    return preview_review_action(result, review_index, action, candidate_index, options)
+def apply_selected_review(
+    review_label: str | None,
+    action_label: str | None,
+    result: RefCheckResult | None,
+    remove_unverified: bool,
+    enable_google_scholar: bool,
+    max_workers: int,
+):
+    review_index = _parse_review_index(review_label)
+    action, candidate_index = _parse_action(action_label)
+    options = _options(remove_unverified, enable_google_scholar, max_workers)
+    try:
+        updated = apply_review_action(result, review_index, action, candidate_index, options)
+    except Exception as exc:
+        preview = f"### Manual review\n\n{exc}"
+        return (
+            result.report_markdown if result else "",
+            result.fixed_bib_path if result else None,
+            result.report_path if result else None,
+            result,
+            gr.update(),
+            gr.update(),
+            preview,
+        )
+    return _ui_outputs(updated)
+def _options(remove_unverified: bool, enable_google_scholar: bool, max_workers: int) -> RefCheckOptions:
+    return RefCheckOptions(
+        remove_unverified=remove_unverified,
+        enable_google_scholar=enable_google_scholar,
+        max_workers=int(max_workers),
+    )
+def _ui_outputs(result: RefCheckResult):
+    review_choices = _review_choices(result)
+    review_value = review_choices[0] if review_choices else None
+    action_choices = _action_choices(result, review_value)
+    action_value = action_choices[0] if action_choices else None
+    preview = _review_summary(result, review_value)
+    return (
+        result.report_markdown,
+        result.fixed_bib_path,
+        result.report_path,
+        result,
+        gr.update(choices=review_choices, value=review_value),
+        gr.update(choices=action_choices, value=action_value),
+        preview,
+    )
+def _review_choices(result: RefCheckResult | None) -> list[str]:
+    if not result:
+        return []
+    choices = []
+    for idx, item in enumerate(result.review_items, 1):
+        entry = item["entry"]
+        title = entry.title or "[missing title]"
+        if len(title) > 90:
+            title = title[:87] + "..."
+        choices.append(f"{idx}. {entry.key}: {title}")
+    return choices
+def _action_choices(result: RefCheckResult | None, review_label: str | None) -> list[str]:
+    review_index = _parse_review_index(review_label)
+    if not result or review_index < 0 or review_index >= len(result.review_items):
+        return []
+    item = result.review_items[review_index]
+    choices = []
+    for idx, candidate in enumerate(item.get("candidates", []), 1):
+        fetched = candidate.fetched_data
+        title = getattr(fetched, "title", "") or "[missing title]"
+        if len(title) > 72:
+            title = title[:69] + "..."
+        choices.append(f"Candidate {idx}: {candidate.source} ({candidate.confidence:.2f}) {title}")
+    choices.extend(["Keep original", "Remove entry"])
+    return choices
+def _review_summary(result: RefCheckResult | None, review_label: str | None) -> str:
+    review_index = _parse_review_index(review_label)
+    if not result or not result.review_items:
+        return "### Manual review\n\nNo unresolved entries."
+    if review_index < 0 or review_index >= len(result.review_items):
+        return "### Manual review\n\nSelect an unresolved entry."
+    item = result.review_items[review_index]
+    entry = item["entry"]
+    best = item.get("best_result")
+    reason = "; ".join(best.issues) if best and best.issues else "Ambiguous match"
+    return (
+        "### Manual review\n\n"
+        f"**Key:** `{entry.key}`\n\n"
+        f"**Title:** {entry.title or '[missing]'}\n\n"
+        f"**Authors:** {entry.author or '[missing]'}\n\n"
+        f"**Year:** {entry.year or '[missing]'}\n\n"
+        f"**Reason:** {reason}\n\n"
+        f"**Candidates:** {len(item.get('candidates', []))}"
+    )
+def _parse_review_index(review_label: str | None) -> int:
+    if not review_label:
+        return -1
+    try:
+        return int(review_label.split(".", 1)[0]) - 1
+    except Exception:
+        return -1
+def _parse_action(action_label: str | None) -> tuple[str, int | None]:
+    if not action_label:
+        return "", None
+    if action_label.startswith("Candidate "):
+        try:
+            number = int(action_label.split(":", 1)[0].replace("Candidate", "").strip())
+        except Exception:
+            return "", None
+        return "candidate", number - 1
+    if action_label == "Keep original":
+        return "keep", None
+    if action_label == "Remove entry":
+        return "remove", None
+    return "", None
 with gr.Blocks(title="RefCheck") as demo:
     gr.Markdown("# RefCheck")
+    session_state = gr.State(None)
     with gr.Row():
         with gr.Column(scale=1):
                 type="filepath",
             )
             remove_unverified = gr.Checkbox(
+                label="Remove entries with no candidates",
+                value=False,
             )
             enable_google_scholar = gr.Checkbox(
                 label="Google Scholar fallback",
                 value=4,
             )
             run_button = gr.Button("Run RefCheck", variant="primary")
+            api_button = gr.Button(visible=False)
         with gr.Column(scale=2):
             report = gr.Markdown(label="Report")
             fixed_bib = gr.File(label="Fixed BibTeX")
             report_file = gr.File(label="Markdown report")
+            with gr.Accordion("Manual review", open=True):
+                review_entry = gr.Dropdown(label="Unresolved entry", choices=[])
+                review_action = gr.Radio(label="Candidate/action", choices=[])
+                with gr.Row():
+                    test_button = gr.Button("Test selected")
+                    apply_button = gr.Button("Apply selected", variant="primary")
+                review_preview = gr.Markdown()
     run_button.click(
+        fn=process_bib_for_ui,
+        inputs=[bib_file, remove_unverified, enable_google_scholar, max_workers],
+        outputs=[report, fixed_bib, report_file, session_state, review_entry, review_action, review_preview],
+        api_visibility="private",
+    )
+    api_button.click(
         fn=process_bib,
         inputs=[bib_file, remove_unverified, enable_google_scholar, max_workers],
         outputs=[report, fixed_bib, report_file],
         api_name="refcheck",
     )
+    review_entry.change(
+        fn=update_review_controls,
+        inputs=[review_entry, session_state],
+        outputs=[review_action, review_preview],
+        api_visibility="private",
+    )
+    test_button.click(
+        fn=preview_selected_review,
+        inputs=[
+            review_entry,
+            review_action,
+            session_state,
+            remove_unverified,
+            enable_google_scholar,
+            max_workers,
+        ],
+        outputs=review_preview,
+        api_visibility="private",
+    )
+    apply_button.click(
+        fn=apply_selected_review,
+        inputs=[
+            review_entry,
+            review_action,
+            session_state,
+            remove_unverified,
+            enable_google_scholar,
+            max_workers,
+        ],
+        outputs=[report, fixed_bib, report_file, session_state, review_entry, review_action, review_preview],
+        api_visibility="private",
+    )
 def launch() -> None:

src/space_service.py CHANGED Viewed

@@ -3,6 +3,7 @@ Non-interactive RefCheck workflow for Hugging Face Spaces.
 """
 from __future__ import annotations
 import tempfile
 from dataclasses import dataclass, field
 from functools import lru_cache
@@ -43,11 +44,14 @@ class RefCheckOptions:
 class RefCheckResult:
     """Artifacts and summary produced by a Space run."""
     total_input: int = 0
     total_output: int = 0
     verified: int = 0
     issues: int = 0
     not_found: int = 0
     fixed_details: dict[str, list[str]] = field(default_factory=dict)
     removed_details: list[tuple[str, str, str]] = field(default_factory=list)
     review_details: list[dict[str, Any]] = field(default_factory=list)
@@ -66,12 +70,12 @@ def run_refcheck_file(file_path: str | Path, options: RefCheckOptions | None = N
     source_path = Path(file_path)
     parser = BibParser()
     entries = parser.parse_file(str(source_path))
-    result = RefCheckResult(total_input=len(entries))
     if not entries:
         result.report_markdown = "## RefCheck Report\n\nNo BibTeX entries were found."
         result.report_path = _write_report(result.report_markdown)
-        result.fixed_bib_path = _write_bib(parser, [], source_path.stem)
         return result
     sanitizer = BibSanitizer()
@@ -117,26 +121,35 @@ def run_refcheck_file(file_path: str | Path, options: RefCheckOptions | None = N
                 result.fixed_details.setdefault(entry.key, []).extend(changes)
             updated_entries.append(entry)
         elif action == "review":
-            result.review_details.append(_review_payload(entry, best_result, candidates))
             updated_entries.append(entry)
         elif action == "remove":
             if options.remove_unverified:
                 result.removed_details.append((entry.key, entry.title, "No matching metadata found in any source"))
             else:
-                result.review_details.append(
-                    {
-                        "key": entry.key,
-                        "title": entry.title,
-                        "reason": "No matching metadata found in any source",
-                        "candidates": [],
-                    }
-                )
                 updated_entries.append(entry)
         else:
             updated_entries.append(entry)
-    result.total_output = len(updated_entries)
-    fixed_path = _write_bib(parser, updated_entries, source_path.stem)
     result.fixed_bib_path = fixed_path
     verified_entries = parser.parse_file(fixed_path)
@@ -160,6 +173,125 @@ def run_refcheck_file(file_path: str | Path, options: RefCheckOptions | None = N
     return result
 def _build_fetchers() -> dict[str, Any]:
     return {
         "arxiv": ArxivFetcher(),
@@ -251,6 +383,24 @@ def _load_local_db() -> LocalConferenceDB:
     return local_db
 def _review_payload(entry: BibEntry, best_result: Any, candidates: list[Any]) -> dict[str, Any]:
     return {
         "key": entry.key,

 """
 from __future__ import annotations
+import copy
 import tempfile
 from dataclasses import dataclass, field
 from functools import lru_cache
 class RefCheckResult:
     """Artifacts and summary produced by a Space run."""
+    source_stem: str = "references"
     total_input: int = 0
     total_output: int = 0
     verified: int = 0
     issues: int = 0
     not_found: int = 0
+    entries: list[BibEntry] = field(default_factory=list)
+    review_items: list[dict[str, Any]] = field(default_factory=list)
     fixed_details: dict[str, list[str]] = field(default_factory=dict)
     removed_details: list[tuple[str, str, str]] = field(default_factory=list)
     review_details: list[dict[str, Any]] = field(default_factory=list)
     source_path = Path(file_path)
     parser = BibParser()
     entries = parser.parse_file(str(source_path))
+    result = RefCheckResult(source_stem=source_path.stem or "references", total_input=len(entries))
     if not entries:
         result.report_markdown = "## RefCheck Report\n\nNo BibTeX entries were found."
         result.report_path = _write_report(result.report_markdown)
+        result.fixed_bib_path = _write_bib(parser, [], result.source_stem)
         return result
     sanitizer = BibSanitizer()
                 result.fixed_details.setdefault(entry.key, []).extend(changes)
             updated_entries.append(entry)
         elif action == "review":
+            result.review_items.append(_review_item(entry, best_result, candidates))
             updated_entries.append(entry)
         elif action == "remove":
             if options.remove_unverified:
                 result.removed_details.append((entry.key, entry.title, "No matching metadata found in any source"))
             else:
+                result.review_items.append(_review_item(entry, best_result, candidates))
                 updated_entries.append(entry)
         else:
             updated_entries.append(entry)
+    result.entries = updated_entries
+    return finalize_result(result, options)
+def finalize_result(result: RefCheckResult, options: RefCheckOptions | None = None) -> RefCheckResult:
+    """Write current entries, re-verify them, and refresh downloadable artifacts."""
+    options = options or RefCheckOptions()
+    parser = BibParser()
+    fetchers = _build_fetchers()
+    workflow = get_default_workflow()
+    for step in workflow.steps:
+        if step.name == "google_scholar":
+            step.enabled = options.enable_google_scholar
+    comparator = MetadataComparator()
+    result.review_details = [_review_payload_from_item(item) for item in result.review_items]
+    result.total_output = len(result.entries)
+    fixed_path = _write_bib(parser, result.entries, result.source_stem)
     result.fixed_bib_path = fixed_path
     verified_entries = parser.parse_file(fixed_path)
     return result
+def preview_review_action(
+    result: RefCheckResult | None,
+    review_index: int,
+    action: str,
+    candidate_index: int | None = None,
+    options: RefCheckOptions | None = None,
+) -> str:
+    """Preview and test a manual review action without mutating the session.
+
+    Returns a Markdown string. For ``keep`` and ``remove`` it only describes
+    the effect. For ``candidate`` it applies the candidate's fetched data to
+    a deep copy of the entry, runs ``validate_entry`` on that copy, and
+    reports the field changes together with the verification outcome.
+
+    Invalid selections (no review items, index out of range, entry no longer
+    present, unknown action, bad candidate index) produce a short explanatory
+    message instead of raising.
+    """
+    if not result or not result.review_items:
+        return "No unresolved entries are available."
+    if review_index < 0 or review_index >= len(result.review_items):
+        return "Select an unresolved entry first."
+    options = options or RefCheckOptions()
+    item = result.review_items[review_index]
+    # Look the entry up in the working list by key rather than using item["entry"].
+    entry = _find_entry(result.entries, item["entry_key"])
+    if not entry:
+        return "The selected entry is no longer in the working bibliography."
+    if action == "keep":
+        return _entry_preview_markdown(entry, "Keep original entry", ["No metadata changes will be applied."])
+    if action == "remove":
+        return _entry_preview_markdown(entry, "Remove entry", ["This entry will be removed from the exported BibTeX."])
+    if action != "candidate":
+        return "Select a candidate, keep, or remove action."
+    candidates = item.get("candidates", [])
+    if candidate_index is None or candidate_index < 0 or candidate_index >= len(candidates):
+        return "Select a candidate first."
+    candidate = candidates[candidate_index]
+    # Apply the fix to a copy so the entry held in the session stays untouched.
+    temp_entry = copy.deepcopy(entry)
+    changes = apply_fix(temp_entry, candidate.fetched_data)
+    if not changes:
+        changes = ["No field-level changes are needed for this candidate."]
+    fetchers = _build_fetchers()
+    workflow = get_default_workflow()
+    # NOTE(review): this sets ``enabled`` on a step of the object returned by
+    # get_default_workflow(); if that object is shared between calls the
+    # toggle outlives this preview — confirm.
+    for step in workflow.steps:
+        if step.name == "google_scholar":
+            step.enabled = options.enable_google_scholar
+    comparator = MetadataComparator()
+    # Validate the patched copy to see how it would verify after applying.
+    best_result, _ = validate_entry(temp_entry, workflow, fetchers, comparator)
+    test_lines = [
+        f"Candidate source: {candidate.source}",
+        f"Candidate confidence before apply: {candidate.confidence:.2f}",
+    ]
+    if best_result:
+        test_lines.extend(
+            [
+                f"Verification source after apply: {best_result.source}",
+                f"Verification confidence after apply: {best_result.confidence:.2f}",
+                f"Verified after apply: {'yes' if best_result.is_match else 'no'}",
+            ]
+        )
+        if best_result.issues:
+            test_lines.append(f"Remaining issues: {'; '.join(best_result.issues)}")
+    return _entry_preview_markdown(temp_entry, "Candidate test", changes + test_lines)
+def apply_review_action(
+    result: RefCheckResult | None,
+    review_index: int,
+    action: str,
+    candidate_index: int | None = None,
+    options: RefCheckOptions | None = None,
+) -> RefCheckResult:
+    """Apply a manual review action to the working bibliography.
+
+    ``action`` is one of ``"candidate"`` (apply the fetched data of the
+    candidate at ``candidate_index``), ``"remove"`` (drop the entry) or
+    ``"keep"`` (leave it unchanged but record that it was reviewed). The
+    review item is then removed from ``result.review_items`` and the value
+    returned by ``finalize_result`` is handed back.
+
+    Raises:
+        ValueError: if there is nothing to review, ``review_index`` is out of
+            range, the entry is no longer in ``result.entries``, the action
+            is unknown, or ``candidate_index`` is missing or out of range.
+    """
+    if not result or not result.review_items:
+        raise ValueError("No unresolved entries are available.")
+    if review_index < 0 or review_index >= len(result.review_items):
+        raise ValueError("Select an unresolved entry first.")
+    options = options or RefCheckOptions()
+    item = result.review_items[review_index]
+    entry = _find_entry(result.entries, item["entry_key"])
+    if not entry:
+        raise ValueError("The selected entry is no longer in the working bibliography.")
+    if action == "candidate":
+        candidates = item.get("candidates", [])
+        if candidate_index is None or candidate_index < 0 or candidate_index >= len(candidates):
+            raise ValueError("Select a candidate first.")
+        candidate = candidates[candidate_index]
+        # apply_fix is called on the live entry here (the preview path uses a
+        # copy); its return value is used as a list of change descriptions.
+        changes = apply_fix(entry, candidate.fetched_data)
+        changes.append(f"Resolved manually with candidate from {candidate.source}.")
+        result.fixed_details.setdefault(entry.key, []).extend(changes)
+    elif action == "remove":
+        # NOTE(review): this filters by key, so every entry sharing the key is
+        # dropped, not only the object found above — confirm keys are unique.
+        result.entries = [existing for existing in result.entries if existing.key != entry.key]
+        result.removed_details.append((entry.key, entry.title, "Removed during manual review"))
+    elif action == "keep":
+        result.fixed_details.setdefault(entry.key, []).append("Marked as manually reviewed; kept original entry.")
+    else:
+        raise ValueError("Select a candidate, keep, or remove action.")
+    # NOTE(review): ``result`` has already been modified in place at this
+    # point; if finalize_result raises, the caller still holds the modified
+    # object.
+    del result.review_items[review_index]
+    return finalize_result(result, options)
+def _find_entry(entries: list[BibEntry], key: str) -> BibEntry | None:
+    for entry in entries:
+        if entry.key == key:
+            return entry
+    return None
+def _entry_preview_markdown(entry: BibEntry, title: str, lines: list[str]) -> str:
+    body = "\n".join(f"- {line}" for line in lines)
+    return (
+        f"### {title}\n\n"
+        f"**Key:** `{entry.key}`\n\n"
+        f"**Title:** {entry.title or '[missing]'}\n\n"
+        f"**Authors:** {entry.author or '[missing]'}\n\n"
+        f"**Year:** {entry.year or '[missing]'}\n\n"
+        f"{body}"
+    )
 def _build_fetchers() -> dict[str, Any]:
     return {
         "arxiv": ArxivFetcher(),
     return local_db
+def _review_item(entry: BibEntry, best_result: Any, candidates: list[Any]) -> dict[str, Any]:
+    sorted_candidates = sorted(candidates, key=lambda item: item.confidence, reverse=True)
+    return {
+        "entry_key": entry.key,
+        "entry": entry,
+        "best_result": best_result,
+        "candidates": sorted_candidates,
+    }
+def _review_payload_from_item(item: dict[str, Any]) -> dict[str, Any]:
+    return _review_payload(
+        item["entry"],
+        item.get("best_result"),
+        item.get("candidates", []),
+    )
 def _review_payload(entry: BibEntry, best_result: Any, candidates: list[Any]) -> dict[str, Any]:
     return {
         "key": entry.key,