""" Local Conference Database: fast, offline title lookup against DBLP index. This module provides a local database of conference/journal proceedings downloaded from DBLP. It serves as a "ground truth" source that eliminates the need for network API calls for entries that match known publications. """ import json import re from pathlib import Path from typing import Optional from dataclasses import dataclass def _normalize(title: str) -> str: """Normalize a title for index lookup (must match build_index.py).""" title = re.sub(r'\{([^}]*)\}', r'\1', title) title = re.sub(r'[^\w\s]', ' ', title.lower()) return re.sub(r'\s+', ' ', title).strip() @dataclass class LocalMatch: """Result from a local DB lookup.""" title: str author: str year: str booktitle: str journal: str doi: str url: str pages: str volume: str entry_type: str source_file: str class LocalConferenceDB: """Title-based lookup against locally cached DBLP proceedings.""" def __init__(self, index_dir: str = None): if index_dir is None: base = Path(__file__).resolve().parent.parent / "data" self._shard_dir = base / "index_shards" self._legacy_path = base / "conference_index.json" else: self._shard_dir = Path(index_dir) self._legacy_path = Path(index_dir).parent / "conference_index.json" self._idx: dict = {} self._loaded = False def load(self) -> bool: """Load index from shards or legacy single file. Returns True if successful.""" try: # Try sharded index first if self._shard_dir.exists(): shard_files = sorted(self._shard_dir.glob("index_*.json")) if shard_files: for shard_path in shard_files: shard_data = json.loads(shard_path.read_text(encoding="utf-8")) self._idx.update(shard_data) self._loaded = True print(f" 📚 Local DB: {len(self._idx):,} entries loaded ({len(shard_files)} shards).") return True # Fallback: legacy single file if self._legacy_path.exists(): self._idx = json.loads(self._legacy_path.read_text(encoding="utf-8")) self._loaded = True print(f" 📚 Local DB: {len(self._idx):,} entries loaded.") return True print(" ⚠ Local DB not found. Run: python scripts/update_db.py && python scripts/build_index.py") return False except Exception as e: print(f" ⚠ Failed to load local DB: {e}") return False @property def is_loaded(self) -> bool: return self._loaded and len(self._idx) > 0 def lookup(self, title: str) -> Optional[LocalMatch]: """ Look up an entry by title. Returns LocalMatch if found, None otherwise. """ if not self._loaded: return None key = _normalize(title) data = self._idx.get(key) if not data: return None return LocalMatch( title=data.get("title", ""), author=data.get("author", ""), year=data.get("year", ""), booktitle=data.get("booktitle", ""), journal=data.get("journal", ""), doi=data.get("doi", ""), url=data.get("url", ""), pages=data.get("pages", ""), volume=data.get("volume", ""), entry_type=data.get("_type", "inproceedings"), source_file=data.get("_source", ""), )