Upload folder using huggingface_hub

708f4a3 verified 12 days ago

47.7 kB

	"""
	XERV CRAYON V5.1.0 - OMNI-BACKEND FRONTEND
	==========================================
	The unified interface for CPU (AVX2/512), CUDA (NVIDIA), and ROCm (AMD) tokenization.
	Handles automatic hardware detection, zero-copy memory mapping, and dynamic profile switching.

	Architecture:
	- Default (device="auto"): Scans system for NVIDIA/AMD GPUs, falls back to CPU
	- Manual Override: Force device="cpu", "cuda", or "rocm"
	- Unified API: Same .tokenize() method works on all platforms

	Production Features:
	- Thread-safe operations with RLock
	- Zero-copy memory mapping for DAT profiles
	- Graceful fallback on hardware failures
	- Context manager for temporary profile switching
	- Full decode support with companion JSON files
	"""

	from __future__ import annotations

	import contextlib
	import json
	import logging
	import mmap
	import os
	import platform
	import sys
	import tempfile
	import threading
	from dataclasses import dataclass, field
	from enum import Enum
	from typing import (
	TYPE_CHECKING,
	Any,
	Callable,
	Dict,
	Final,
	List,
	Literal,
	Optional,
	Protocol,
	Sequence,
	Tuple,
	TypeVar,
	Union,
	cast,
	runtime_checkable,
	)

	if TYPE_CHECKING:
	from types import ModuleType

	# ============================================================================
	# LOGGING CONFIGURATION
	# ============================================================================

	# Module-wide logger. The NullHandler keeps the library quiet until a real
	# handler is attached (by the host application or enable_verbose_logging()).
	_logger = logging.getLogger("crayon.vocab")
	_logger.addHandler(logging.NullHandler())

	# Opt-in console handler: created here, attached only on request.
	_console_handler = logging.StreamHandler()
	_console_handler.setFormatter(logging.Formatter("[CRAYON] %(levelname)s: %(message)s"))


	def enable_verbose_logging(level: int = logging.INFO) -> None:
	    """
	    Turn on console output for the Crayon logger.

	    Args:
	        level: Threshold applied to the ``crayon.vocab`` logger
	            (defaults to ``logging.INFO``).
	    """
	    _logger.setLevel(level)
	    _logger.addHandler(_console_handler)


	def disable_verbose_logging() -> None:
	    """
	    Detach the console handler from the Crayon logger.

	    The logger level set by ``enable_verbose_logging`` is left as it is.
	    """
	    _logger.removeHandler(_console_handler)


	# ============================================================================
	# TYPE DEFINITIONS
	# ============================================================================

	# Values accepted wherever a device is requested.
	DeviceType = Literal["auto", "cpu", "cuda", "rocm"]

	# Token-id containers: a single sequence and a batch of sequences.
	TokenIds = List[int]
	BatchTokenIds = List[TokenIds]

	# Device priority order for auto-detection, highest priority first.
	_DEVICE_PRIORITY: Final[Tuple[DeviceType, ...]] = ("cuda", "rocm", "cpu")


	class DeviceState(Enum):
	    """Lifecycle states recorded for the active tokenizer backend."""

	    # No backend has been initialised yet.
	    UNINITIALIZED = "uninitialized"
	    # The selected backend initialised successfully.
	    READY = "ready"
	    # Backend initialisation failed.
	    FAILED = "failed"
	    # A GPU backend was requested but the CPU backend is used instead.
	    FALLBACK = "fallback"


	@runtime_checkable
	class CPUBackendProtocol(Protocol):
	    """Structural interface expected from the CPU backend module."""

	    def load_dat(self, buffer: Any) -> int:
	        """Load a DAT profile from ``buffer``."""
	        ...

	    def tokenize(self, text: str) -> List[int]:
	        """Tokenize one string into token IDs."""
	        ...

	    def get_hardware_info(self) -> str:
	        """Return a description of the CPU backend."""
	        ...


	@runtime_checkable
	class GPUBackendProtocol(Protocol):
	    """Minimal structural interface shared by GPU backend modules (CUDA/ROCm)."""

	    def get_hardware_info(self) -> Any:
	        """Return a description of the GPU device."""
	        ...


	@runtime_checkable
	class CUDABackendProtocol(Protocol):
	    """Structural interface expected from the CUDA backend module."""

	    def get_hardware_info(self) -> Any:
	        """Return a description of the CUDA device."""
	        ...

	    def load_gpu(self, data: bytes) -> Any:
	        """Upload the raw DAT profile bytes to the device."""
	        ...

	    def tokenize_batch_gpu(self, batch: List[str]) -> Any:
	        """Tokenize a batch of strings on the device."""
	        ...


	@runtime_checkable
	class ROCmBackendProtocol(Protocol):
	    """Structural interface expected from the ROCm backend module."""

	    def get_hardware_info(self) -> Any:
	        """Return a description of the ROCm device."""
	        ...

	    def load_rocm(self, data: bytes) -> int:
	        """Upload the raw DAT profile bytes to the device."""
	        ...

	    def tokenize_batch_rocm(self, batch: List[str]) -> List[List[int]]:
	        """Tokenize a batch of strings on the device."""
	        ...


	# ============================================================================
	# HARDWARE DETECTION UTILITIES
	# ============================================================================

	@dataclass(frozen=True)
	class HardwareInfo:
	    """Read-only record describing a detected compute device."""

	    # Required fields.
	    device: DeviceType  # device kind this record describes
	    name: str           # device name
	    features: str       # feature / backend label

	    # Optional details; the defaults describe an available device with no extras.
	    vram_mb: Optional[int] = None
	    compute_capability: Optional[str] = None
	    is_available: bool = True
	    error: Optional[str] = None  # message recorded when detection had to fall back


	def _detect_cuda_availability() -> Tuple[bool, Optional[str]]:
	    """
	    Probe for a usable CUDA setup.

	    Probes, in order:
	        1. The ``crayon_cuda`` extension (import plus a ``get_hardware_info()`` call).
	        2. ``torch.cuda.is_available()`` when PyTorch can be imported.
	        3. The ``CUDA_VISIBLE_DEVICES`` environment variable, which only
	           selects the message returned with a negative result.

	    Returns:
	        ``(is_available, error_message)``; the message is ``None`` on success.
	    """
	    # Probe 1: native extension. A successful call counts as available,
	    # whatever the returned info looks like.
	    try:
	        from ..c_ext import crayon_cuda
	        crayon_cuda.get_hardware_info()
	    except ImportError:
	        pass  # extension not importable; keep probing
	    except Exception as exc:
	        return False, f"CUDA extension failed: {exc}"
	    else:
	        return True, None

	    # Probe 2: PyTorch. Any failure here is treated as "not available".
	    try:
	        import torch
	        if torch.cuda.is_available():
	            return True, None
	    except Exception:
	        pass

	    # Probe 3: environment marker (devices requested, but nothing can drive them).
	    visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "")
	    if visible_devices not in ("", "-1"):
	        return False, "CUDA_VISIBLE_DEVICES set but extension not available"

	    return False, "No CUDA installation detected"


	def _detect_rocm_availability() -> Tuple[bool, Optional[str]]:
	    """
	    Probe for a usable ROCm setup.

	    Probes, in order:
	        1. The ``crayon_rocm`` extension (import plus a ``get_hardware_info()``
	           call); a string result containing "Device Not Found" counts as
	           unavailable.
	        2. The ``HIP_VISIBLE_DEVICES`` environment variable.
	        3. On Linux, the PCI vendor ID of every DRM card under
	           ``/sys/class/drm``.

	    Probes 2 and 3 can only refine the error message: without the extension
	    the result is always unavailable.

	    Returns:
	        ``(is_available, error_message)``; the message is ``None`` on success.
	    """
	    # Probe 1: native extension.
	    try:
	        from ..c_ext import crayon_rocm
	        info = crayon_rocm.get_hardware_info()
	    except ImportError:
	        pass  # extension not importable; keep probing
	    except Exception as e:
	        return False, f"ROCm extension failed: {e}"
	    else:
	        if isinstance(info, str) and "Device Not Found" in info:
	            return False, info
	        return True, None

	    # Probe 2: HIP environment marker.
	    hip_visible = os.environ.get("HIP_VISIBLE_DEVICES", "")
	    if hip_visible and hip_visible != "-1":
	        return False, "HIP_VISIBLE_DEVICES set but extension not available"

	    # Probe 3: sysfs vendor IDs. Every card is inspected, not just card0:
	    # with an integrated GPU present the AMD device may be card1 or later.
	    if sys.platform == "linux":
	        import glob

	        for vendor_path in sorted(glob.glob("/sys/class/drm/card[0-9]*/device/vendor")):
	            try:
	                with open(vendor_path, "r") as f:
	                    vendor = f.read().strip()
	            except OSError:
	                continue  # unreadable entry; try the next card
	            if vendor == "0x1002":  # AMD PCI vendor ID
	                return False, "AMD GPU detected but extension not available"

	    return False, "No ROCm installation detected"


	def _get_cpu_info() -> HardwareInfo:
	    """
	    Describe the CPU, preferring what the ``crayon_cpu`` extension reports.

	    An info string of the form ``"name [features]"`` is split at the first
	    ``[``. If the extension cannot be imported or queried, ``platform``
	    data is used instead and the failure text is stored in ``error``.

	    Returns:
	        A ``HardwareInfo`` with ``device="cpu"``.
	    """
	    try:
	        from ..c_ext import crayon_cpu
	        info_str = crayon_cpu.get_hardware_info()
	        if "[" in info_str:
	            pieces = info_str.split("[")
	            cpu_name = pieces[0].strip()
	            cpu_features = pieces[1].rstrip("]")
	        else:
	            cpu_name = info_str
	            cpu_features = "Standard"
	        return HardwareInfo(
	            device="cpu",
	            name=cpu_name,
	            features=cpu_features,
	            is_available=True,
	        )
	    except Exception as e:
	        # Extension unavailable: describe the CPU through the standard library.
	        fallback_name = platform.processor() or "Unknown CPU"
	        return HardwareInfo(
	            device="cpu",
	            name=fallback_name,
	            features="Standard",
	            is_available=True,
	            error=str(e),
	        )


	# ============================================================================
	# PROFILE RESOLUTION
	# ============================================================================

	def _get_profile_search_paths(profile_name: str) -> List[str]:
	    """
	    Build the ordered list of candidate locations for a profile's DAT file.

	    The file searched for is ``vocab_<profile_name>.dat``. Candidates, in order:
	        1. ``../resources/dat/`` relative to this module (editable install)
	        2. The ``crayon`` package resources via ``importlib.resources``
	           (only added when that lookup succeeds)
	        3. ``$CRAYON_PROFILE_DIR`` (only when the variable is set)
	        4. User cache: ``~/.cache/xerv/crayon/profiles/``
	        5. System cache: ``/var/cache/crayon/`` (Linux only)

	    Paths are not checked for existence here; the caller does that.

	    Args:
	        profile_name: Short profile name, e.g. ``"lite"``.

	    Returns:
	        Candidate file paths, highest priority first.
	    """
	    paths: List[str] = []
	    expected_dat = f"vocab_{profile_name}.dat"

	    # Package resources relative to this file (editable install).
	    rel_path = os.path.join(
	        os.path.dirname(__file__), "..", "resources", "dat", expected_dat
	    )
	    paths.append(os.path.abspath(rel_path))

	    # importlib.resources lookup (the files() API exists from Python 3.9).
	    # Best effort: any failure simply leaves this candidate out.
	    # NOTE(review): as_file() may hand out a temporary path for packages that
	    # are not plain directories, and that path is released when the context
	    # exits - confirm zipped installs are not a supported layout.
	    try:
	        from importlib import resources

	        ref = resources.files("crayon").joinpath("resources", "dat", expected_dat)
	        with resources.as_file(ref) as p:
	            paths.append(str(p))
	    except Exception:
	        pass

	    # CRAYON_PROFILE_DIR environment variable.
	    profile_dir = os.environ.get("CRAYON_PROFILE_DIR")
	    if profile_dir:
	        paths.append(os.path.join(os.path.expanduser(profile_dir), expected_dat))

	    # User cache.
	    home = os.path.expanduser("~")
	    paths.append(os.path.join(home, ".cache", "xerv", "crayon", "profiles", expected_dat))

	    # System cache (Linux).
	    if sys.platform == "linux":
	        paths.append(f"/var/cache/crayon/{expected_dat}")

	    return paths


	# ============================================================================
	# MAIN CLASS: CrayonVocab
	# ============================================================================

	class CrayonVocab:
	"""
	The High-Performance Tokenizer Interface.

	Automatically dispatches to the fastest available hardware backend.
	Supports hot-swapping vocabulary profiles and batch processing.

	Thread Safety:
	All public methods are thread-safe via an internal RLock.

	Memory Model:
	- CPU: Zero-copy mmap access to DAT file
	- CUDA: Full copy to GPU VRAM (async transfer)
	- ROCm: Full copy to GPU HBM (async transfer)

	Examples:
	>>> # Auto-detect best device
	>>> vocab = CrayonVocab(device="auto")
	>>> vocab.load_profile("lite")
	>>> tokens = vocab.tokenize("Hello, world!")

	>>> # Force CPU for latency-sensitive workloads
	>>> vocab = CrayonVocab(device="cpu")
	>>> vocab.load_profile("standard")
	>>> tokens = vocab.tokenize("def forward(self, x):")

	>>> # Batch processing on GPU
	>>> vocab = CrayonVocab(device="cuda")
	>>> vocab.load_profile("lite")
	>>> batch_tokens = vocab.tokenize(["doc1", "doc2", "doc3"])

	>>> # Context manager for temporary profile switch
	>>> vocab.load_profile("lite")
	>>> with vocab.using_profile("standard"):
	... tokens = vocab.tokenize("E=mc²")
	>>> # Back to "lite" profile automatically
	"""

	# Fixed attribute set for instances (no per-instance __dict__).
	__slots__ = (
	    # synchronisation and backend handles
	    "_lock",
	    "_cpu_backend",
	    "_cpu_backend_type",
	    "_gpu_backend",
	    # currently loaded profile
	    "_dat_file_ref",
	    "_dat_mem_ref",
	    "_idx_to_str",
	    "current_profile_path",
	    "_profile_loaded",
	    "_temp_dat_path",
	    # unknown-token settings
	    "unk_token",
	    "unk_token_id",
	    # device selection and status
	    "device",
	    "_requested_device",
	    "_device_state",
	    "_hardware_info",
	)

	def __init__(
	    self,
	    vocab_list: Optional[List[str]] = None,
	    device: DeviceType = "auto",
	    unk_token: str = "<UNK>"
	) -> None:
	    """
	    Set up the tokenizer and its backends.

	    Args:
	        vocab_list: Optional list of strings; when non-empty an ad-hoc
	            vocabulary is built from it via ``load_from_list``.
	        device: Device selection mode.
	            - "auto": pick a GPU backend when one is detected, else CPU.
	            - "cpu": use the CPU backend.
	            - "cuda": use the NVIDIA GPU backend.
	            - "rocm": use the AMD GPU backend.
	        unk_token: String used as the unknown-token placeholder.

	    Raises:
	        ImportError: If the CPU backend extension is not available.
	        ValueError: If ``device`` is not one of the accepted strings.

	    Environment Variables:
	        CRAYON_DEVICE: Overrides the device when ``device="auto"`` (cpu|cuda|rocm)
	        CRAYON_PROFILE_DIR: Custom profile search directory
	    """
	    self._lock = threading.RLock()

	    # Backend handles; the CPU one is filled in by _load_cpu_backend() below.
	    self._cpu_backend: Optional[CPUBackendProtocol] = None
	    self._gpu_backend: Optional[Union[CUDABackendProtocol, ROCmBackendProtocol]] = None

	    # No profile is loaded yet.
	    self.current_profile_path: Optional[str] = None
	    self._profile_loaded: bool = False
	    self._idx_to_str: List[str] = []
	    self._dat_file_ref: Optional[Any] = None
	    self._dat_mem_ref: Optional[mmap.mmap] = None
	    self._temp_dat_path: Optional[str] = None

	    # Public properties for test compatibility
	    self.unk_token = unk_token
	    self.unk_token_id = 1 # Hardware convention in Crayon v2

	    # Device bookkeeping; the real values are set once a backend is chosen.
	    self._requested_device: DeviceType = device
	    self._device_state: DeviceState = DeviceState.UNINITIALIZED
	    self._hardware_info: Optional[HardwareInfo] = None

	    # Reject unknown device strings before touching any backend.
	    accepted_devices = ("auto", "cpu", "cuda", "rocm")
	    if device not in accepted_devices:
	        raise ValueError(
	            f"Invalid device: {device!r}. Must be 'auto', 'cpu', 'cuda', or 'rocm'."
	        )

	    # The CPU backend is always loaded; it also serves as the fallback.
	    self._load_cpu_backend()

	    # Pick the concrete device and bring its backend up.
	    self.device = self._resolve_device(device)
	    self._init_selected_backend()
	    print(f"🔧 INITIALIZING DEVICE: {self.device.upper()}")

	    # Optional ad-hoc vocabulary.
	    if vocab_list:
	        self.load_from_list(vocab_list)

	def _load_cpu_backend(self) -> None:
	    """
	    Load the CPU backend, which every device mode needs as its fallback.

	    On success the backend object is stored in ``_cpu_backend`` and a label
	    ("Pure Python" or "Compiled C++") in ``_cpu_backend_type``. Status lines
	    are printed to stdout.

	    Raises:
	        ImportError: If the backend accessor cannot be imported, or if it
	            reports that no CPU backend is available. The failure is
	            reported exactly once.
	    """
	    reinstall_hint = (
	        "The package may not be installed correctly. Try:\n"
	        " pip install --force-reinstall xerv-crayon\n"
	        "Or for development:\n"
	        " pip install -e .\n"
	    )

	    # Only the import/lookup lives inside the try block, so the ImportError
	    # raised further down for a missing backend is not caught and wrapped a
	    # second time by this handler.
	    cpu_error: Any = None
	    try:
	        from ..c_ext import get_cpu_backend
	        cpu_backend = get_cpu_backend()
	        if cpu_backend is None:
	            from ..c_ext import get_cpu_error
	            cpu_error = get_cpu_error()
	    except ImportError as e:
	        print("🔴 CPU BACKEND FAILED: Import error")
	        print(f" Error: {str(e)}")
	        _logger.critical("Failed to load crayon_cpu extension: %s", str(e))
	        raise ImportError(
	            f"Critical Crayon Error: 'crayon_cpu' extension not found. {str(e)}\n"
	            + reinstall_hint
	        ) from e

	    if cpu_backend is None:
	        print("🔴 CPU BACKEND FAILED: Using pure Python fallback")
	        print(f" Error: {cpu_error}")
	        _logger.critical("Failed to load crayon_cpu extension: %s", cpu_error)
	        raise ImportError(
	            f"Critical Crayon Error: 'crayon_cpu' extension not found. {cpu_error}\n"
	            + reinstall_hint
	        )

	    # Tell the compiled extension apart from the pure-Python implementation
	    # by the backend's class name.
	    if 'PurePython' in str(cpu_backend.__class__):
	        print("🟡 CPU BACKEND: Pure Python (slower)")
	        backend_type = "Pure Python"
	    else:
	        print("✅ CPU BACKEND: Compiled C++ Extension (maximum performance)")
	        backend_type = "Compiled C++"

	    # Hardware info is informational only; a failure here must not abort loading.
	    try:
	        hw_info = cpu_backend.get_hardware_info()
	        print(f" Hardware: {hw_info}")
	    except Exception:
	        print(" Hardware: Unknown")

	    self._cpu_backend = cpu_backend
	    self._cpu_backend_type = backend_type
	    _logger.debug("CPU backend loaded successfully")

	def _resolve_device(self, requested: DeviceType) -> DeviceType:
	    """
	    Turn a device request into the concrete device to use.

	    An explicit request ("cpu", "cuda", "rocm") is returned unchanged. For
	    "auto", a valid ``CRAYON_DEVICE`` environment value wins; otherwise the
	    detectors are tried in the order CUDA, ROCm, with CPU as the default.

	    Args:
	        requested: The device string supplied by the caller.

	    Returns:
	        The device to initialise.
	    """
	    env_choice = os.environ.get("CRAYON_DEVICE", "").strip().lower()
	    if requested == "auto" and env_choice in ("cpu", "cuda", "rocm"):
	        requested = cast(DeviceType, env_choice)
	        _logger.info("Device override from CRAYON_DEVICE=%s", env_choice)

	    # Explicit (or overridden) request: nothing to detect.
	    if requested != "auto":
	        return requested

	    # Auto-detection, highest priority first. A later probe only runs when
	    # the earlier one reported the device as unavailable.
	    probes = (
	        ("cuda", _detect_cuda_availability, "CUDA detected and available", "CUDA check: %s"),
	        ("rocm", _detect_rocm_availability, "ROCm detected and available", "ROCm check: %s"),
	    )
	    for device_name, probe, found_msg, reason_fmt in probes:
	        available, reason = probe()
	        if available:
	            _logger.debug(found_msg)
	            return device_name
	        if reason:
	            _logger.debug(reason_fmt, reason)

	    _logger.debug("Defaulting to CPU backend")
	    return "cpu"

	def _init_selected_backend(self) -> None:
	    """
	    Initialise the backend named by ``self.device``.

	    For "cpu" the hardware description is recorded and the state becomes
	    READY. For "cuda" / "rocm" the matching extension is imported and
	    queried; if that fails for any reason, the device is switched to "cpu",
	    the CPU backend is initialised, and the state is left at FALLBACK so
	    callers can tell that the requested GPU was not used.

	    Any other device string is left untouched.
	    """
	    if self.device == "cpu":
	        self._gpu_backend = None
	        self._device_state = DeviceState.READY
	        try:
	            info = self._cpu_backend.get_hardware_info()
	            self._hardware_info = HardwareInfo(
	                device="cpu",
	                name=info.split("[")[0].strip() if "[" in info else info,
	                features=info.split("[")[1].rstrip("]") if "[" in info else "Standard",
	            )
	            print(f"✅ DEVICE READY: CPU ({self._cpu_backend_type})")
	            print(f" Hardware: {info}")
	            _logger.info("🔵 CPU Engine Active: %s", info)
	        except Exception:
	            # The backend could not describe itself; use generic detection.
	            self._hardware_info = _get_cpu_info()
	            print(f"✅ DEVICE READY: CPU ({self._cpu_backend_type})")
	            print(f" Hardware: {self._hardware_info.name}")
	            _logger.info("🔵 CPU Engine Active")
	        return

	    if self.device == "cuda":
	        try:
	            from ..c_ext import crayon_cuda
	            info = crayon_cuda.get_hardware_info()
	            self._gpu_backend = crayon_cuda
	            self._device_state = DeviceState.READY

	            if isinstance(info, dict):
	                self._hardware_info = HardwareInfo(
	                    device="cuda",
	                    name=info.get("name", "NVIDIA GPU"),
	                    features="CUDA",
	                    vram_mb=info.get("vram_mb"),
	                    compute_capability=info.get("compute_capability"),
	                )
	                _logger.info("🟢 NVIDIA CUDA Engine Active: %s", info.get("full_info", info.get("name")))
	            else:
	                self._hardware_info = HardwareInfo(
	                    device="cuda",
	                    name=str(info),
	                    features="CUDA",
	                )
	                _logger.info("🟢 NVIDIA CUDA Engine Active: %s", info)
	            return
	        except ImportError:
	            detailed_error = self._get_cuda_import_error()
	            _logger.warning("CUDA extension not compiled. Falling back to CPU.\n%s", detailed_error)
	        except Exception as e:
	            _logger.warning("CUDA initialization failed (%s). Falling back to CPU.", e)
	    elif self.device == "rocm":
	        try:
	            from ..c_ext import crayon_rocm
	            info = crayon_rocm.get_hardware_info()

	            # The extension signals a missing device through its info string.
	            if isinstance(info, str) and "Device Not Found" in info:
	                raise RuntimeError(info)

	            self._gpu_backend = crayon_rocm
	            self._device_state = DeviceState.READY

	            if isinstance(info, str):
	                self._hardware_info = HardwareInfo(
	                    device="rocm",
	                    name=info.split("[")[0].strip() if "[" in info else info,
	                    features="ROCm/HIP",
	                )
	            else:
	                self._hardware_info = HardwareInfo(
	                    device="rocm",
	                    name=str(info),
	                    features="ROCm/HIP",
	                )
	            _logger.info("🔴 AMD ROCm Engine Active: %s", info)
	            return
	        except ImportError:
	            _logger.warning("ROCm extension not compiled. Falling back to CPU.")
	        except Exception as e:
	            _logger.warning("ROCm initialization failed (%s). Falling back to CPU.", e)
	    else:
	        # Not a device this method knows how to initialise; leave state as is.
	        return

	    # GPU initialisation failed: bring up the CPU backend instead. FALLBACK
	    # is recorded *after* the CPU initialisation, which sets READY itself and
	    # would otherwise overwrite it.
	    self.device = "cpu"
	    self._init_selected_backend()
	    self._device_state = DeviceState.FALLBACK

	def _get_cuda_import_error(self) -> str:
	    """
	    Build a diagnostic report for a failed CUDA extension import.

	    The report checks, in order: ``nvcc`` on PATH, PyTorch CUDA support,
	    the ``CUDA_HOME`` / ``CUDA_PATH`` environment variables, and whether
	    PyTorch can see a GPU. It ends with rebuild instructions.

	    Returns:
	        Multi-line message; each check is marked with a pass or fail symbol
	        and failed checks are followed by suggested fixes.
	    """
	    import shutil

	    error_lines = [
	        "╔══════════════════════════════════════════════════════════════════════════════╗",
	        "║ CUDA EXTENSION COMPILATION FAILED ║",
	        "╚══════════════════════════════════════════════════════════════════════════════╝",
	        "",
	        "ROOT CAUSE ANALYSIS:",
	        "────────────────────",
	    ]

	    # Check NVCC
	    nvcc_path = shutil.which("nvcc")
	    if nvcc_path:
	        error_lines.append(f"✓ NVCC found: {nvcc_path}")
	    else:
	        error_lines.extend([
	            "✗ NVCC NOT FOUND - NVIDIA CUDA Toolkit not installed or not in PATH",
	            "",
	            "INSTALLATION FIX:",
	            "1. Install NVIDIA CUDA Toolkit (12.1+ recommended):",
	            " https://developer.nvidia.com/cuda-downloads",
	            "2. Add CUDA to PATH:",
	            " Windows: C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.x\\bin",
	            " Linux: /usr/local/cuda/bin",
	            "3. Restart terminal/command prompt",
	        ])

	    # Check PyTorch CUDA
	    try:
	        import torch
	        if torch.cuda.is_available():
	            error_lines.append(f"✓ PyTorch CUDA: Available (v{torch.__version__})")
	        else:
	            error_lines.extend([
	                f"✗ PyTorch CUDA: NOT AVAILABLE (v{torch.__version__}+cpu)",
	                "",
	                "PYTORCH FIX:",
	                "1. Uninstall CPU-only PyTorch:",
	                " pip uninstall torch",
	                "2. Install CUDA version:",
	                " pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121",
	            ])
	    except ImportError:
	        error_lines.extend([
	            "✗ PyTorch: NOT INSTALLED",
	            "",
	            "PYTORCH INSTALLATION:",
	            "pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121",
	        ])

	    # Check CUDA_HOME
	    cuda_home = os.environ.get("CUDA_HOME") or os.environ.get("CUDA_PATH")
	    if cuda_home:
	        error_lines.append(f"✓ CUDA_HOME: {cuda_home}")
	    else:
	        error_lines.extend([
	            "✗ CUDA_HOME NOT SET",
	            "",
	            "ENVIRONMENT VARIABLES:",
	            "Windows: Set CUDA_PATH = C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.x",
	            "Linux: export CUDA_HOME=/usr/local/cuda",
	        ])

	    # Check GPU hardware. Any failure (PyTorch missing, driver error, ...)
	    # is reported as "cannot detect" rather than raised; KeyboardInterrupt
	    # and SystemExit are deliberately not swallowed.
	    try:
	        import torch
	        if torch.cuda.is_available() and torch.cuda.device_count() > 0:
	            gpu_name = torch.cuda.get_device_name(0)
	            error_lines.append(f"✓ GPU Hardware: {gpu_name}")
	        else:
	            error_lines.append("✗ No CUDA-compatible GPU detected")
	    except Exception:
	        error_lines.append("⚠ Cannot detect GPU hardware")

	    # Compilation instructions
	    error_lines.extend([
	        "",
	        "RECOMPILATION INSTRUCTIONS:",
	        "──────────────────────────",
	        "After fixing the above issues, rebuild CRAYON:",
	        "",
	        "Development install:",
	        " pip install -e . --force-reinstall --verbose",
	        "",
	        "Production install:",
	        " pip install --force-reinstall xerv-crayon --verbose",
	        "",
	        "Forced CUDA build (if you have CUDA but no GPU):",
	        " set CRAYON_FORCE_CUDA=1",
	        " pip install -e . --force-reinstall",
	        "",
	        "Generic wheel build (for distribution):",
	        " set CRAYON_GENERIC_BUILD=1",
	        " python -m build",
	        "",
	        "If problems persist, check: https://github.com/Electroiscoding/CRAYON/issues",
	        "╔══════════════════════════════════════════════════════════════════════════════╗"
	    ])

	    return "\n".join(error_lines)

	def set_device(
	    self,
	    device: DeviceType,
	    *,
	    reload_profile: bool = True,
	) -> None:
	    """
	    Change the active backend while the tokenizer is in use.

	    Args:
	        device: Device to switch to ("auto", "cpu", "cuda", "rocm").
	        reload_profile: When True, a profile that was loaded before the
	            switch is loaded again on the new backend.

	    Note:
	        If the requested backend cannot be initialised, the CPU backend is
	        used instead.
	    """
	    with self._lock:
	        # Decide about the reload before the switch changes any state.
	        profile_to_restore = self.current_profile_path
	        should_reload = (
	            reload_profile
	            and self._profile_loaded
	            and profile_to_restore is not None
	        )

	        self._requested_device = device
	        self.device = self._resolve_device(device)
	        self._init_selected_backend()

	        if should_reload:
	            self.load_profile(profile_to_restore)

	def _resolve_profile_path(self, name_or_path: str) -> str:
	    """
	    Resolve a profile name or path to the path of its DAT file.

	    Args:
	        name_or_path: Either a profile name ("lite", "code") or a path to
	            a file (``~`` is expanded).

	    Returns:
	        Path to the .dat file. A direct path is returned in absolute form.

	    Raises:
	        FileNotFoundError: If no matching file exists.
	    """
	    # A direct path wins, but only if it is a file: a directory that merely
	    # shares its name with a profile must not be mistaken for the profile.
	    candidate = os.path.expanduser(name_or_path)
	    if os.path.isfile(candidate):
	        return os.path.abspath(candidate)

	    # Otherwise treat the argument as a profile name and search for it.
	    search_paths = _get_profile_search_paths(name_or_path)
	    for path in search_paths:
	        if os.path.isfile(path):
	            return path

	    # List every location that was searched so the message is complete.
	    checked_locations = "\n".join(f" - {p}" for p in search_paths)
	    raise FileNotFoundError(
	        f"Profile '{name_or_path}' not found.\n"
	        f"Searched locations:\n{checked_locations}\n"
	        f"You can specify the full path or set CRAYON_PROFILE_DIR environment variable."
	    )

	@property
	def id_to_token(self) -> List[str]:
	    """Decoder table: element ``i`` is the token text for ID ``i`` (compatibility alias)."""
	    table = self._idx_to_str
	    return table

	def __len__(self) -> int:
	    """Number of entries in the decoder table of the active vocabulary."""
	    table = self._idx_to_str
	    return len(table)

	def __contains__(self, token: str) -> bool:
	    """Report whether ``token`` is in the active vocabulary (linear scan of the table)."""
	    known_tokens = self._idx_to_str
	    return token in known_tokens

	def load_from_list(self, vocab: List[str]) -> None:
	    """
	    Build a temporary DAT profile from a list of strings and load it.

	    The vocabulary is compiled into a temporary ``.dat`` file, loaded via
	    ``load_profile``, and the decoder table is then set directly from
	    ``vocab`` (a compiled temp profile has no companion JSON file).

	    Args:
	        vocab: Token strings; position in the list is used as the decoder
	            index.

	    Raises:
	        ImportError: If the ``crayon_compiler`` extension is unavailable.
	        RuntimeError: If compiling or loading the vocabulary fails; the
	            original error is attached as ``__cause__``.
	    """
	    try:
	        from ..c_ext import crayon_compiler
	    except ImportError as exc:
	        raise ImportError(
	            "crayon_compiler extension required for load_from_list()"
	        ) from exc

	    with self._lock:
	        # Create a secure temporary file; only its path is needed.
	        fd, path = tempfile.mkstemp(suffix=".dat")
	        os.close(fd)

	        try:
	            crayon_compiler.compile_dat(vocab, path)

	            # load_profile() finds no companion .json next to the temp file,
	            # so it leaves the decoder table empty ...
	            self.load_profile(path)

	            # ... which is filled in here. The temp path is recorded so the
	            # file is removed when the profile handles are closed.
	            self._idx_to_str = list(vocab)
	            self._temp_dat_path = path

	        except Exception as e:
	            # Best-effort cleanup: a failure to delete the temp file must not
	            # hide the error that actually caused the build to fail.
	            try:
	                if os.path.exists(path):
	                    os.unlink(path)
	            except OSError:
	                pass
	            raise RuntimeError(f"Failed to build ad-hoc vocabulary: {e}") from e

	def _close_profile_handles(self) -> None:
	    """
	    Release the memory map, the DAT file handle and any temporary DAT file.

	    Every step is best effort: errors while closing or deleting are ignored.
	    """
	    # Close the memory map before the file object it was created from.
	    for attr_name in ("_dat_mem_ref", "_dat_file_ref"):
	        handle = getattr(self, attr_name)
	        if handle is None:
	            continue
	        try:
	            handle.close()
	        except Exception:
	            pass
	        setattr(self, attr_name, None)

	    # Remove the temporary DAT file created for an ad-hoc vocabulary, if any.
	    temp_path = getattr(self, "_temp_dat_path", None)
	    if temp_path and os.path.exists(temp_path):
	        try:
	            os.unlink(temp_path)
	        except Exception:
	            pass
	        self._temp_dat_path = None

	def close(self) -> None:
	    """Close the profile handles and reset the instance to the 'no profile loaded' state."""
	    with self._lock:
	        self._close_profile_handles()
	        self._profile_loaded = False
	        self._idx_to_str = []
	        self.current_profile_path = None

	def __del__(self) -> None:
	    """Best-effort cleanup when the instance is garbage-collected."""
	    try:
	        self.close()
	    except Exception:
	        # Errors during finalisation are ignored so they never propagate
	        # out of the destructor.
	        pass

	def __enter__(self) -> "CrayonVocab":
	    """Enter a ``with`` block; the tokenizer itself is the managed object."""
	    managed = self
	    return managed

	def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
	    """Leave a ``with`` block: release resources; exceptions are not suppressed."""
	    self.close()
	    return None

	def load_profile(self, name_or_path: str) -> None:
	    """
	    Hot-swap the active vocabulary profile.

	    Args:
	        name_or_path: Either a profile name (e.g., "lite", "code", "science")
	            or a full path to a .dat file.

	    Raises:
	        FileNotFoundError: If the profile cannot be found. In that case the
	            previously loaded profile (if any) stays active.
	        OSError: If the file cannot be opened or memory-mapped (this
	            includes an empty file).
	        RuntimeError: If ``self.device`` is not a known device.

	    Note:
	        The companion .json file (same base name as the .dat file) is loaded
	        for decode(). It may be a list of tokens or a dict with a "vocab"
	        key mapping token -> id. If it is missing or unreadable the decoder
	        table is left empty.

	        If loading on a GPU backend fails, the profile is loaded on the CPU
	        backend instead and the device state is set to FALLBACK.
	    """
	    with self._lock:
	        # Resolve first: an unknown name raises here, before any state has
	        # been touched, so a typo does not disable a working profile.
	        path = self._resolve_profile_path(name_or_path)
	        self._profile_loaded = False
	        self.current_profile_path = path

	        # Load decoder mapping (companion JSON)
	        json_path = os.path.splitext(path)[0] + ".json"
	        if os.path.exists(json_path):
	            try:
	                with open(json_path, "r", encoding="utf-8") as jf:
	                    loaded = json.load(jf)

	                if isinstance(loaded, list):
	                    # V1 Legacy Format (List of strings)
	                    self._idx_to_str = loaded
	                elif isinstance(loaded, dict) and "vocab" in loaded:
	                    # V2 Format (Dict with 'vocab' key: string -> int)
	                    vocab_map = loaded["vocab"]
	                    if not vocab_map:
	                        self._idx_to_str = []
	                    else:
	                        # Invert token -> id into a dense list indexed by id;
	                        # ids with no token keep the empty string.
	                        max_id = max(vocab_map.values())
	                        temp_list = [""] * (max_id + 1)
	                        for token, tid in vocab_map.items():
	                            if 0 <= tid <= max_id:
	                                temp_list[tid] = token
	                        self._idx_to_str = temp_list
	                else:
	                    raise ValueError("JSON must be a list or dict with 'vocab' key")

	            except Exception as e:
	                # A broken decoder file only disables decode(); tokenizing
	                # still works, so this is a warning rather than an error.
	                _logger.warning("Failed to load decoder JSON: %s", e)
	                self._idx_to_str = []
	        else:
	            self._idx_to_str = []

	        # Close previous handles
	        # NOTE(review): this also deletes the temporary DAT file of an ad-hoc
	        # vocabulary (see _temp_dat_path). Reloading that same temp path
	        # would therefore fail at open() below - confirm whether that reload
	        # path needs to be supported.
	        self._close_profile_handles()

	        # Memory-map the DAT file. mmap raises ValueError (not OSError) for
	        # an empty file, so both are translated into the documented OSError
	        # and the half-open handles are released.
	        try:
	            self._dat_file_ref = open(path, "rb")
	            self._dat_mem_ref = mmap.mmap(
	                self._dat_file_ref.fileno(), 0, access=mmap.ACCESS_READ
	            )
	        except (OSError, ValueError) as e:
	            self._close_profile_handles()
	            raise OSError(
	                f"Failed to memory-map profile: {path}. "
	                f"Ensure the file exists and is readable. Error: {e}"
	            ) from e

	        # Dispatch to appropriate backend
	        if self.device == "cpu":
	            self._cpu_backend.load_dat(self._dat_mem_ref)
	            self._profile_loaded = True
	            _logger.debug("Profile loaded on CPU: %s", os.path.basename(path))
	            return

	        if self.device == "cuda":
	            try:
	                raw_bytes = self._dat_mem_ref[:]
	                result = self._gpu_backend.load_gpu(raw_bytes)
	                # The CPU backend gets the profile too, so tokenize() can
	                # fall back to it if a GPU call fails later.
	                self._cpu_backend.load_dat(self._dat_mem_ref)
	                self._profile_loaded = True
	                _logger.debug("Profile loaded on CUDA: %s (result: %s)", os.path.basename(path), result)
	                return
	            except Exception as e:
	                _logger.warning("CUDA profile load failed (%s). Falling back to CPU.", e)
	                self.device = "cpu"
	                self._init_selected_backend()
	                # Recorded after re-initialisation, which sets READY itself.
	                self._device_state = DeviceState.FALLBACK
	                self._cpu_backend.load_dat(self._dat_mem_ref)
	                self._profile_loaded = True
	                return

	        if self.device == "rocm":
	            try:
	                raw_bytes = self._dat_mem_ref[:]
	                self._gpu_backend.load_rocm(raw_bytes)
	                # The CPU backend gets the profile too, so tokenize() can
	                # fall back to it if a GPU call fails later.
	                self._cpu_backend.load_dat(self._dat_mem_ref)
	                self._profile_loaded = True
	                _logger.debug("Profile loaded on ROCm: %s", os.path.basename(path))
	                return
	            except Exception as e:
	                _logger.warning("ROCm profile load failed (%s). Falling back to CPU.", e)
	                self.device = "cpu"
	                self._init_selected_backend()
	                # Recorded after re-initialisation, which sets READY itself.
	                self._device_state = DeviceState.FALLBACK
	                self._cpu_backend.load_dat(self._dat_mem_ref)
	                self._profile_loaded = True
	                return

	        raise RuntimeError(f"Unhandled device state: {self.device!r}")

	@contextlib.contextmanager
	def using_profile(self, name_or_path: str):
	    """
	    Temporarily switch to another profile for the duration of a ``with`` block.

	    Args:
	        name_or_path: Profile name or path to load inside the block.

	    Yields:
	        self: This instance, with the requested profile loaded.

	    Note:
	        On exit the profile path that was current before entering is loaded
	        again. If there was none, the new profile simply stays active.

	    Example:
	        >>> vocab.load_profile("lite")
	        >>> with vocab.using_profile("standard"):
	        ...     tokens = vocab.tokenize(source_code)
	        >>> # "lite" is active again here
	    """
	    # Remember what to go back to before anything is switched.
	    restore_to = self.current_profile_path
	    try:
	        self.load_profile(name_or_path)
	        yield self
	    finally:
	        if restore_to:
	            self.load_profile(restore_to)

	def tokenize(
	    self,
	    text_input: Union[str, Sequence[str]],
	) -> Union[List[int], List[List[int]]]:
	    """
	    Convert text into token IDs with the active vocabulary profile.

	    Args:
	        text_input: What to tokenize.
	            - str: a single sequence; the result is ``List[int]``
	            - Sequence[str]: a batch; the result is ``List[List[int]]``

	    Returns:
	        Token IDs for the string, or one list of IDs per batch item. An
	        empty batch yields an empty list.

	    Raises:
	        RuntimeError: If no profile is loaded.
	        TypeError: If a batch item is not a string.

	    Note:
	        On a GPU device the whole batch is sent to the GPU backend. If that
	        call fails, a warning is logged and the CPU backend is used.
	    """
	    with self._lock:
	        if not self._profile_loaded:
	            raise RuntimeError(
	                "No vocabulary profile loaded. Call load_profile() first."
	            )

	        # A lone string is handled as a one-element batch internally.
	        is_batch = not isinstance(text_input, str)
	        batch: List[str] = list(text_input) if is_batch else [text_input]

	        # Only a batch can be empty here.
	        if not batch:
	            return []

	        # Every item must be a string.
	        for position, entry in enumerate(batch):
	            if not isinstance(entry, str):
	                raise TypeError(
	                    f"tokenize() expects str or Sequence[str], "
	                    f"got {type(entry).__name__} at index {position}"
	                )

	        # GPU route: one batched call per request.
	        gpu = self._gpu_backend
	        if gpu is not None and self.device in ("cuda", "rocm"):
	            try:
	                if self.device == "cuda":
	                    ret = gpu.tokenize_batch_gpu(batch)
	                    # CUDA returns (results, metadata) tuple
	                    results = ret[0] if isinstance(ret, tuple) else ret
	                else:
	                    results = gpu.tokenize_batch_rocm(batch)

	                return results if is_batch else results[0]
	            except Exception as e:
	                _logger.warning("GPU tokenization failed (%s). Using CPU fallback.", e)
	                # Continue with the CPU route below.

	        # CPU route: one call per string.
	        cpu_tokenize = self._cpu_backend.tokenize
	        if not is_batch:
	            return cpu_tokenize(batch[0])
	        return [cpu_tokenize(text) for text in batch]

	def decode(self, tokens: Sequence[int]) -> str:
	    """
	    Decode token IDs back to text.

	    Each ID is looked up in the decoder table and the resulting token
	    strings are concatenated without a separator.

	    Args:
	        tokens: Sequence of token IDs to decode.

	    Returns:
	        Reconstructed text string.

	    Raises:
	        RuntimeError: If no profile is loaded or the decoder table is empty.
	        TypeError: If an element of ``tokens`` is not an int.
	        ValueError: If a token ID is outside the decoder table.

	    Note:
	        The decoder table comes from the companion .json file with the same
	        base name as the .dat profile.
	    """
	    # Hold the instance lock (as tokenize() does) so a concurrent
	    # load_profile() cannot replace the table between the range check and
	    # the lookup.
	    with self._lock:
	        if not self._profile_loaded:
	            raise RuntimeError(
	                "No vocabulary profile loaded. Call load_profile() first."
	            )

	        table = self._idx_to_str
	        if not table:
	            raise RuntimeError(
	                "Decoder mapping not loaded. Ensure the profile has a companion .json file "
	                "with the same base name as the .dat file."
	            )

	        table_size = len(table)
	        out: List[str] = []
	        for i, t in enumerate(tokens):
	            if not isinstance(t, int):
	                raise TypeError(
	                    f"decode() expects sequence of ints, got {type(t).__name__} at index {i}"
	                )
	            if t < 0 or t >= table_size:
	                raise ValueError(
	                    f"Token ID {t} out of range [0, {table_size - 1}]"
	                )
	            out.append(table[t])

	        return "".join(out)

	def get_info(self) -> Dict[str, Any]:
	"""
	Get metadata about the current engine state.

	Returns:
	Dictionary with device info, backend type, and active profile.
	"""
	profile_name = (
	os.path.basename(self.current_profile_path)
	if self.current_profile_path
	else None
	)
	backend = (
	"cpu_extension" if self.device == "cpu" else f"{self.device}_extension"
	)

	info: Dict[str, Any] = {
	"device": self.device,
	"backend": backend,
	"active_profile": profile_name,
	"profile_loaded": self._profile_loaded,
	"vocab_size": len(self._idx_to_str) if self._idx_to_str else None,
	"device_state": self._device_state.value,
	}

	if self._hardware_info:
	info["hardware"] = {
	"name": self._hardware_info.name,
	"features": self._hardware_info.features,
	}
	if self._hardware_info.vram_mb:
	info["hardware"]["vram_mb"] = self._hardware_info.vram_mb
	if self._hardware_info.compute_capability:
	info["hardware"]["compute_capability"] = self._hardware_info.compute_capability

	return info

	def __repr__(self) -> str:
	"""Return a developer-friendly representation."""
	profile = os.path.basename(self.current_profile_path) if self.current_profile_path else "None"
	return f"<CrayonVocab device={self.device!r} profile={profile!r} loaded={self._profile_loaded}>"

	@property
	def vocab_size(self) -> int:
	"""Get the vocabulary size (number of tokens)."""
	return len(self._idx_to_str) if self._idx_to_str else 0

	@property
	def is_gpu(self) -> bool:
	"""Check if running on GPU backend."""
	return self.device in ("cuda", "rocm") and self._gpu_backend is not None

	@property
	def is_profile_loaded(self) -> bool:
	"""Check if a profile is currently loaded."""
	return self._profile_loaded

	@property
	def fast_mode(self) -> bool:
	"""Check if running in high-performance mode (C++ backend)."""
	return self.device in ("cpu", "cuda", "rocm") and (self._cpu_backend is not None or self._gpu_backend is not None)

	def longest_match(self, text: str, pos: int = 0) -> Tuple[int, int]:
	"""
	Find the longest matching token at the given position (Compatibility Mode).

	Note: This is slower than tokenize() as it creates a substring.
	"""
	if pos >= len(text):
	return self.unk_token_id, 0

	# Optimization: We only need to check a reasonable window
	# The longest token is rarely more than 100 characters.
	window = text[pos : pos + 128]
	tokens = self.tokenize(window)

	if not tokens:
	return self.unk_token_id, 1

	# Get the first token ID
	first_id = tokens[0]

	# Get its length from id_to_token
	if 0 <= first_id < len(self._idx_to_str):
	token_str = self._idx_to_str[first_id]
	return first_id, len(token_str)
	else:
	return self.unk_token_id, 1


	# ============================================================================
	# CONVENIENCE FUNCTIONS
	# ============================================================================

	def quick_tokenize(
	    text: Union[str, Sequence[str]],
	    profile: str = "lite",
	    device: DeviceType = "auto",
	) -> Union[List[int], List[List[int]]]:
	    """
	    Tokenize in a single call using a throwaway CrayonVocab.

	    A new CrayonVocab is built for ``device``, ``profile`` is loaded into
	    it, and ``text`` is passed to its tokenize() method.

	    Args:
	        text: A single string or a sequence of strings to tokenize.
	        profile: Profile name handed to load_profile() (default: "lite").
	        device: Device selection handed to CrayonVocab (default: "auto").

	    Returns:
	        Whatever tokenize() returns for ``text``.

	    Note:
	        Every call pays the cost of constructing the engine and loading
	        the profile; keep a CrayonVocab instance around for repeated use.
	    """
	    engine = CrayonVocab(device=device)
	    engine.load_profile(profile)
	    token_ids = engine.tokenize(text)
	    return token_ids


	# ============================================================================
	# MODULE EXPORTS
	# ============================================================================

	# Public API of this module: the names exported by a wildcard import
	# (`from <module> import *`).
	__all__ = [
	"CrayonVocab",
	"DeviceType",
	"HardwareInfo",
	"DeviceState",
	"quick_tokenize",
	"enable_verbose_logging",
	"disable_verbose_logging",
	]