# ocr_engine.py
import io

# The OCR dependencies are optional at import time: if either package is
# missing, the failure is recorded here and reported by OcrEngine.__init__
# (which already tells the user how to install them) instead of crashing
# the import of this module.
try:
    import pytesseract
    from PIL import Image
except ImportError as _exc:
    pytesseract = None
    Image = None
    _IMPORT_ERROR = _exc
else:
    _IMPORT_ERROR = None


class OcrEngine:
    """Thin wrapper around Tesseract (via ``pytesseract``) for image-to-text.

    After construction, ``available`` is ``True`` only if the Python
    dependencies imported successfully and the Tesseract version query
    succeeded. When it is ``False``, ``extract_text`` always returns ``""``.
    """

    def __init__(self) -> None:
        """Initialize the OCR engine and probe whether Tesseract is usable.

        Never raises for a missing dependency; the outcome is reported on
        stdout and stored in ``self.available``.
        """
        self.available = False

        # A failed import of pytesseract/PIL is reported the same way as a
        # missing Tesseract binary.
        if _IMPORT_ERROR is not None:
            self._report_unavailable(_IMPORT_ERROR)
            return

        try:
            # Check availability by querying the version.
            pytesseract.get_tesseract_version()
        except Exception as e:
            self._report_unavailable(e)
            return

        print("[OK] Tesseract OCR Engine loaded.")
        self.available = True

    @staticmethod
    def _report_unavailable(error: Exception) -> None:
        """Print the failure reason followed by the installation hint."""
        print(f"[ERROR] Tesseract OCR not found: {error}")
        print(
            "[INFO] Install Tesseract system-wide "
            "(e.g., 'apt-get install tesseract-ocr') "
            "and 'pip install pytesseract'."
        )

    def extract_text(self, image_bytes: bytes) -> str:
        """Convert encoded image bytes to text.

        Args:
            image_bytes: The raw bytes of an image file, opened with
                ``PIL.Image.open``.

        Returns:
            The string produced by ``pytesseract.image_to_string`` (called
            with no extra options), or ``""`` if the engine is unavailable
            or any error occurs while decoding or recognizing the image.
        """
        if not self.available:
            return ""

        # Best-effort boundary: any decoding or OCR failure is logged and
        # turned into an empty result rather than propagated to the caller.
        try:
            # The context manager releases the image's resources even if
            # the OCR call raises.
            with Image.open(io.BytesIO(image_bytes)) as image:
                return pytesseract.image_to_string(image)
        except Exception as e:
            print(f"[WARN] OCR Extraction Error: {e}")
            return ""