"""Minimal CoreML wrapper sketch for the OpenMed Persian PII INT4 models.

The exported CoreML graph was verified with an all-ones attention path. Tokenize
each sliding window to exactly 256 tokens, pass int32 arrays, ignore special/pad
offsets while building spans, then run deterministic regex/rule cleanup.
"""
from __future__ import annotations

import numpy as np
import coremltools as ct
from transformers import AutoTokenizer


class OpenMedPersianPIICoreML:
    """Pair a Core ML model with its tokenizer for per-window token logits.

    The instance holds a ``coremltools`` ``MLModel``, a tokenizer loaded via
    ``AutoTokenizer.from_pretrained`` and the fixed window length that every
    input is truncated/padded to.
    """

    def __init__(
        self,
        model_path="model.4bit-palettized.mlpackage",
        tokenizer_path=".",
        max_length=256,
    ):
        """Load the model package and the tokenizer.

        Args:
            model_path: Path handed to ``ct.models.MLModel``.
            tokenizer_path: Path or identifier handed to
                ``AutoTokenizer.from_pretrained``.
            max_length: Token count each window is truncated/padded to.
        """
        self.max_length = max_length
        self.model = ct.models.MLModel(model_path)
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

    def logits_for_window(self, text: str):
        """Tokenize one window of text and run it through the model.

        Args:
            text: The text of a single window.

        Returns:
            A ``(logits, offsets)`` tuple: the first entry of the model's
            ``"logits"`` output and the first entry of the tokenizer's
            ``"offset_mapping"``. Presumably one row per token position --
            confirm against the exported model's output description.
        """
        encoded = self.tokenizer(
            text,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="np",
            return_offsets_mapping=True,
        )

        # Offsets are for the caller only; remove them from the encoding and
        # keep the single row that belongs to this window.
        char_offsets = encoded.pop("offset_mapping")[0]

        # Every array handed to the model is cast to int32.
        ids = encoded["input_ids"].astype(np.int32)
        fallback_segments = np.zeros_like(ids)
        segments = encoded.get("token_type_ids", fallback_segments).astype(np.int32)

        # The tokenizer's own attention mask is deliberately not used: the
        # mask is all ones over the whole window, padding included (the module
        # docstring notes the exported graph was verified on that path).
        mask = np.ones_like(ids, dtype=np.int32)

        feed = {
            "input_ids": ids,
            "attention_mask": mask,
            "token_type_ids": segments,
        }
        prediction = self.model.predict(feed)
        return prediction["logits"][0], char_offsets