"""Minimal CoreML wrapper sketch for the OpenMed Persian PII INT4 models. The exported CoreML graph was verified with an all-ones attention path. Tokenize each sliding window to exactly 256 tokens, pass int32 arrays, ignore special/pad offsets while building spans, then run deterministic regex/rule cleanup. """ from __future__ import annotations import numpy as np import coremltools as ct from transformers import AutoTokenizer class OpenMedPersianPIICoreML: def __init__(self, model_path="model.4bit-palettized.mlpackage", tokenizer_path=".", max_length=256): self.model = ct.models.MLModel(model_path) self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path) self.max_length = max_length def logits_for_window(self, text: str): enc = self.tokenizer( text, return_offsets_mapping=True, return_tensors="np", truncation=True, padding="max_length", max_length=self.max_length, ) offsets = enc.pop("offset_mapping")[0] input_ids = enc["input_ids"].astype(np.int32) token_type_ids = enc.get("token_type_ids", np.zeros_like(input_ids)).astype(np.int32) attention_mask = np.ones_like(input_ids, dtype=np.int32) out = self.model.predict({ "input_ids": input_ids, "attention_mask": attention_mask, "token_type_ids": token_type_ids, }) return out["logits"][0], offsets