"""Minimal CoreML wrapper sketch for the OpenMed Persian PII INT4 models.

The exported CoreML graph was verified with an all-ones attention path. Tokenize
each sliding window to exactly 256 tokens, pass int32 arrays, ignore special/pad
offsets while building spans, then run deterministic regex/rule cleanup.
"""
from __future__ import annotations

import coremltools as ct
import numpy as np
from transformers import AutoTokenizer
|
|
|
|
class OpenMedPersianPIICoreML:
    """Pair a CoreML model with its tokenizer to get logits for one text window.

    Attributes:
        model: ``ct.models.MLModel`` loaded from ``model_path``.
        tokenizer: Tokenizer loaded with ``AutoTokenizer.from_pretrained``.
        max_length: Token count every window is truncated/padded to.
    """

    def __init__(
        self,
        model_path="model.4bit-palettized.mlpackage",
        tokenizer_path=".",
        max_length=256,
    ):
        """Load the CoreML package and the tokenizer.

        Args:
            model_path: Path handed to ``ct.models.MLModel``.
            tokenizer_path: Path or identifier handed to
                ``AutoTokenizer.from_pretrained``.
            max_length: Fixed window length used when tokenizing.
        """
        self.model = ct.models.MLModel(model_path)
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
        self.max_length = max_length

    def logits_for_window(self, text: str):
        """Tokenize one window of text and run it through the CoreML model.

        The text is truncated and padded to ``self.max_length`` tokens, and all
        model inputs are passed as int32 arrays.

        Args:
            text: The window of text to score.

        Returns:
            A ``(logits, offsets)`` tuple: the first batch entry of the model's
            ``"logits"`` output and the first batch entry of the tokenizer's
            ``"offset_mapping"``. Offsets are returned for every position,
            padding included; filtering them is left to the caller.
            NOTE(review): logits are presumably (max_length, num_labels) --
            confirm against the exported model.
        """
        encoded = self.tokenizer(
            text,
            return_offsets_mapping=True,
            return_tensors="np",
            truncation=True,
            padding="max_length",
            max_length=self.max_length,
        )
        # Offsets are not a model input; take them out before building the feed.
        char_offsets = encoded.pop("offset_mapping")[0]

        ids = encoded["input_ids"].astype(np.int32)

        # Some tokenizers do not emit segment ids; fall back to all zeros.
        if "token_type_ids" in encoded:
            segment_ids = encoded["token_type_ids"].astype(np.int32)
        else:
            segment_ids = np.zeros_like(ids)

        # Deliberately all ones (padding positions too): the tokenizer's own
        # attention mask is ignored because the exported graph was verified
        # with an all-ones attention path.
        all_ones_mask = np.ones_like(ids, dtype=np.int32)

        prediction = self.model.predict({
            "input_ids": ids,
            "attention_mask": all_ones_mask,
            "token_type_ids": segment_ids,
        })
        return prediction["logits"][0], char_offsets
|
|