Reza2kn
/

openmed-persian-pii-tookabert-large-coreml-int4

Token Classification

openmed-persian

Model card Files Files and versions

openmed-persian-pii-tookabert-large-coreml-int4 / inference_coreml.py

Reza2kn's picture

Upload folder using huggingface_hub

7bdf716 verified 19 days ago

History Blame Contribute Delete

1.49 kB

	"""Minimal CoreML wrapper sketch for the OpenMed Persian PII INT4 models.

	The exported CoreML graph was verified with an all-ones attention path. Tokenize
	each sliding window to exactly 256 tokens, pass int32 arrays, ignore special/pad
	offsets while building spans, then run deterministic regex/rule cleanup.
	"""
	from __future__ import annotations

	import numpy as np
	import coremltools as ct
	from transformers import AutoTokenizer


	class OpenMedPersianPIICoreML:
	    """Thin wrapper pairing a CoreML model package with its tokenizer.

	    The constructor loads both artifacts; ``logits_for_window`` tokenizes
	    one window of text to a fixed length and returns the model's logits
	    together with the tokenizer's offset mapping for that window.
	    """

	    def __init__(
	        self,
	        model_path="model.4bit-palettized.mlpackage",
	        tokenizer_path=".",
	        max_length=256,
	    ):
	        """Load the CoreML model and the tokenizer.

	        Args:
	            model_path: Path handed to ``ct.models.MLModel``.
	            tokenizer_path: Location handed to
	                ``AutoTokenizer.from_pretrained``.
	            max_length: Token count every window is truncated/padded to.
	        """
	        self.max_length = max_length
	        self.model = ct.models.MLModel(model_path)
	        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)

	    def logits_for_window(self, text: str):
	        """Run the model on a single window of text.

	        Args:
	            text: The window to tokenize; it is truncated and padded to
	                ``self.max_length`` tokens.

	        Returns:
	            A 2-tuple ``(logits, offsets)``: the first entry of the model's
	            ``"logits"`` output and the first row of the tokenizer's
	            ``"offset_mapping"``.
	            NOTE(review): presumably one row per token position in both
	            arrays -- confirm against the exported model.
	        """
	        tokenizer_options = {
	            "return_offsets_mapping": True,
	            "return_tensors": "np",
	            "truncation": True,
	            "padding": "max_length",
	            "max_length": self.max_length,
	        }
	        encoded = self.tokenizer(text, **tokenizer_options)

	        # The offsets are only returned to the caller; they are removed
	        # from the encoding and never fed to the model.
	        char_offsets = encoded.pop("offset_mapping")[0]

	        # Every model input is cast to int32 before prediction.
	        ids = encoded["input_ids"].astype(np.int32)
	        # Use all-zero segment ids when the tokenizer does not supply any.
	        zero_segments = np.zeros_like(ids)
	        segments = encoded.get("token_type_ids", zero_segments).astype(np.int32)
	        # The mask is all ones on purpose (padding is NOT masked out): the
	        # module docstring states the exported graph was verified with an
	        # all-ones attention path.
	        mask = np.ones_like(ids, dtype=np.int32)

	        model_inputs = {
	            "input_ids": ids,
	            "attention_mask": mask,
	            "token_type_ids": segments,
	        }
	        prediction = self.model.predict(model_inputs)
	        window_logits = prediction["logits"][0]
	        return window_logits, char_offsets