from typing import List, Union

import numpy as np
import torch
from transformers.image_processing_utils import BaseImageProcessor, BatchFeature
from transformers.image_utils import is_numpy_array

from cerberusdet.cerberusdet_preprocessor import CerberusPreprocessor


class CerberusDetImageProcessor(BaseImageProcessor):
    """Image processor that delegates preprocessing to ``CerberusPreprocessor``.

    Stores the preprocessing configuration and, on each ``preprocess`` call,
    builds a ``CerberusPreprocessor`` with it and wraps the result in a
    ``BatchFeature``.
    """

    model_input_names = ["pixel_values", "original_shapes"]

    def __init__(
        self,
        img_size: int = 640,
        stride: int = 32,
        auto: bool = True,
        half: bool = False,
        **kwargs,
    ) -> None:
        """
        Args:
            img_size (int): Image size (default is 640).
            stride (int): Model stride (default is 32).
            auto (bool): If True, uses minimum padding (rectangle).
                         If False (recommended for batches), forces a square.
            half (bool): Whether to use FP16.
            **kwargs: Forwarded unchanged to ``BaseImageProcessor.__init__``.
        """
        super().__init__(**kwargs)
        self.img_size = img_size
        self.stride = stride
        self.auto = auto
        self.half = half

    def preprocess(
        self,
        images: List[np.ndarray],
        device: Union[str, torch.device],
        **kwargs,
    ) -> BatchFeature:
        """
        Preprocess an image or batch of images.

        Args:
            images (List[np.ndarray]): List of images (BGR, as returned by cv2.imread).
            device (Union[str, torch.device]): Passed through to
                ``CerberusPreprocessor.preprocess``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            BatchFeature: Object with keys 'pixel_values' and 'original_shapes'.

        Raises:
            ValueError: If ``images`` is not a list/tuple of numpy arrays, or
                if any entry is ``None``.
        """
        # 1. Validate input data
        if not valid_images(images):
            raise ValueError(
                "Invalid image type. Must be of type List[numpy.ndarray] (BGR format)."
            )

        # valid_images() tolerates None entries, but the shape lookup below does
        # not. Fail here with a clear message instead of an AttributeError
        # raised from inside the comprehension.
        missing = [idx for idx, img in enumerate(images) if img is None]
        if missing:
            raise ValueError(
                f"Images at indices {missing} are None. "
                "cv2.imread returns None when a file cannot be read."
            )

        # First two dimensions of every input array, recorded before resizing.
        original_shapes = [img.shape[:2] for img in images]

        processor = CerberusPreprocessor(
            img_size=self.img_size,
            stride=self.stride,
            half=self.half,
            auto=self.auto,
        )

        # 2. Perform preprocessing
        # CerberusPreprocessor.preprocess returns torch.Tensor [B, 3, H, W]
        pixel_values = processor.preprocess(images, device=device)

        # 3. Format output
        data = {"pixel_values": pixel_values, "original_shapes": original_shapes}
        return BatchFeature(data=data, tensor_type="pt")


def valid_images(imgs) -> bool:
    """
    Checks that the input is a list or tuple whose entries are numpy arrays.

    ``None`` entries are skipped rather than rejected, so a sequence that
    contains ``None`` alongside arrays (or an empty sequence) is still
    reported as valid.
    """
    if not isinstance(imgs, (list, tuple)):
        return False
    return all(img is None or is_numpy_array(img) for img in imgs)