"""
Utility functions for Pixagram Pixel Art Generator
With FaceID support utilities

All components use commercially-permissive licenses.
"""
import threading

from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch
from config import Config
import cv2
import numpy as np
from typing import Optional, Tuple, List

# Simple global caching for the captioner
captioner_processor = None
captioner_model = None

# Per-thread cache for the Haar cascade. Parsing the cascade XML on every
# detection call is wasted work; the cache is thread-local so that a single
# classifier instance is never used from two threads at once.
_face_cascade_cache = threading.local()


def preload_captioner():
    """
    Preload the BLIP captioning model at startup.

    Call this during model initialization to avoid the 990MB download
    hitting on the first generation. Does nothing when the model has
    already been loaded into the module-level cache.
    """
    global captioner_processor, captioner_model

    if captioner_model is not None:
        return

    print(" Loading Captioner (BLIP)...")
    captioner_processor = BlipProcessor.from_pretrained(Config.CAPTIONER_REPO)
    captioner_model = BlipForConditionalGeneration.from_pretrained(
        Config.CAPTIONER_REPO
    ).to(Config.DEVICE)
    print(" [OK] Captioner loaded")


def resize_image_to_1mp(image: Image.Image) -> Image.Image:
    """
    Resizes image to approx 1MP (e.g., 1024x1024) preserving aspect ratio.

    Args:
        image: Input PIL Image

    Returns:
        RGB image whose dimensions are multiples of 64. When rounding down
        to a multiple of 64 would leave either side at 0 (very elongated
        inputs), a 1024x1024 image is returned instead.
    """
    image = image.convert("RGB")
    w, h = image.size

    target_pixels = 1024 * 1024
    aspect_ratio = w / h

    # Solve new_w * new_h == target_pixels with new_w / new_h == aspect_ratio
    new_h = int((target_pixels / aspect_ratio) ** 0.5)
    new_w = int(new_h * aspect_ratio)

    # Ensure divisibility by 64 for efficiency
    new_w = (new_w // 64) * 64
    new_h = (new_h // 64) * 64

    if new_w == 0 or new_h == 0:
        new_w, new_h = 1024, 1024  # Fallback

    return image.resize((new_w, new_h), Image.LANCZOS)


def get_caption(image: Image.Image) -> str:
    """
    Generates a caption for the image using BLIP.

    Args:
        image: Input PIL Image

    Returns:
        Generated caption string
    """
    # Ensure loaded (no-op if preload_captioner was already called)
    if captioner_model is None:
        preload_captioner()

    inputs = captioner_processor(image, return_tensors="pt").to(Config.DEVICE)
    out = captioner_model.generate(**inputs, max_new_tokens=50)
    return captioner_processor.decode(out[0], skip_special_tokens=True)


# ============================================================
# FACE UTILITIES (Commercial-Friendly)
# ============================================================

def _get_face_cascade():
    """Return this thread's frontal-face Haar cascade, loading it on first use."""
    cascade = getattr(_face_cascade_cache, "cascade", None)
    if cascade is None:
        # Load cascade (BSD License)
        cascade = cv2.CascadeClassifier(
            cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
        )
        _face_cascade_cache.cascade = cascade
    return cascade


def detect_faces_opencv(
    image: Image.Image,
    min_size: Tuple[int, int] = (30, 30)
) -> List[Tuple[int, int, int, int]]:
    """
    Detect faces using OpenCV Haar Cascades.
    License: BSD (Commercial OK)

    Args:
        image: Input PIL Image (any mode; non-grayscale modes are converted
            to RGB before the grayscale conversion)
        min_size: Minimum face size to detect

    Returns:
        List of (x, y, width, height) bounding boxes as plain Python ints
    """
    if image.mode == "L":
        gray = np.array(image)
    else:
        # Going through RGB makes palette, bilevel and alpha modes behave
        # like ordinary colour images instead of passing raw palette
        # indices or unsupported array layouts to OpenCV.
        gray = cv2.cvtColor(np.array(image.convert("RGB")), cv2.COLOR_RGB2GRAY)

    faces = _get_face_cascade().detectMultiScale(
        gray,
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=min_size
    )

    # Convert numpy integers to Python ints so the result matches the
    # annotated return type.
    return [tuple(int(v) for v in box) for box in faces]


def crop_face(
    image: Image.Image,
    bbox: Tuple[int, int, int, int],
    padding: float = 0.3
) -> Image.Image:
    """
    Crop face region from image with padding.

    Args:
        image: Input PIL Image
        bbox: (x, y, width, height) bounding box
        padding: Fraction of face size to add as padding on every side

    Returns:
        Cropped face image; the padded box is clamped to the image bounds
    """
    x, y, w, h = bbox
    img_w, img_h = image.size

    # Padding is proportional to the box size on each axis
    pad_w = int(w * padding)
    pad_h = int(h * padding)

    left = max(0, x - pad_w)
    top = max(0, y - pad_h)
    right = min(img_w, x + w + pad_w)
    bottom = min(img_h, y + h + pad_h)

    return image.crop((left, top, right, bottom))


def get_largest_face(
    image: Image.Image,
    padding: float = 0.3
) -> Optional[Image.Image]:
    """
    Get the largest face from an image.

    Args:
        image: Input PIL Image
        padding: Padding around detected face

    Returns:
        Cropped face image or None if no face detected
    """
    faces = detect_faces_opencv(image)
    if not faces:
        return None

    # Get largest by area (width * height)
    largest = max(faces, key=lambda f: f[2] * f[3])
    return crop_face(image, largest, padding)


def has_face(image: Image.Image) -> bool:
    """
    Quick check if image contains a face.

    Args:
        image: Input PIL Image

    Returns:
        True if face detected
    """
    return bool(detect_faces_opencv(image))


def visualize_face_detection(
    image: Image.Image,
    color: Tuple[int, int, int] = (0, 255, 0),
    thickness: int = 2
) -> Image.Image:
    """
    Draw bounding boxes around detected faces.

    Args:
        image: Input PIL Image (left unmodified)
        color: Box color (RGB)
        thickness: Line thickness

    Returns:
        Copy of the image with face boxes drawn
    """
    # np.array() already copies the pixel data, so the input stays untouched
    canvas = np.array(image)

    box_color = tuple(color)
    if canvas.ndim == 3 and canvas.shape[2] == 4 and len(box_color) == 3:
        # On a 4-channel image a 3-component colour would write alpha 0 and
        # make the boxes transparent; draw them fully opaque instead.
        box_color = box_color + (255,)

    for (x, y, w, h) in detect_faces_opencv(image):
        cv2.rectangle(canvas, (x, y), (x + w, y + h), box_color, thickness)

    return Image.fromarray(canvas)


def align_face(
    image: Image.Image,
    target_size: Tuple[int, int] = (224, 224)
) -> Optional[Image.Image]:
    """
    Detect, crop, and resize the largest face for encoding.

    This provides a standardized face crop suitable for face encoding models.
    No rotation is applied: the crop (with 40% padding) is only resized.

    Args:
        image: Input PIL Image
        target_size: Output size for the face crop

    Returns:
        Resized face image or None if no face detected
    """
    face_crop = get_largest_face(image, padding=0.4)
    if face_crop is None:
        return None

    return face_crop.resize(target_size, Image.LANCZOS)


def _lab_histogram(face: Image.Image) -> np.ndarray:
    """Return the normalized 8x8x8 LAB colour histogram of a face crop."""
    # COLOR_RGB2LAB needs a colour array, so grayscale/palette crops are
    # converted to RGB first.
    lab = cv2.cvtColor(np.array(face.convert("RGB")), cv2.COLOR_RGB2LAB)
    hist = cv2.calcHist(
        [lab], [0, 1, 2], None,
        [8, 8, 8], [0, 256, 0, 256, 0, 256]
    )
    cv2.normalize(hist, hist)
    return hist


def compare_face_regions(
    image1: Image.Image,
    image2: Image.Image
) -> float:
    """
    Compare face regions between two images using histogram comparison.

    This is a simple similarity metric that doesn't require a face
    recognition model. Useful for basic verification.

    Args:
        image1: First image
        image2: Second image

    Returns:
        Similarity score (0-1, higher is more similar); 0.0 when a face
        cannot be detected in either image
    """
    face1 = get_largest_face(image1)
    face2 = get_largest_face(image2)

    if face1 is None or face2 is None:
        return 0.0

    # Resize to same size so both histograms cover the same pixel count
    size = (128, 128)
    hist1 = _lab_histogram(face1.resize(size, Image.LANCZOS))
    hist2 = _lab_histogram(face2.resize(size, Image.LANCZOS))

    # Correlation can be negative; clamp so the score stays in [0, 1]
    similarity = cv2.compareHist(hist1, hist2, cv2.HISTCMP_CORREL)
    return max(0.0, float(similarity))


print("[OK] Utils loaded (with face utilities)")