"""
Utility functions for Pixagram Pixel Art Generator
With FaceID support utilities

All components use commercially-permissive licenses.
"""
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import torch
from config import Config
import cv2
import numpy as np
from typing import Optional, Tuple, List

# Module-level cache for the BLIP captioner. Both names start out as None and
# are populated by preload_captioner() the first time it runs.
captioner_processor = captioner_model = None


def preload_captioner():
    """
    Load the BLIP processor and model into the module-level cache.

    Meant to be called during model initialization so that the captioner
    download/load does not land on the first generation request. Calling it
    again once the model is cached does nothing.
    """
    global captioner_processor, captioner_model

    # Already cached: nothing to do.
    if captioner_model is not None:
        return

    print("  Loading Captioner (BLIP)...")
    captioner_processor = BlipProcessor.from_pretrained(Config.CAPTIONER_REPO)
    loaded_model = BlipForConditionalGeneration.from_pretrained(
        Config.CAPTIONER_REPO
    )
    # Only publish the model to the cache once it has been moved to the device.
    captioner_model = loaded_model.to(Config.DEVICE)
    print("  [OK] Captioner loaded")


def resize_image_to_1mp(image: Image.Image) -> Image.Image:
    """
    Resizes image to approx 1MP (e.g., 1024x1024) preserving aspect ratio.

    The image is converted to RGB, scaled so that width * height is close to
    1024 * 1024, and each side is then rounded DOWN to a multiple of 64 (so
    the aspect ratio is preserved only approximately).

    For extreme aspect ratios the short side would round down to 0. In that
    case the short side is pinned to 64 and the long side receives the rest
    of the pixel budget, instead of squashing the image into a square.

    Args:
        image: Input PIL Image

    Returns:
        Resized RGB image with dimensions divisible by 64

    Raises:
        ZeroDivisionError: If the image has zero width or zero height.
    """
    image = image.convert("RGB")
    w, h = image.size
    target_pixels = 1024 * 1024
    aspect_ratio = w / h

    # Solve new_w * new_h = target_pixels with new_w / new_h = aspect_ratio
    new_h = int((target_pixels / aspect_ratio) ** 0.5)
    new_w = int(new_h * aspect_ratio)

    # Ensure divisibility by 64 for efficiency
    new_w = (new_w // 64) * 64
    new_h = (new_h // 64) * 64

    if new_w == 0 or new_h == 0:
        # One side collapsed below 64px. Keep the orientation of the input:
        # short side = 64, long side = remaining budget (a multiple of 64).
        long_side = (target_pixels // 64 // 64) * 64
        if w >= h:
            new_w, new_h = long_side, 64
        else:
            new_w, new_h = 64, long_side

    return image.resize((new_w, new_h), Image.LANCZOS)


def get_caption(image: Image.Image) -> str:
    """
    Describe an image in text using the cached BLIP captioner.

    The captioner is loaded on demand if preload_captioner() has not been
    called yet.

    Args:
        image: Input PIL Image

    Returns:
        Generated caption string (special tokens stripped)
    """
    # Lazy-load on first use; skipped when the model is already cached.
    if captioner_model is None:
        preload_captioner()

    model_inputs = captioner_processor(image, return_tensors="pt")
    model_inputs = model_inputs.to(Config.DEVICE)

    # Generation is capped at 50 new tokens.
    token_ids = captioner_model.generate(**model_inputs, max_new_tokens=50)
    return captioner_processor.decode(token_ids[0], skip_special_tokens=True)


# ============================================================
# FACE UTILITIES (Commercial-Friendly)
# ============================================================

def detect_faces_opencv(
    image: Image.Image,
    min_size: Tuple[int, int] = (30, 30)
) -> List[Tuple[int, int, int, int]]:
    """
    Detect faces using OpenCV Haar Cascades.

    License: BSD (Commercial OK)

    Images whose mode is neither "RGB" nor "L" (e.g. "RGBA", "P", "1", "LA",
    "CMYK") are converted to RGB first, so that the pixel data handed to
    OpenCV is always real grayscale or real RGB rather than palette indices,
    booleans or an unexpected channel layout.

    Args:
        image: Input PIL Image
        min_size: Minimum face size to detect

    Returns:
        List of (x, y, width, height) bounding boxes as plain Python ints
    """
    # Normalise the colour mode. Objects without a `mode` attribute (e.g. a
    # raw array) are passed through unchanged.
    mode = getattr(image, "mode", None)
    if mode is not None and mode not in ("RGB", "L"):
        image = image.convert("RGB")

    # Convert to OpenCV format
    image_np = np.array(image)
    if image_np.ndim == 2:
        gray = image_np
    else:
        gray = cv2.cvtColor(image_np, cv2.COLOR_RGB2GRAY)

    # Load cascade (BSD License)
    face_cascade = cv2.CascadeClassifier(
        cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    )

    # Detect faces
    faces = face_cascade.detectMultiScale(
        gray,
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=min_size
    )

    # detectMultiScale yields numpy integers; hand back builtin ints so the
    # result matches the annotated return type.
    return [tuple(int(v) for v in face) for face in faces]


def crop_face(
    image: Image.Image,
    bbox: Tuple[int, int, int, int],
    padding: float = 0.3
) -> Image.Image:
    """
    Cut a padded face box out of an image.

    The box is grown on every side by `padding` times its own width/height
    (truncated to whole pixels) and then clipped to the image bounds.

    Args:
        image: Input PIL Image
        bbox: (x, y, width, height) bounding box
        padding: Fraction of face size to add as padding

    Returns:
        Cropped face image
    """
    left, top, box_w, box_h = bbox
    limit_x, limit_y = image.size

    # Margin on each side, in pixels
    margin_x = int(box_w * padding)
    margin_y = int(box_h * padding)

    # Expanded box, clamped so it never leaves the image
    crop_box = (
        max(0, left - margin_x),
        max(0, top - margin_y),
        min(limit_x, left + box_w + margin_x),
        min(limit_y, top + box_h + margin_y),
    )
    return image.crop(crop_box)


def get_largest_face(
    image: Image.Image,
    padding: float = 0.3
) -> Optional[Image.Image]:
    """
    Crop the biggest detected face out of an image.

    Args:
        image: Input PIL Image
        padding: Padding around detected face

    Returns:
        Cropped face image or None if no face detected
    """
    candidates = detect_faces_opencv(image)
    if not candidates:
        return None

    def _box_area(box):
        # box is (x, y, width, height)
        return box[2] * box[3]

    biggest = max(candidates, key=_box_area)
    return crop_face(image, biggest, padding)


def has_face(image: Image.Image) -> bool:
    """
    Report whether the face detector finds at least one face.

    Args:
        image: Input PIL Image

    Returns:
        True if face detected
    """
    detections = detect_faces_opencv(image)
    return len(detections) >= 1


def visualize_face_detection(
    image: Image.Image,
    color: Tuple[int, int, int] = (0, 255, 0),
    thickness: int = 2
) -> Image.Image:
    """
    Draw bounding boxes around detected faces.

    Boxes are drawn on an RGB copy of the input so that `color` is always
    interpreted as RGB, whatever the mode of the input image. The input
    image itself is not modified.

    Args:
        image: Input PIL Image
        color: Box color (RGB)
        thickness: Line thickness

    Returns:
        RGB image with face boxes drawn
    """
    # np.array() already copies the pixel data, so no extra image.copy() is
    # needed. Converting to RGB gives a 3-channel canvas matching `color`.
    canvas = np.array(image.convert("RGB"))

    for (x, y, w, h) in detect_faces_opencv(image):
        # Builtin ints keep cv2.rectangle independent of the scalar type the
        # detector returns.
        top_left = (int(x), int(y))
        bottom_right = (int(x + w), int(y + h))
        cv2.rectangle(canvas, top_left, bottom_right, color, thickness)

    return Image.fromarray(canvas)


def align_face(
    image: Image.Image,
    target_size: Tuple[int, int] = (224, 224)
) -> Optional[Image.Image]:
    """
    Produce a fixed-size crop of the largest face for encoding.

    The largest detected face is cropped with 40% padding and resized to
    `target_size`. No rotation or landmark-based alignment is performed.

    Args:
        image: Input PIL Image
        target_size: Output size for the face crop

    Returns:
        Resized face crop or None if no face detected
    """
    cropped = get_largest_face(image, padding=0.4)
    if cropped is not None:
        # Scale the crop to the requested output size
        return cropped.resize(target_size, Image.LANCZOS)
    return None


def compare_face_regions(
    image1: Image.Image,
    image2: Image.Image
) -> float:
    """
    Compare face regions between two images using histogram comparison.

    This is a simple similarity metric that doesn't require
    a face recognition model. Useful for basic verification.

    The largest face of each image is cropped, converted to RGB, resized to
    128x128, converted to LAB, and summarised as an 8x8x8 colour histogram.
    The score is the correlation of the two histograms, clamped to [0, 1].

    Args:
        image1: First image
        image2: Second image

    Returns:
        Similarity score (0-1, higher is more similar); 0.0 if either image
        has no detectable face
    """
    face1 = get_largest_face(image1)
    face2 = get_largest_face(image2)

    if face1 is None or face2 is None:
        return 0.0

    size = (128, 128)
    histograms = []
    for face in (face1, face2):
        # Face detection accepts grayscale input, so the crop may not be RGB;
        # the RGB->LAB conversion below needs a 3-channel array.
        face_rgb = face.convert("RGB").resize(size, Image.LANCZOS)

        # Convert to LAB color space for better comparison
        face_lab = cv2.cvtColor(np.array(face_rgb), cv2.COLOR_RGB2LAB)

        # 8 bins per channel over the full 8-bit range
        hist = cv2.calcHist(
            [face_lab], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256]
        )
        cv2.normalize(hist, hist)
        histograms.append(hist)

    # Compare using correlation
    similarity = float(
        cv2.compareHist(histograms[0], histograms[1], cv2.HISTCMP_CORREL)
    )

    # Negative correlation counts as "not similar"; the upper clamp guards
    # against floating-point overshoot past the documented range.
    return min(1.0, max(0.0, similarity))


# Import-time banner: runs once when this module is first imported.
print("[OK] Utils loaded (with face utilities)")