""" Generator for Pixagram Pixel Art with FaceID ============================================ This generator: 1. Detects if a face is present in the input 2. If face detected: extracts identity and preserves it during generation 3. Applies pixel art style via LoRA 4. Uses depth control for structure preservation Commercial License: All components use commercially-permissive licenses """ import torch import torch.nn.functional as F from config import Config from utils import resize_image_to_1mp, get_caption from PIL import Image from typing import Optional, Tuple class Generator: """ Pixel Art Generator with Face Identity Preservation Features: - Automatic face detection - Identity-preserving generation - LoRA style application - Depth-based structure control """ def __init__(self, model_handler): self.mh = model_handler def prepare_control_images(self, image, width: int, height: int) -> Image.Image: """Generate depth map for structural control.""" print(f" Generating depth map ({width}x{height})...") depth_map_raw = self.mh.zoe_detector(image) depth_map = depth_map_raw.resize((width, height), Image.LANCZOS) return depth_map def prepare_face_tokens( self, image: Image.Image, use_faceid: bool = True, face_padding: float = 0.3 ) -> Tuple[Optional[torch.Tensor], bool]: """ Prepare face tokens if face is detected. Calls get_face_tokens directly which handles detection + encoding in a single pass (no redundant check_face call). Returns: (face_tokens, face_detected) tuple """ if not use_faceid: return None, False # get_face_tokens does detection + crop + encode in one pass # Returns None if no face found face_tokens = self.mh.get_face_tokens(image, padding=face_padding) if face_tokens is not None: print(f" [FaceID] Face detected - identity encoded: {face_tokens.shape}") return face_tokens, True print(" [FaceID] No face detected - skipping identity preservation") return None, False def predict( self, input_image: Image.Image, user_prompt: str = "", negative_prompt: str = "", guidance_scale: float = 1.5, num_inference_steps: int = 6, img2img_strength: float = 0.3, depth_strength: float = 0.3, seed: int = -1, # FaceID parameters use_faceid: bool = True, faceid_strength: float = 0.7, face_padding: float = 0.3 ) -> Image.Image: """ Generate pixel art with optional face identity preservation. Args: input_image: Source image user_prompt: Custom prompt (optional) negative_prompt: Negative prompt guidance_scale: CFG scale num_inference_steps: Number of steps (LCM: 4-10) img2img_strength: How much to change from original depth_strength: ControlNet depth strength seed: Random seed (-1 for random) use_faceid: Whether to detect and preserve face identity faceid_strength: Strength of face identity preservation (0-1.5) face_padding: Padding around detected face crop Returns: Generated pixel art image """ print("\n" + "-" * 50) print("Processing Input...") print("-" * 50) # Resize image processed_image = resize_image_to_1mp(input_image) target_width, target_height = processed_image.size print(f" Image size: {target_width}x{target_height}") # Build prompt if not user_prompt.strip(): try: generated_caption = get_caption(processed_image) final_prompt = f"{Config.STYLE_TRIGGER1}, {generated_caption}, {Config.STYLE_TRIGGER2}" print(f" Auto-caption: {generated_caption}") except Exception as e: print(f" Captioning failed: {e}, using default prompt.") final_prompt = f"{Config.STYLE_TRIGGER1}, a beautiful pixel art image, {Config.STYLE_TRIGGER2}" else: final_prompt = f"{Config.STYLE_TRIGGER1}, {user_prompt}, {Config.STYLE_TRIGGER2}" print(f" Prompt: {final_prompt[:80]}...") # Generate depth map print("\nGenerating Control Map (Depth)...") depth_map = self.prepare_control_images( processed_image, target_width, target_height ) # Prepare FaceID (if enabled and face detected) print("\nChecking for face identity...") face_tokens, face_detected = self.prepare_face_tokens( processed_image, use_faceid=use_faceid, face_padding=face_padding ) # Set face data on FaceID processors (no-op if no face detected) if face_detected and face_tokens is not None: print(f" Setting face data on processors (strength: {faceid_strength})") self.mh.set_face_data(face_tokens, faceid_strength) # Setup seed if seed == -1 or seed is None: seed = torch.Generator().seed() generator = torch.Generator(device=Config.DEVICE).manual_seed(int(seed)) print(f"\nUsing seed: {seed}") # Run pipeline print("\nRunning pipeline...") try: result = self.mh.pipeline( prompt=final_prompt, negative_prompt=negative_prompt, image=processed_image, control_image=depth_map, generator=generator, strength=img2img_strength, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale, controlnet_conditioning_scale=depth_strength, clip_skip=Config.CLIP_SKIP, ).images[0] finally: # Always clear face data after generation self.mh.clear_face_data() print("-" * 50) print("Generation complete!") if face_detected: print(" [FaceID] Face identity preserved ✓") print("-" * 50 + "\n") return result class GeneratorWithCallback(Generator): """ Extended generator with callback support for progress tracking. """ def predict_with_progress( self, input_image: Image.Image, progress_callback=None, **kwargs ) -> Image.Image: """ Generate with progress callback. Args: input_image: Source image progress_callback: Function to call with (step, total_steps, latents) **kwargs: Other arguments passed to predict() Returns: Generated image """ # Store original callback original_callback = self.mh.pipeline.progress_bar def wrapped_callback(step, timestep, latents): if progress_callback: progress_callback(step, kwargs.get('num_inference_steps', 6), latents) try: # This would require modifying the pipeline call # For now, just call predict directly return self.predict(input_image, **kwargs) finally: pass print("[OK] Generator ready (FaceID enabled)")