"""LucasArts Pixel Art Style — Hugging Face Space.

Transform face photos into LucasArts adventure-game pixel art using:

- SDXL base (AlbedoBase XL v2.1)
- InstantID ControlNet for face identity preservation
- ZoeDepth ControlNet for structural preservation
- LucasArts LoRA (primerz/pixagram → lucasart.safetensors)
- DPMSolver++ scheduler (traditional SDXL, not LCM)

Architecture inspired by fofr's face-to-many.
"""

# NOTE: the import order below is kept exactly as in the original file.
# `spaces` is imported first, and the `torch.jit.script` patch sits between
# the torch import and the diffusers/controlnet_aux imports — presumably so
# the patch is already in effect when those libraries are imported.
import spaces
import gradio as gr
import torch
import time
import cv2
import numpy as np
from PIL import Image

torch.jit.script = lambda f: f  # Disable JIT for compatibility

from huggingface_hub import hf_hub_download, snapshot_download
from diffusers.models import ControlNetModel
from diffusers import AutoencoderKL, DPMSolverMultistepScheduler
from controlnet_aux import ZoeDetector
from insightface.app import FaceAnalysis

from pipeline_stable_diffusion_xl_instantid_img2img import (
    StableDiffusionXLInstantIDImg2ImgPipeline,
    draw_kps,
)

# ============================================================
# CONFIGURATION
# ============================================================

TITLE = "LucasArts Pixel Art Style"
DESCRIPTION = """Transform any face photo into LucasArts adventure-game pixel art.
Uses InstantID for face identity + ZoeDepth for structure + LucasArts LoRA style."""

# Model repos
BASE_MODEL_REPO = "frankjoshua/albedobaseXL_v21"
VAE_REPO = "madebyollin/sdxl-vae-fp16-fix"
INSTANTID_REPO = "InstantX/InstantID"
ZOEDEPTH_CN_REPO = "diffusers/controlnet-zoe-depth-sdxl-1.0"
ANNOTATOR_REPO = "lllyasviel/Annotators"
ANTELOPE_REPO = "DIAMONIK7777/antelopev2"

# LucasArts LoRA
LORA_REPO = "primerz/pixagram"
LORA_FILENAME = "lucasart.safetensors"
LORA_STRENGTH = 0.9
TRIGGER_WORD = "lucasarts style"

# Generation defaults
DEFAULT_PROMPT = "a person"
DEFAULT_NEGATIVE = (
    "ugly, artifacts, blurry, deformed, disfigured, low quality, "
    "watermark, text, photo-realistic, photography, realistic"
)
DEFAULT_GUIDANCE_SCALE = 7.0
DEFAULT_STEPS = 20
DEFAULT_FACE_STRENGTH = 0.85
DEFAULT_IMAGE_STRENGTH = 0.15
DEFAULT_DEPTH_STRENGTH = 0.8

DEVICE = "cuda"
DTYPE = torch.float16

# Local storage locations used by the loading code below.
DATA_ROOT = "/data"
ANTELOPE_DIR = "/data/models/antelopev2"
CHECKPOINT_DIR = "/data/checkpoints"

# Files fetched from INSTANTID_REPO into CHECKPOINT_DIR.
INSTANTID_FILES = (
    "ControlNetModel/config.json",
    "ControlNetModel/diffusion_pytorch_model.safetensors",
    "ip-adapter.bin",
)

# ============================================================
# MODEL LOADING (runs once at startup)
# ============================================================

print("=" * 60)
print("Loading LucasArts Pixel Art Space")
print("=" * 60)

# 1. InsightFace — face detection & embedding
print("\n[1/6] Loading InsightFace (antelopev2)...")
st = time.time()
snapshot_download(repo_id=ANTELOPE_REPO, local_dir=ANTELOPE_DIR)
face_app = FaceAnalysis(
    name="antelopev2",
    root=DATA_ROOT,
    providers=["CPUExecutionProvider"],
)
face_app.prepare(ctx_id=0, det_size=(640, 640))
print(f" [OK] InsightFace loaded ({time.time() - st:.1f}s)")

# 2. InstantID ControlNet
print("\n[2/6] Loading InstantID ControlNet...")
st = time.time()
for instantid_file in INSTANTID_FILES:
    hf_hub_download(
        repo_id=INSTANTID_REPO,
        filename=instantid_file,
        local_dir=CHECKPOINT_DIR,
    )
identitynet = ControlNetModel.from_pretrained(
    f"{CHECKPOINT_DIR}/ControlNetModel", torch_dtype=DTYPE
)
print(f" [OK] InstantID ControlNet loaded ({time.time() - st:.1f}s)")

# 3. ZoeDepth ControlNet
print("\n[3/6] Loading ZoeDepth ControlNet...")
st = time.time()
zoedepthnet = ControlNetModel.from_pretrained(ZOEDEPTH_CN_REPO, torch_dtype=DTYPE)
print(f" [OK] ZoeDepth ControlNet loaded ({time.time() - st:.1f}s)")

# 4. SDXL Pipeline with dual ControlNet
print("\n[4/6] Loading SDXL Pipeline...")
st = time.time()
vae = AutoencoderKL.from_pretrained(VAE_REPO, torch_dtype=DTYPE)
pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_pretrained(
    BASE_MODEL_REPO,
    vae=vae,
    # Order matters: generate() passes control images and conditioning
    # scales in this same [identity, depth] order.
    controlnet=[identitynet, zoedepthnet],
    torch_dtype=DTYPE,
)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config, use_karras_sigmas=True
)
pipe.load_ip_adapter_instantid(f"{CHECKPOINT_DIR}/ip-adapter.bin")
pipe.set_ip_adapter_scale(0.8)
print(f" [OK] Pipeline loaded ({time.time() - st:.1f}s)")

# 5. Load and fuse LucasArts LoRA
print("\n[5/6] Loading LucasArts LoRA...")
st = time.time()
pipe.load_lora_weights(LORA_REPO, weight_name=LORA_FILENAME)
pipe.fuse_lora(LORA_STRENGTH)
print(f" [OK] LoRA fused at strength {LORA_STRENGTH} ({time.time() - st:.1f}s)")

# 6. ZoeDetector for depth maps
print("\n[6/6] Loading ZoeDetector...")
st = time.time()
zoe = ZoeDetector.from_pretrained(ANNOTATOR_REPO)
zoe.to(DEVICE)
print(f" [OK] ZoeDetector loaded ({time.time() - st:.1f}s)")

# Move pipeline to GPU
pipe.to(DEVICE)

print("\n" + "=" * 60)
print("All models loaded — ready to generate!")
print("=" * 60 + "\n")


# ============================================================
# HELPERS
# ============================================================


def center_crop_square(img: Image.Image) -> Image.Image:
    """Center-crop an image to a square.

    The square's side is the shorter of the image's two dimensions.

    Args:
        img: Source image.

    Returns:
        A new image of size ``(side, side)`` taken from the middle of *img*.
    """
    side = min(img.size)
    # Integer offsets on purpose: Pillow rounds fractional crop boxes, so
    # float halves (e.g. a 4x3 input -> box 0.5..3.5) could round outwards
    # and produce a crop that is not square.
    left = (img.width - side) // 2
    top = (img.height - side) // 2
    return img.crop((left, top, left + side, top + side))


def _bbox_area(face) -> float:
    """Return the area of a detected face's ``bbox`` (x1, y1, x2, y2)."""
    box = face["bbox"]
    return (box[2] - box[0]) * (box[3] - box[1])


def extract_face(image: Image.Image):
    """Detect a face with InsightFace and prepare the InstantID inputs.

    Args:
        image: RGB image to analyse.

    Returns:
        ``(embedding, keypoints_image)`` — the ``embedding`` entry of the
        selected face and the result of ``draw_kps`` for its ``kps``.

    Raises:
        gr.Error: If no face is found in *image*.
    """
    # The detector is fed a BGR array, hence the channel swap.
    bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    faces = face_app.get(bgr)
    if not faces:
        raise gr.Error(
            "No face detected in your image. Please upload a clear face photo."
        )

    # Use the largest face
    face_info = max(faces, key=_bbox_area)
    face_emb = face_info["embedding"]
    face_kps = draw_kps(image, face_info["kps"])
    return face_emb, face_kps


# ============================================================
# GENERATION
# ============================================================


@spaces.GPU(duration=90)
def generate(
    face_image: Image.Image,
    prompt: str,
    negative_prompt: str,
    face_strength: float,
    image_strength: float,
    depth_strength: float,
    guidance_scale: float,
    num_steps: int,
) -> tuple:
    """Generate LucasArts-style pixel art from a face photo.

    Args:
        face_image: Uploaded photo, or ``None`` when nothing was uploaded.
        prompt: Subject description; the trigger word is prepended to it.
        negative_prompt: Negative prompt; an empty value is passed as ``None``.
        face_strength: Conditioning scale for the InstantID ControlNet.
        image_strength: How much of the source photo to keep; the pipeline
            receives ``strength = 1.0 - image_strength``.
        depth_strength: Conditioning scale for the ZoeDepth ControlNet.
        guidance_scale: Guidance scale forwarded to the pipeline.
        num_steps: Number of inference steps.

    Returns:
        ``(image, info_text)``. ``image`` is ``None`` when no photo was given.

    Raises:
        gr.Error: If no face is detected, or if any step of the generation
            fails (the original exception is chained as the cause).
    """
    if face_image is None:
        gr.Warning("Please upload a face photo first!")
        return None, "No image provided"

    try:
        # Slider values may arrive as floats; the step count must be an int.
        steps = int(num_steps)

        # Prepare image (RGB, square crop, 1024x1024). The RGB conversion
        # guards the 3-channel assumptions made downstream.
        face_image = center_crop_square(face_image.convert("RGB"))
        face_image = face_image.resize((1024, 1024), Image.LANCZOS)

        # Extract face embedding + keypoints
        face_emb, face_kps = extract_face(face_image)

        # Generate depth map
        with torch.no_grad():
            depth_image = zoe(face_image)

        # Dual control images: [InstantID keypoints, ZoeDepth], with the
        # depth map resized to match the keypoint image.
        control_images = [face_kps, depth_image.resize(face_kps.size)]

        # Build prompt with trigger word
        full_prompt = f"{TRIGGER_WORD}, {prompt}" if prompt else TRIGGER_WORD

        # Generate
        result = pipe(
            prompt=full_prompt,
            negative_prompt=negative_prompt or None,
            image_embeds=face_emb,
            image=face_image,
            control_image=control_images,
            # Inverse of the UI's "Image Strength": keeping more of the
            # photo means passing a lower img2img strength.
            strength=1.0 - image_strength,
            num_inference_steps=steps,
            guidance_scale=guidance_scale,
            controlnet_conditioning_scale=[face_strength, depth_strength],
            width=1024,
            height=1024,
        ).images[0]

        info = (
            f"Prompt: {full_prompt}\n"
            f"Steps: {steps} | Guidance: {guidance_scale}\n"
            f"Face: {face_strength} | Image: {image_strength} | Depth: {depth_strength}"
        )
        return result, info

    except gr.Error:
        # Already a user-facing error (e.g. no face detected): pass through.
        raise
    except Exception as e:
        # gr.Error only reaches the user when it is raised; merely
        # constructing it (as the previous code did) displays nothing.
        raise gr.Error(f"Generation failed: {str(e)}") from e


# ============================================================
# GRADIO UI
# ============================================================

with gr.Blocks(
    title=TITLE,
    theme=gr.themes.Soft(primary_hue="amber", secondary_hue="orange"),
) as demo:
    gr.Markdown(f"# 🎮 {TITLE}")
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        with gr.Column(scale=1):
            input_image = gr.Image(
                label="📷 Upload a face photo",
                type="pil",
                height=400,
            )
            prompt = gr.Textbox(
                label="✨ Prompt",
                value=DEFAULT_PROMPT,
                placeholder="Describe the subject (e.g., a pirate captain, a wizard)...",
                lines=2,
            )
            generate_btn = gr.Button(
                "🎮 Generate LucasArts Style",
                variant="primary",
                size="lg",
            )

            with gr.Accordion("⚙️ Advanced Settings", open=False):
                negative_prompt = gr.Textbox(
                    label="Negative Prompt",
                    value=DEFAULT_NEGATIVE,
                    lines=2,
                )
                face_strength = gr.Slider(
                    label="Face Identity Strength",
                    minimum=0.0,
                    maximum=2.0,
                    value=DEFAULT_FACE_STRENGTH,
                    step=0.01,
                    info="Higher = more face likeness, less creative freedom",
                )
                image_strength = gr.Slider(
                    label="Image Strength",
                    minimum=0.0,
                    maximum=1.0,
                    value=DEFAULT_IMAGE_STRENGTH,
                    step=0.01,
                    info="Higher = more similarity to original photo structure/colors",
                )
                depth_strength = gr.Slider(
                    label="Depth ControlNet Strength",
                    minimum=0.0,
                    maximum=1.0,
                    value=DEFAULT_DEPTH_STRENGTH,
                    step=0.01,
                    info="Higher = more structural preservation from depth map",
                )
                guidance_scale = gr.Slider(
                    label="Guidance Scale",
                    minimum=1.0,
                    maximum=15.0,
                    value=DEFAULT_GUIDANCE_SCALE,
                    step=0.1,
                    info="Higher = stronger prompt adherence",
                )
                num_steps = gr.Slider(
                    label="Inference Steps",
                    minimum=10,
                    maximum=50,
                    value=DEFAULT_STEPS,
                    step=1,
                    info="More steps = higher quality but slower",
                )

        with gr.Column(scale=1):
            output_image = gr.Image(
                label="🖼️ LucasArts Style Result",
                type="pil",
                height=400,
            )
            gen_info = gr.Textbox(
                label="📋 Generation Info",
                lines=4,
                interactive=False,
            )

    gr.Markdown("### 💡 Prompt Ideas")
    gr.Examples(
        examples=[
            ["a pirate captain"],
            ["a wizard in a dark tower"],
            ["a detective in a noir city"],
            ["a space adventurer"],
            ["a medieval knight"],
        ],
        inputs=[prompt],
        label="Click to use",
    )

    gr.Markdown(
        "---\n"
        "**Architecture:** SDXL + InstantID + ZoeDepth ControlNet + LucasArts LoRA \n"
        "**Scheduler:** DPMSolver++ (Karras) \n"
        "**Inspired by:** fofr's [face-to-many](https://github.com/fofr/cog-face-to-many)"
    )

    # Wire up: the input order must match generate()'s parameter order.
    generate_btn.click(
        fn=generate,
        inputs=[
            input_image,
            prompt,
            negative_prompt,
            face_strength,
            image_strength,
            depth_strength,
            guidance_scale,
            num_steps,
        ],
        outputs=[output_image, gen_info],
    )


if __name__ == "__main__":
    demo.queue()
    demo.launch(share=True)