| """ |
| LucasArts Pixel Art Style — Hugging Face Space |
| |
| Transform face photos into LucasArts adventure-game pixel art using: |
| - SDXL base (AlbedoBase XL v2.1) |
| - InstantID ControlNet for face identity preservation |
| - ZoeDepth ControlNet for structural preservation |
| - LucasArts LoRA (primerz/pixagram → lucasart.safetensors) |
| - DPMSolver++ scheduler (traditional SDXL, not LCM) |
| |
| Architecture inspired by fofr's face-to-many. |
| """ |
|
|
| import spaces |
| import gradio as gr |
| import torch |
| import time |
| import cv2 |
| import numpy as np |
| from PIL import Image |
|
|
# Replace torch.jit.script with a pass-through so any callable handed to it is
# returned unchanged instead of being compiled. This must run before the
# imports below.
# NOTE(review): presumably one of the later imports calls torch.jit.script at
# import time and fails or is slow here -- confirm which one needs it.
def _skip_torchscript(f):
    return f


torch.jit.script = _skip_torchscript
|
|
| from huggingface_hub import hf_hub_download, snapshot_download |
| from diffusers.models import ControlNetModel |
| from diffusers import AutoencoderKL, DPMSolverMultistepScheduler |
| from controlnet_aux import ZoeDetector |
| from insightface.app import FaceAnalysis |
|
|
| from pipeline_stable_diffusion_xl_instantid_img2img import ( |
| StableDiffusionXLInstantIDImg2ImgPipeline, |
| draw_kps, |
| ) |
|
|
| |
| |
| |
|
|
# --- UI text ---------------------------------------------------------------
TITLE = "LucasArts Pixel Art Style"
DESCRIPTION = (
    "Transform any face photo into LucasArts adventure-game pixel art.\n"
    "Uses InstantID for face identity + ZoeDepth for structure + LucasArts LoRA style."
)

# --- Hugging Face Hub repositories the models are pulled from --------------
BASE_MODEL_REPO = "frankjoshua/albedobaseXL_v21"
VAE_REPO = "madebyollin/sdxl-vae-fp16-fix"
INSTANTID_REPO = "InstantX/InstantID"
ZOEDEPTH_CN_REPO = "diffusers/controlnet-zoe-depth-sdxl-1.0"
ANNOTATOR_REPO = "lllyasviel/Annotators"
ANTELOPE_REPO = "DIAMONIK7777/antelopev2"

# --- Style LoRA ------------------------------------------------------------
LORA_REPO = "primerz/pixagram"
LORA_FILENAME = "lucasart.safetensors"
LORA_STRENGTH = 0.9
# Prepended to every user prompt by generate().
TRIGGER_WORD = "lucasarts style"

# --- Defaults shown in the UI ----------------------------------------------
DEFAULT_PROMPT = "a person"
DEFAULT_NEGATIVE = ", ".join(
    (
        "ugly",
        "artifacts",
        "blurry",
        "deformed",
        "disfigured",
        "low quality",
        "watermark",
        "text",
        "photo-realistic",
        "photography",
        "realistic",
    )
)
DEFAULT_GUIDANCE_SCALE = 7.0
DEFAULT_STEPS = 20
DEFAULT_FACE_STRENGTH = 0.85
# generate() passes (1.0 - image strength) to the pipeline as `strength`.
DEFAULT_IMAGE_STRENGTH = 0.15
DEFAULT_DEPTH_STRENGTH = 0.8

# --- Runtime target --------------------------------------------------------
DEVICE = "cuda"
DTYPE = torch.float16
|
|
| |
| |
| |
|
|
# Everything from here to the "ready" banner runs once, at import time.
startup_rule = "=" * 60
print(startup_rule)
print("Loading LucasArts Pixel Art Space")
print(startup_rule)

# [1/6] InsightFace face analyser (antelopev2 model pack).
print("\n[1/6] Loading InsightFace (antelopev2)...")
insightface_started = time.time()
# The model pack is downloaded under /data/models/antelopev2 and FaceAnalysis
# is given root="/data" with name="antelopev2".
snapshot_download(repo_id=ANTELOPE_REPO, local_dir="/data/models/antelopev2")
face_app = FaceAnalysis(
    name="antelopev2",
    root="/data",
    providers=["CPUExecutionProvider"],
)
face_app.prepare(ctx_id=0, det_size=(640, 640))
insightface_elapsed = time.time() - insightface_started
print(f" [OK] InsightFace loaded ({insightface_elapsed:.1f}s)")
|
|
| |
# [2/6] InstantID: fetch the ControlNet config + weights and the IP-Adapter
# checkpoint into /data/checkpoints, then load the ControlNet from disk.
print("\n[2/6] Loading InstantID ControlNet...")
instantid_started = time.time()
for instantid_file in (
    "ControlNetModel/config.json",
    "ControlNetModel/diffusion_pytorch_model.safetensors",
    "ip-adapter.bin",
):
    hf_hub_download(
        repo_id=INSTANTID_REPO,
        filename=instantid_file,
        local_dir="/data/checkpoints",
    )
identitynet = ControlNetModel.from_pretrained(
    "/data/checkpoints/ControlNetModel",
    torch_dtype=DTYPE,
)
instantid_elapsed = time.time() - instantid_started
print(f" [OK] InstantID ControlNet loaded ({instantid_elapsed:.1f}s)")

# [3/6] Depth ControlNet, loaded straight from its Hub repo id.
print("\n[3/6] Loading ZoeDepth ControlNet...")
zoedepth_started = time.time()
zoedepthnet = ControlNetModel.from_pretrained(ZOEDEPTH_CN_REPO, torch_dtype=DTYPE)
zoedepth_elapsed = time.time() - zoedepth_started
print(f" [OK] ZoeDepth ControlNet loaded ({zoedepth_elapsed:.1f}s)")
|
|
| |
# [4/6] Assemble the img2img pipeline: base checkpoint + replacement VAE +
# both ControlNets. The list order (identity first, depth second) is the order
# generate() uses for its control images and conditioning scales.
print("\n[4/6] Loading SDXL Pipeline...")
pipeline_started = time.time()
vae = AutoencoderKL.from_pretrained(VAE_REPO, torch_dtype=DTYPE)
pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_pretrained(
    BASE_MODEL_REPO,
    vae=vae,
    controlnet=[identitynet, zoedepthnet],
    torch_dtype=DTYPE,
)

# Swap the scheduler for DPMSolverMultistep with Karras sigmas, reusing the
# config of the scheduler the pipeline was loaded with.
karras_dpm_scheduler = DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config,
    use_karras_sigmas=True,
)
pipe.scheduler = karras_dpm_scheduler

# InstantID's IP-Adapter weights, fetched into /data/checkpoints.
pipe.load_ip_adapter_instantid("/data/checkpoints/ip-adapter.bin")
pipe.set_ip_adapter_scale(0.8)
pipeline_elapsed = time.time() - pipeline_started
print(f" [OK] Pipeline loaded ({pipeline_elapsed:.1f}s)")
|
|
| |
# [5/6] Load the LucasArts style LoRA and bake it into the pipeline weights.
print("\n[5/6] Loading LucasArts LoRA...")
st = time.time()
pipe.load_lora_weights(LORA_REPO, weight_name=LORA_FILENAME)
# The strength must be passed as `lora_scale`. In diffusers' fuse_lora the
# first positional parameter is not the scale, so a positional 0.9 would not
# set the strength that the log line below reports.
pipe.fuse_lora(lora_scale=LORA_STRENGTH)
print(f" [OK] LoRA fused at strength {LORA_STRENGTH} ({time.time() - st:.1f}s)")
|
|
| |
# [6/6] Depth annotator used by generate() to build the depth control image.
print("\n[6/6] Loading ZoeDetector...")
zoe_started = time.time()
zoe = ZoeDetector.from_pretrained(ANNOTATOR_REPO)
zoe.to(DEVICE)
zoe_elapsed = time.time() - zoe_started
print(f" [OK] ZoeDetector loaded ({zoe_elapsed:.1f}s)")

# Move the fully assembled pipeline (LoRA already fused) to the target device.
pipe.to(DEVICE)

ready_rule = "=" * 60
print(f"\n{ready_rule}")
print("All models loaded — ready to generate!")
print(f"{ready_rule}\n")
|
|
|
|
| |
| |
| |
|
|
def center_crop_square(img: "Image.Image") -> "Image.Image":
    """Center-crop an image to a square.

    The square's side is the shorter of the image's two dimensions. The crop
    box is computed with integer arithmetic so the result is always exactly
    ``side x side``. Fractional box edges (from dividing an odd difference by
    two) leave the rounding to the imaging library and can produce a box that
    is one pixel wider than it is tall, or vice versa.

    Args:
        img: Image exposing ``size``, ``width``, ``height`` and ``crop``.

    Returns:
        Whatever ``img.crop`` returns for the centered square box.
    """
    side = min(img.size)
    left = (img.width - side) // 2
    top = (img.height - side) // 2
    # Derive right/bottom from left/top so width == height == side by construction.
    return img.crop((left, top, left + side, top + side))
|
|
|
|
def extract_face(image: Image.Image):
    """Find the largest face in ``image`` and return what InstantID needs.

    The image is converted from RGB to BGR channel order before being handed
    to the InsightFace analyser. When several faces are found, the one with
    the largest bounding-box area is used.

    Returns:
        ``(embedding, keypoints_image)``: the chosen face's ``"embedding"``
        entry and the result of ``draw_kps`` for its ``"kps"`` entry.

    Raises:
        gr.Error: if the analyser returns no faces.
    """
    rgb_pixels = np.array(image)
    detections = face_app.get(cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR))
    if not detections:
        raise gr.Error(
            "No face detected in your image. Please upload a clear face photo."
        )

    def bbox_area(face):
        # bbox is indexed as (x1, y1, x2, y2).
        box = face["bbox"]
        return (box[2] - box[0]) * (box[3] - box[1])

    # Ascending sort by area; the last element is the largest face.
    largest_face = sorted(detections, key=bbox_area)[-1]
    embedding = largest_face["embedding"]
    keypoints_image = draw_kps(image, largest_face["kps"])
    return embedding, keypoints_image
|
|
|
|
| |
| |
| |
|
|
@spaces.GPU(duration=90)
def generate(
    face_image: Image.Image,
    prompt: str,
    negative_prompt: str,
    face_strength: float,
    image_strength: float,
    depth_strength: float,
    guidance_scale: float,
    num_steps: int,
) -> tuple:
    """Generate LucasArts-style pixel art from a face photo.

    Args:
        face_image: Uploaded photo, or ``None`` when nothing was uploaded.
        prompt: Subject description; the trigger word is prepended to it.
        negative_prompt: Negative prompt; an empty value is passed as ``None``.
        face_strength: Conditioning scale for the identity ControlNet.
        image_strength: How much of the source photo to keep; the pipeline
            receives ``1.0 - image_strength`` as its ``strength``.
        depth_strength: Conditioning scale for the depth ControlNet.
        guidance_scale: Guidance scale passed to the pipeline.
        num_steps: Number of inference steps.

    Returns:
        ``(image, info_text)`` on success. ``(None, message)`` when no image
        was provided or when generation failed with an unexpected error.

    Raises:
        gr.Error: propagated unchanged (e.g. no face detected).
    """
    if face_image is None:
        gr.Warning("Please upload a face photo first!")
        return None, "No image provided"

    # Coerce the slider value to int before it reaches the pipeline.
    steps = int(num_steps)

    try:
        # Square-crop, then scale to the 1024x1024 working resolution.
        face_image = center_crop_square(face_image)
        face_image = face_image.resize((1024, 1024), Image.LANCZOS)

        # Identity embedding + keypoint image for the identity ControlNet.
        face_emb, face_kps = extract_face(face_image)

        # Depth map for the depth ControlNet.
        with torch.no_grad():
            depth_image = zoe(face_image)

        # Both control images must share the keypoint image's size. Order
        # matches the controlnet list: identity first, depth second.
        w, h = face_kps.size
        control_images = [face_kps, depth_image.resize((w, h))]

        full_prompt = f"{TRIGGER_WORD}, {prompt}" if prompt else TRIGGER_WORD
        neg = negative_prompt or None

        # NOTE(review): image_strength == 1.0 yields strength == 0.0; confirm
        # the pipeline accepts a zero denoising strength.
        result = pipe(
            prompt=full_prompt,
            negative_prompt=neg,
            image_embeds=face_emb,
            image=face_image,
            control_image=control_images,
            strength=1.0 - image_strength,
            num_inference_steps=steps,
            guidance_scale=guidance_scale,
            controlnet_conditioning_scale=[face_strength, depth_strength],
            width=1024,
            height=1024,
        ).images[0]

        info = (
            f"Prompt: {full_prompt}\n"
            f"Steps: {steps} | Guidance: {guidance_scale}\n"
            f"Face: {face_strength} | Image: {image_strength} | Depth: {depth_strength}"
        )
        return result, info

    except gr.Error:
        # User-facing errors (e.g. no face detected) go straight to Gradio.
        raise
    except Exception as e:
        # Keep the traceback in the server log; the UI only gets the message.
        import traceback

        traceback.print_exc()
        # Constructing gr.Error without raising it displays nothing. A
        # warning toast is shown by calling it, which keeps the
        # (None, message) return intact.
        gr.Warning(f"Generation failed: {e}")
        return None, f"Error: {e}"
|
|
|
|
| |
| |
| |
|
|
# UI layout: inputs and advanced settings on the left, result on the right,
# prompt examples and a footer underneath, then the button wiring.
with gr.Blocks(
    title=TITLE,
    theme=gr.themes.Soft(primary_hue="amber", secondary_hue="orange"),
) as demo:
    gr.Markdown(f"# 🎮 {TITLE}")
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        # Left column: photo, prompt, generate button, advanced settings.
        with gr.Column(scale=1):
            input_image = gr.Image(
                label="📷 Upload a face photo",
                type="pil",
                height=400,
            )

            prompt = gr.Textbox(
                label="✨ Prompt",
                value=DEFAULT_PROMPT,
                placeholder="Describe the subject (e.g., a pirate captain, a wizard)...",
                lines=2,
            )

            generate_btn = gr.Button(
                "🎮 Generate LucasArts Style",
                variant="primary",
                size="lg",
            )

            with gr.Accordion("⚙️ Advanced Settings", open=False):
                negative_prompt = gr.Textbox(
                    label="Negative Prompt",
                    value=DEFAULT_NEGATIVE,
                    lines=2,
                )

                face_strength = gr.Slider(
                    label="Face Identity Strength",
                    minimum=0.0,
                    maximum=2.0,
                    value=DEFAULT_FACE_STRENGTH,
                    step=0.01,
                    info="Higher = more face likeness, less creative freedom",
                )

                image_strength = gr.Slider(
                    label="Image Strength",
                    minimum=0.0,
                    maximum=1.0,
                    value=DEFAULT_IMAGE_STRENGTH,
                    step=0.01,
                    info="Higher = more similarity to original photo structure/colors",
                )

                depth_strength = gr.Slider(
                    label="Depth ControlNet Strength",
                    minimum=0.0,
                    maximum=1.0,
                    value=DEFAULT_DEPTH_STRENGTH,
                    step=0.01,
                    info="Higher = more structural preservation from depth map",
                )

                guidance_scale = gr.Slider(
                    label="Guidance Scale",
                    minimum=1.0,
                    maximum=15.0,
                    value=DEFAULT_GUIDANCE_SCALE,
                    step=0.1,
                    info="Higher = stronger prompt adherence",
                )

                num_steps = gr.Slider(
                    label="Inference Steps",
                    minimum=10,
                    maximum=50,
                    value=DEFAULT_STEPS,
                    step=1,
                    info="More steps = higher quality but slower",
                )

        # Right column: generated image and a read-only settings summary.
        with gr.Column(scale=1):
            output_image = gr.Image(
                label="🖼️ LucasArts Style Result",
                type="pil",
                height=400,
            )

            gen_info = gr.Textbox(
                label="📋 Generation Info",
                lines=4,
                interactive=False,
            )

    # Clickable example prompts; each one fills only the prompt textbox.
    gr.Markdown("### 💡 Prompt Ideas")
    gr.Examples(
        examples=[
            ["a pirate captain"],
            ["a wizard in a dark tower"],
            ["a detective in a noir city"],
            ["a space adventurer"],
            ["a medieval knight"],
        ],
        inputs=[prompt],
        label="Click to use",
    )

    # Footer. Each line ends with TWO spaces before "\n": that is the Markdown
    # hard line break. With a single space the three lines merge into one.
    gr.Markdown(
        "---\n"
        "**Architecture:** SDXL + InstantID + ZoeDepth ControlNet + LucasArts LoRA  \n"
        "**Scheduler:** DPMSolver++ (Karras)  \n"
        "**Inspired by:** fofr's [face-to-many](https://github.com/fofr/cog-face-to-many)"
    )

    # Wire the button; input order must match generate()'s parameter order.
    generate_btn.click(
        fn=generate,
        inputs=[
            input_image,
            prompt,
            negative_prompt,
            face_strength,
            image_strength,
            depth_strength,
            guidance_scale,
            num_steps,
        ],
        outputs=[output_image, gen_info],
    )
|
|
|
|
if __name__ == "__main__":
    # Enable the request queue, then start the server with a share link
    # requested. queue() returns the Blocks instance, so the calls chain.
    demo.queue().launch(share=True)
|
|