Pixa
/

face-to-pixel-art-sdxl

Model card Files Files and versions

xet

Community

Pixa committed on Mar 10

Commit

8e2062d

verified ·

1 Parent(s): 05e96d0

Upload 4 files

Browse files

Files changed (3) hide show

README.md +53 -3
app.py +405 -0
requirements.txt +14 -0

README.md CHANGED Viewed

@@ -1,3 +1,53 @@
----
-license: mit
----

+---
+title: LucasArts Pixel Art Style
+emoji: 🎮
+colorFrom: amber
+colorTo: orange
+sdk: gradio
+sdk_version: 5.12.0
+app_file: app.py
+pinned: true
+license: mit
+short_description: Transform face photos into LucasArts adventure-game pixel art!
+disable_embedding: false
+---
+# 🎮 LucasArts Pixel Art Style
+Transform any face photo into LucasArts adventure-game pixel art using AI.
+## Architecture
+- **Base Model:** AlbedoBase XL v2.1 (SDXL)
+- **Face Identity:** InstantID ControlNet + IP-Adapter
+- **Depth Structure:** ZoeDepth ControlNet
+- **Style LoRA:** LucasArts pixel art (`primerz/pixagram → lucasart.safetensors`)
+- **Scheduler:** DPMSolver++ with Karras sigmas
+- **Face Detection:** InsightFace (antelopev2)
+## How It Works
+1. Upload a clear face photo
+2. Write a prompt describing the character
+3. Adjust face/depth/image strength as needed
+4. Click "Generate LucasArts Style"
+The pipeline detects your face, extracts identity embeddings, generates a depth map,
+and uses dual ControlNets to produce a pixel-art image that preserves your likeness
+while applying the LucasArts adventure-game aesthetic.
+## Key Parameters
+| Parameter | Default | Effect |
+|-----------|---------|--------|
+| Face Identity Strength | 0.85 | Higher = more likeness, less style freedom |
+| Image Strength | 0.15 | Higher = closer to original photo |
+| Depth Strength | 0.80 | Higher = more structural preservation |
+| Guidance Scale | 7.0 | Higher = stronger prompt adherence |
+| Steps | 20 | More = higher quality, slower |
+## Credits
+- Inspired by fofr's [face-to-many](https://github.com/fofr/cog-face-to-many)
+- InstantID by [InstantX](https://huggingface.co/InstantX/InstantID)
+- LucasArts LoRA from [primerz/pixagram](https://huggingface.co/primerz/pixagram)

app.py ADDED Viewed

	@@ -0,0 +1,405 @@

+"""
+LucasArts Pixel Art Style — Hugging Face Space
+Transform face photos into LucasArts adventure-game pixel art using:
+  - SDXL base (AlbedoBase XL v2.1)
+  - InstantID ControlNet for face identity preservation
+  - ZoeDepth ControlNet for structural preservation
+  - LucasArts LoRA (primerz/pixagram → lucasart.safetensors)
+  - DPMSolver++ scheduler (traditional SDXL, not LCM)
+Architecture inspired by fofr's face-to-many.
+"""
+import spaces
+import gradio as gr
+import torch
+import time
+import cv2
+import numpy as np
+from PIL import Image
+torch.jit.script = lambda f: f  # Disable JIT for compatibility
+from huggingface_hub import hf_hub_download, snapshot_download
+from diffusers.models import ControlNetModel
+from diffusers import AutoencoderKL, DPMSolverMultistepScheduler
+from controlnet_aux import ZoeDetector
+from insightface.app import FaceAnalysis
+from pipeline_stable_diffusion_xl_instantid_img2img import (
+    StableDiffusionXLInstantIDImg2ImgPipeline,
+    draw_kps,
+)
+# ============================================================
+# CONFIGURATION
+# ============================================================
+# Module-level constants consumed by the model-loading script, generate()
+# and the Gradio UI further down in this file.
+# TITLE / DESCRIPTION are rendered as Markdown at the top of the UI.
+TITLE = "LucasArts Pixel Art Style"
+DESCRIPTION = """Transform any face photo into LucasArts adventure-game pixel art.
+Uses InstantID for face identity + ZoeDepth for structure + LucasArts LoRA style."""
+# Model repos
+# Hugging Face Hub repo ids passed to from_pretrained / hf_hub_download /
+# snapshot_download in the model-loading section.
+BASE_MODEL_REPO = "frankjoshua/albedobaseXL_v21"
+VAE_REPO = "madebyollin/sdxl-vae-fp16-fix"
+INSTANTID_REPO = "InstantX/InstantID"
+ZOEDEPTH_CN_REPO = "diffusers/controlnet-zoe-depth-sdxl-1.0"
+ANNOTATOR_REPO = "lllyasviel/Annotators"
+ANTELOPE_REPO = "DIAMONIK7777/antelopev2"
+# LucasArts LoRA
+LORA_REPO = "primerz/pixagram"
+LORA_FILENAME = "lucasart.safetensors"
+# Scale handed to pipe.fuse_lora() when the LoRA is fused at startup.
+LORA_STRENGTH = 0.9
+# Prepended to every user prompt in generate().
+TRIGGER_WORD = "lucasarts style"
+# Generation defaults
+# These seed the initial values of the UI widgets; generate() receives
+# whatever the user leaves or changes them to.
+DEFAULT_PROMPT = "a person"
+DEFAULT_NEGATIVE = (
+    "ugly, artifacts, blurry, deformed, disfigured, low quality, "
+    "watermark, text, photo-realistic, photography, realistic"
+)
+DEFAULT_GUIDANCE_SCALE = 7.0
+DEFAULT_STEPS = 20
+# ControlNet conditioning scale for the InstantID (face) ControlNet.
+DEFAULT_FACE_STRENGTH = 0.85
+# generate() converts this to the img2img strength as 1.0 - image_strength.
+DEFAULT_IMAGE_STRENGTH = 0.15
+# ControlNet conditioning scale for the ZoeDepth ControlNet.
+DEFAULT_DEPTH_STRENGTH = 0.8
+# Target device and dtype used when loading / moving the models below.
+DEVICE = "cuda"
+DTYPE = torch.float16
+# ============================================================
+# MODEL LOADING (runs once at startup)
+# ============================================================
+# Everything in this section executes at import time, top to bottom. Later
+# steps use names bound by earlier ones (identitynet / zoedepthnet -> pipe),
+# so the statement order matters. `st` is reused as a per-step wall-clock
+# timer for the progress prints.
+print("=" * 60)
+print("Loading LucasArts Pixel Art Space")
+print("=" * 60)
+# 1. InsightFace — face detection & embedding
+print("\n[1/6] Loading InsightFace (antelopev2)...")
+st = time.time()
+# The model files are downloaded under /data/models/antelopev2 and
+# FaceAnalysis is pointed at root="/data" with name="antelopev2".
+# NOTE(review): presumably FaceAnalysis resolves <root>/models/<name> — confirm.
+snapshot_download(repo_id=ANTELOPE_REPO, local_dir="/data/models/antelopev2")
+face_app = FaceAnalysis(
+    name="antelopev2",
+    root="/data",
+    providers=["CPUExecutionProvider"],
+)
+face_app.prepare(ctx_id=0, det_size=(640, 640))
+print(f"  [OK] InsightFace loaded ({time.time() - st:.1f}s)")
+# 2. InstantID ControlNet
+print("\n[2/6] Loading InstantID ControlNet...")
+st = time.time()
+# Fetch the ControlNet config + weights and the IP-Adapter checkpoint into
+# /data/checkpoints; they are loaded from those local paths below.
+hf_hub_download(
+    repo_id=INSTANTID_REPO,
+    filename="ControlNetModel/config.json",
+    local_dir="/data/checkpoints",
+)
+hf_hub_download(
+    repo_id=INSTANTID_REPO,
+    filename="ControlNetModel/diffusion_pytorch_model.safetensors",
+    local_dir="/data/checkpoints",
+)
+hf_hub_download(
+    repo_id=INSTANTID_REPO,
+    filename="ip-adapter.bin",
+    local_dir="/data/checkpoints",
+)
+identitynet = ControlNetModel.from_pretrained(
+    "/data/checkpoints/ControlNetModel", torch_dtype=DTYPE
+)
+print(f"  [OK] InstantID ControlNet loaded ({time.time() - st:.1f}s)")
+# 3. ZoeDepth ControlNet
+print("\n[3/6] Loading ZoeDepth ControlNet...")
+st = time.time()
+zoedepthnet = ControlNetModel.from_pretrained(
+    ZOEDEPTH_CN_REPO, torch_dtype=DTYPE
+)
+print(f"  [OK] ZoeDepth ControlNet loaded ({time.time() - st:.1f}s)")
+# 4. SDXL Pipeline with dual ControlNet
+print("\n[4/6] Loading SDXL Pipeline...")
+st = time.time()
+vae = AutoencoderKL.from_pretrained(VAE_REPO, torch_dtype=DTYPE)
+# The ControlNet order [identity, depth] is mirrored in generate(), which
+# passes control images and conditioning scales in the same order.
+pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_pretrained(
+    BASE_MODEL_REPO,
+    vae=vae,
+    controlnet=[identitynet, zoedepthnet],
+    torch_dtype=DTYPE,
+)
+# Replace the pipeline's scheduler with DPMSolverMultistep, reusing the
+# existing scheduler config and enabling Karras sigmas.
+pipe.scheduler = DPMSolverMultistepScheduler.from_config(
+    pipe.scheduler.config, use_karras_sigmas=True
+)
+pipe.load_ip_adapter_instantid("/data/checkpoints/ip-adapter.bin")
+pipe.set_ip_adapter_scale(0.8)
+print(f"  [OK] Pipeline loaded ({time.time() - st:.1f}s)")
+# 5. Load and fuse LucasArts LoRA
+print("\n[5/6] Loading LucasArts LoRA...")
+st = time.time()
+pipe.load_lora_weights(LORA_REPO, weight_name=LORA_FILENAME)
+# The LoRA is fused once here at LORA_STRENGTH; nothing in this file
+# changes the LoRA scale per request.
+pipe.fuse_lora(LORA_STRENGTH)
+print(f"  [OK] LoRA fused at strength {LORA_STRENGTH} ({time.time() - st:.1f}s)")
+# 6. ZoeDetector for depth maps
+print("\n[6/6] Loading ZoeDetector...")
+st = time.time()
+zoe = ZoeDetector.from_pretrained(ANNOTATOR_REPO)
+zoe.to(DEVICE)
+print(f"  [OK] ZoeDetector loaded ({time.time() - st:.1f}s)")
+# Move pipeline to GPU
+pipe.to(DEVICE)
+print("\n" + "=" * 60)
+print("All models loaded — ready to generate!")
+print("=" * 60 + "\n")
+# ============================================================
+# HELPERS
+# ============================================================
+def center_crop_square(img: Image.Image) -> Image.Image:
+    """Center-crop an image to a square."""
+    square_size = min(img.size)
+    left = (img.width - square_size) / 2
+    top = (img.height - square_size) / 2
+    right = (img.width + square_size) / 2
+    bottom = (img.height + square_size) / 2
+    return img.crop((left, top, right, bottom))
+def extract_face(image: Image.Image):
+    """
+    Detect face with InsightFace, return (embedding, keypoints_image).
+    Raises gr.Error if no face found.
+    """
+    bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
+    faces = face_app.get(bgr)
+    if not faces:
+        raise gr.Error(
+            "No face detected in your image. Please upload a clear face photo."
+        )
+    # Use the largest face
+    face_info = sorted(
+        faces,
+        key=lambda x: (x["bbox"][2] - x["bbox"][0]) * (x["bbox"][3] - x["bbox"][1]),
+    )[-1]
+    face_emb = face_info["embedding"]
+    face_kps = draw_kps(image, face_info["kps"])
+    return face_emb, face_kps
+# ============================================================
+# GENERATION
+# ============================================================
+@spaces.GPU(duration=90)
+def generate(
+    face_image: Image.Image,
+    prompt: str,
+    negative_prompt: str,
+    face_strength: float,
+    image_strength: float,
+    depth_strength: float,
+    guidance_scale: float,
+    num_steps: int,
+) -> tuple:
+    """Generate LucasArts-style pixel art from a face photo."""
+    if face_image is None:
+        gr.Warning("Please upload a face photo first!")
+        return None, "No image provided"
+    try:
+        # Prepare image (square crop, 1024x1024)
+        face_image = center_crop_square(face_image)
+        face_image = face_image.resize((1024, 1024), Image.LANCZOS)
+        # Extract face embedding + keypoints
+        face_emb, face_kps = extract_face(face_image)
+        # Generate depth map
+        with torch.no_grad():
+            depth_image = zoe(face_image)
+        # Dual control images: [InstantID keypoints, ZoeDepth]
+        w, h = face_kps.size
+        control_images = [face_kps, depth_image.resize((w, h))]
+        # Build prompt with trigger word
+        full_prompt = f"{TRIGGER_WORD}, {prompt}" if prompt else TRIGGER_WORD
+        neg = negative_prompt if negative_prompt else None
+        # Generate
+        result = pipe(
+            prompt=full_prompt,
+            negative_prompt=neg,
+            image_embeds=face_emb,
+            image=face_image,
+            control_image=control_images,
+            strength=1.0 - image_strength,
+            num_inference_steps=num_steps,
+            guidance_scale=guidance_scale,
+            controlnet_conditioning_scale=[face_strength, depth_strength],
+            width=1024,
+            height=1024,
+        ).images[0]
+        info = (
+            f"Prompt: {full_prompt}\n"
+            f"Steps: {num_steps} | Guidance: {guidance_scale}\n"
+            f"Face: {face_strength} | Image: {image_strength} | Depth: {depth_strength}"
+        )
+        return result, info
+    except gr.Error:
+        raise
+    except Exception as e:
+        gr.Error(f"Generation failed: {str(e)}")
+        return None, f"Error: {str(e)}"
+# ============================================================
+# GRADIO UI
+# ============================================================
+# Build the Gradio interface: a two-column layout with the inputs (photo,
+# prompt, button, advanced sliders) on the left and the outputs (result
+# image, generation info) on the right.
+with gr.Blocks(
+    title=TITLE,
+    theme=gr.themes.Soft(primary_hue="amber", secondary_hue="orange"),
+) as demo:
+    gr.Markdown(f"# 🎮 {TITLE}")
+    gr.Markdown(DESCRIPTION)
+    with gr.Row():
+        # Left column: inputs.
+        with gr.Column(scale=1):
+            # type="pil" so generate() receives a PIL image (or None).
+            input_image = gr.Image(
+                label="📷 Upload a face photo",
+                type="pil",
+                height=400,
+            )
+            prompt = gr.Textbox(
+                label="✨ Prompt",
+                value=DEFAULT_PROMPT,
+                placeholder="Describe the subject (e.g., a pirate captain, a wizard)...",
+                lines=2,
+            )
+            generate_btn = gr.Button(
+                "🎮 Generate LucasArts Style",
+                variant="primary",
+                size="lg",
+            )
+            # Collapsed by default; every control starts at its DEFAULT_* value.
+            with gr.Accordion("⚙️ Advanced Settings", open=False):
+                negative_prompt = gr.Textbox(
+                    label="Negative Prompt",
+                    value=DEFAULT_NEGATIVE,
+                    lines=2,
+                )
+                # Unlike the other strength sliders, this one goes up to 2.0.
+                face_strength = gr.Slider(
+                    label="Face Identity Strength",
+                    minimum=0.0,
+                    maximum=2.0,
+                    value=DEFAULT_FACE_STRENGTH,
+                    step=0.01,
+                    info="Higher = more face likeness, less creative freedom",
+                )
+                # generate() passes 1.0 - this value as the img2img strength.
+                image_strength = gr.Slider(
+                    label="Image Strength",
+                    minimum=0.0,
+                    maximum=1.0,
+                    value=DEFAULT_IMAGE_STRENGTH,
+                    step=0.01,
+                    info="Higher = more similarity to original photo structure/colors",
+                )
+                depth_strength = gr.Slider(
+                    label="Depth ControlNet Strength",
+                    minimum=0.0,
+                    maximum=1.0,
+                    value=DEFAULT_DEPTH_STRENGTH,
+                    step=0.01,
+                    info="Higher = more structural preservation from depth map",
+                )
+                guidance_scale = gr.Slider(
+                    label="Guidance Scale",
+                    minimum=1.0,
+                    maximum=15.0,
+                    value=DEFAULT_GUIDANCE_SCALE,
+                    step=0.1,
+                    info="Higher = stronger prompt adherence",
+                )
+                num_steps = gr.Slider(
+                    label="Inference Steps",
+                    minimum=10,
+                    maximum=50,
+                    value=DEFAULT_STEPS,
+                    step=1,
+                    info="More steps = higher quality but slower",
+                )
+        # Right column: outputs, filled from generate()'s (image, info) tuple.
+        with gr.Column(scale=1):
+            output_image = gr.Image(
+                label="🖼️ LucasArts Style Result",
+                type="pil",
+                height=400,
+            )
+            gen_info = gr.Textbox(
+                label="📋 Generation Info",
+                lines=4,
+                interactive=False,
+            )
+    gr.Markdown("### 💡 Prompt Ideas")
+    # Each example has a single value and targets only the prompt textbox.
+    gr.Examples(
+        examples=[
+            ["a pirate captain"],
+            ["a wizard in a dark tower"],
+            ["a detective in a noir city"],
+            ["a space adventurer"],
+            ["a medieval knight"],
+        ],
+        inputs=[prompt],
+        label="Click to use",
+    )
+    gr.Markdown(
+        "---\n"
+        "**Architecture:** SDXL + InstantID + ZoeDepth ControlNet + LucasArts LoRA  \n"
+        "**Scheduler:** DPMSolver++ (Karras)  \n"
+        "**Inspired by:** fofr's [face-to-many](https://github.com/fofr/cog-face-to-many)"
+    )
+    # Wire up
+    # The inputs list must stay in the same order as generate()'s positional
+    # parameters; outputs match its (image, info) return tuple.
+    generate_btn.click(
+        fn=generate,
+        inputs=[
+            input_image,
+            prompt,
+            negative_prompt,
+            face_strength,
+            image_strength,
+            depth_strength,
+            guidance_scale,
+            num_steps,
+        ],
+        outputs=[output_image, gen_info],
+    )
+# Script entry point: enable the request queue, then start the server.
+# NOTE(review): share=True asks Gradio for a public share link; this is
+# reportedly unsupported on Hugging Face Spaces — confirm whether it is
+# needed for this deployment.
+if __name__ == "__main__":
+    demo.queue()
+    demo.launch(share=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,14 @@

+diffusers>=0.25.0
+transformers
+accelerate
+safetensors
+torch
+torchvision
+controlnet_aux
+insightface
+onnxruntime
+huggingface_hub
+gradio>=4.0.0
+opencv-python-headless
+numpy
+Pillow