# Pixa's picture
# Upload 4 files
# 8e2062d verified
"""
LucasArts Pixel Art Style — Hugging Face Space
Transform face photos into LucasArts adventure-game pixel art using:
- SDXL base (AlbedoBase XL v2.1)
- InstantID ControlNet for face identity preservation
- ZoeDepth ControlNet for structural preservation
- LucasArts LoRA (primerz/pixagram → lucasart.safetensors)
- DPMSolver++ scheduler (traditional SDXL, not LCM)
Architecture inspired by fofr's face-to-many.
"""
import spaces
import gradio as gr
import torch
import time
import cv2
import numpy as np
from PIL import Image
torch.jit.script = lambda f: f # Disable JIT for compatibility
from huggingface_hub import hf_hub_download, snapshot_download
from diffusers.models import ControlNetModel
from diffusers import AutoencoderKL, DPMSolverMultistepScheduler
from controlnet_aux import ZoeDetector
from insightface.app import FaceAnalysis
from pipeline_stable_diffusion_xl_instantid_img2img import (
StableDiffusionXLInstantIDImg2ImgPipeline,
draw_kps,
)
# ============================================================
# CONFIGURATION
# ============================================================

# --- UI copy -------------------------------------------------
TITLE = "LucasArts Pixel Art Style"
DESCRIPTION = (
    "Transform any face photo into LucasArts adventure-game pixel art.\n"
    "Uses InstantID for face identity + ZoeDepth for structure + LucasArts LoRA style."
)

# --- Hugging Face Hub repositories ---------------------------
BASE_MODEL_REPO = "frankjoshua/albedobaseXL_v21"  # SDXL base checkpoint
VAE_REPO = "madebyollin/sdxl-vae-fp16-fix"  # VAE handed to the pipeline
INSTANTID_REPO = "InstantX/InstantID"  # identity ControlNet + ip-adapter.bin
ZOEDEPTH_CN_REPO = "diffusers/controlnet-zoe-depth-sdxl-1.0"  # depth ControlNet
ANNOTATOR_REPO = "lllyasviel/Annotators"  # weights for the ZoeDetector
ANTELOPE_REPO = "DIAMONIK7777/antelopev2"  # InsightFace model pack

# --- LucasArts LoRA ------------------------------------------
LORA_REPO = "primerz/pixagram"
LORA_FILENAME = "lucasart.safetensors"
LORA_STRENGTH = 0.9  # scale used when the LoRA is fused at startup
TRIGGER_WORD = "lucasarts style"  # always prepended to the user's prompt

# --- Generation defaults (initial values of the UI controls) --
DEFAULT_PROMPT = "a person"
DEFAULT_NEGATIVE = ", ".join(
    (
        "ugly",
        "artifacts",
        "blurry",
        "deformed",
        "disfigured",
        "low quality",
        "watermark",
        "text",
        "photo-realistic",
        "photography",
        "realistic",
    )
)
DEFAULT_GUIDANCE_SCALE = 7.0
DEFAULT_STEPS = 20
DEFAULT_FACE_STRENGTH = 0.85  # conditioning scale of the InstantID ControlNet
DEFAULT_IMAGE_STRENGTH = 0.15  # generate() passes strength = 1.0 - this value
DEFAULT_DEPTH_STRENGTH = 0.8  # conditioning scale of the ZoeDepth ControlNet

# --- Runtime target ------------------------------------------
DEVICE = "cuda"
DTYPE = torch.float16
# ============================================================
# MODEL LOADING (runs once at startup)
# ============================================================
# Everything in this section executes at import time and leaves four
# module-level objects behind for the request handlers:
#   face_app  - InsightFace detector / embedder
#   pipe      - SDXL InstantID img2img pipeline with two ControlNets
#   zoe       - ZoeDepth annotator producing depth maps
#   (identitynet / zoedepthnet / vae are only inputs to `pipe`)
print("=" * 60)
print("Loading LucasArts Pixel Art Space")
print("=" * 60)

# 1. InsightFace — face detection & embedding
print("\n[1/6] Loading InsightFace (antelopev2)...")
st = time.time()
# NOTE(review): the snapshot is placed in /data/models/antelopev2, presumably
# because FaceAnalysis resolves its files as <root>/models/<name> — confirm
# against the installed insightface version.
snapshot_download(repo_id=ANTELOPE_REPO, local_dir="/data/models/antelopev2")
face_app = FaceAnalysis(
    name="antelopev2",
    root="/data",
    providers=["CPUExecutionProvider"],
)
face_app.prepare(ctx_id=0, det_size=(640, 640))
print(f" [OK] InsightFace loaded ({time.time() - st:.1f}s)")

# 2. InstantID ControlNet
print("\n[2/6] Loading InstantID ControlNet...")
st = time.time()
# Fetch the ControlNet config + weights and the IP-Adapter checkpoint into one
# local directory; the ControlNet is then loaded from that directory and the
# adapter file is consumed in step 4.
for checkpoint_file in (
    "ControlNetModel/config.json",
    "ControlNetModel/diffusion_pytorch_model.safetensors",
    "ip-adapter.bin",
):
    hf_hub_download(
        repo_id=INSTANTID_REPO,
        filename=checkpoint_file,
        local_dir="/data/checkpoints",
    )
identitynet = ControlNetModel.from_pretrained(
    "/data/checkpoints/ControlNetModel", torch_dtype=DTYPE
)
print(f" [OK] InstantID ControlNet loaded ({time.time() - st:.1f}s)")

# 3. ZoeDepth ControlNet
print("\n[3/6] Loading ZoeDepth ControlNet...")
st = time.time()
zoedepthnet = ControlNetModel.from_pretrained(
    ZOEDEPTH_CN_REPO, torch_dtype=DTYPE
)
print(f" [OK] ZoeDepth ControlNet loaded ({time.time() - st:.1f}s)")

# 4. SDXL Pipeline with dual ControlNet
print("\n[4/6] Loading SDXL Pipeline...")
st = time.time()
vae = AutoencoderKL.from_pretrained(VAE_REPO, torch_dtype=DTYPE)
pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_pretrained(
    BASE_MODEL_REPO,
    vae=vae,
    # Order matters: generate() passes control images and conditioning scales
    # as [InstantID, ZoeDepth] to line up with this list.
    controlnet=[identitynet, zoedepthnet],
    torch_dtype=DTYPE,
)
# Swap the checkpoint's scheduler for DPMSolver++ with Karras sigmas, reusing
# the existing scheduler configuration.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config, use_karras_sigmas=True
)
pipe.load_ip_adapter_instantid("/data/checkpoints/ip-adapter.bin")
pipe.set_ip_adapter_scale(0.8)
print(f" [OK] Pipeline loaded ({time.time() - st:.1f}s)")

# 5. Load and fuse LucasArts LoRA
print("\n[5/6] Loading LucasArts LoRA...")
st = time.time()
pipe.load_lora_weights(LORA_REPO, weight_name=LORA_FILENAME)
# The scale must be passed by keyword: the first positional parameter of
# fuse_lora() is not the scale (it is `components`, or `fuse_unet` in older
# diffusers releases), so a positional float would never set lora_scale.
pipe.fuse_lora(lora_scale=LORA_STRENGTH)
print(f" [OK] LoRA fused at strength {LORA_STRENGTH} ({time.time() - st:.1f}s)")

# 6. ZoeDetector for depth maps
print("\n[6/6] Loading ZoeDetector...")
st = time.time()
zoe = ZoeDetector.from_pretrained(ANNOTATOR_REPO)
zoe.to(DEVICE)
print(f" [OK] ZoeDetector loaded ({time.time() - st:.1f}s)")

# Move pipeline to GPU
pipe.to(DEVICE)
print("\n" + "=" * 60)
print("All models loaded — ready to generate!")
print("=" * 60 + "\n")
# ============================================================
# HELPERS
# ============================================================
def center_crop_square(img: "Image.Image") -> "Image.Image":
    """Center-crop an image to a square.

    The side of the square is the shorter image dimension. The crop box is
    computed with integer arithmetic, and the right/bottom edges are derived
    from the left/top edges plus the side length, so the result is always
    exactly ``side x side`` pixels.

    Float midpoints such as ``1.5`` / ``2.5`` are avoided on purpose: Pillow
    rounds each box coordinate independently (half-to-even), which can make
    the crop one pixel too narrow or too wide when the size difference is odd.

    Args:
        img: Source image; only ``size``, ``width``, ``height`` and ``crop``
            are used.

    Returns:
        The result of ``img.crop`` for the centered square box.
    """
    side = min(img.size)
    left = (img.width - side) // 2
    top = (img.height - side) // 2
    return img.crop((left, top, left + side, top + side))
def extract_face(image: Image.Image):
    """Locate the dominant face in ``image`` and describe it for InstantID.

    The image is converted to a BGR array and run through the module-level
    ``face_app``. When several faces are found, the one whose bounding box
    covers the largest area is used.

    Returns:
        A ``(embedding, keypoints_image)`` pair: the selected face's
        ``"embedding"`` entry and the output of ``draw_kps`` for its ``"kps"``.

    Raises:
        gr.Error: If no face is detected.
    """
    pixels_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    detections = face_app.get(pixels_bgr)
    if not detections:
        raise gr.Error(
            "No face detected in your image. Please upload a clear face photo."
        )

    def bbox_area(face):
        # bbox is indexed as (x0, y0, x1, y1).
        box = face["bbox"]
        return (box[2] - box[0]) * (box[3] - box[1])

    # Ascending sort, take the last entry: the largest face.
    largest = sorted(detections, key=bbox_area)[-1]
    embedding = largest["embedding"]
    keypoints_image = draw_kps(image, largest["kps"])
    return embedding, keypoints_image
# ============================================================
# GENERATION
# ============================================================
@spaces.GPU(duration=90)
def generate(
    face_image: Image.Image,
    prompt: str,
    negative_prompt: str,
    face_strength: float,
    image_strength: float,
    depth_strength: float,
    guidance_scale: float,
    num_steps: int,
) -> tuple:
    """Generate LucasArts-style pixel art from a face photo.

    Args:
        face_image: Uploaded photo, or ``None`` when nothing was uploaded.
        prompt: Subject description; ``TRIGGER_WORD`` is always prepended.
        negative_prompt: Negative prompt; an empty value is passed as ``None``.
        face_strength: Conditioning scale for the InstantID ControlNet.
        image_strength: How much of the input photo to keep; the pipeline
            receives ``strength = 1.0 - image_strength``.
        depth_strength: Conditioning scale for the ZoeDepth ControlNet.
        guidance_scale: Guidance scale forwarded to the pipeline.
        num_steps: Number of inference steps forwarded to the pipeline.

    Returns:
        ``(image, info)`` on success, where ``info`` summarises the settings
        used. ``(None, message)`` when no image was supplied or when
        generation failed for a reason other than ``gr.Error``.

    Raises:
        gr.Error: Propagated unchanged (e.g. no face detected).
    """
    if face_image is None:
        gr.Warning("Please upload a face photo first!")
        return None, "No image provided"

    try:
        # Prepare image (square crop, 1024x1024)
        face_image = center_crop_square(face_image)
        face_image = face_image.resize((1024, 1024), Image.LANCZOS)

        # Extract face embedding + keypoints
        face_emb, face_kps = extract_face(face_image)

        # Generate depth map
        with torch.no_grad():
            depth_image = zoe(face_image)

        # Dual control images, in the same order as the pipeline's
        # ControlNets: [InstantID keypoints, ZoeDepth]. The depth map is
        # resized to match the keypoint image.
        w, h = face_kps.size
        control_images = [face_kps, depth_image.resize((w, h))]

        # Build prompt with trigger word
        full_prompt = f"{TRIGGER_WORD}, {prompt}" if prompt else TRIGGER_WORD
        neg = negative_prompt or None

        # Generate
        result = pipe(
            prompt=full_prompt,
            negative_prompt=neg,
            image_embeds=face_emb,
            image=face_image,
            control_image=control_images,
            # The UI exposes "how much of the photo to keep"; the pipeline
            # takes the complementary strength value.
            strength=1.0 - image_strength,
            num_inference_steps=num_steps,
            guidance_scale=guidance_scale,
            controlnet_conditioning_scale=[face_strength, depth_strength],
            width=1024,
            height=1024,
        ).images[0]

        info = (
            f"Prompt: {full_prompt}\n"
            f"Steps: {num_steps} | Guidance: {guidance_scale}\n"
            f"Face: {face_strength} | Image: {image_strength} | Depth: {depth_strength}"
        )
        return result, info
    except gr.Error:
        # User-facing errors (such as "no face detected") go straight to Gradio.
        raise
    except Exception as e:
        # gr.Error is an exception class and only reaches the user when it is
        # raised; constructing it alone shows nothing. gr.Warning displays the
        # alert when called, so the failure is visible while the error text is
        # still returned for the info box.
        gr.Warning(f"Generation failed: {str(e)}")
        return None, f"Error: {str(e)}"
# ============================================================
# GRADIO UI
# ============================================================
# Layout: one row with two equal columns (inputs on the left, results on the
# right), followed by clickable prompt examples and a short footer.
amber_theme = gr.themes.Soft(primary_hue="amber", secondary_hue="orange")

with gr.Blocks(theme=amber_theme, title=TITLE) as demo:
    gr.Markdown(f"# 🎮 {TITLE}")
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        # ---- Left column: photo, prompt, button, advanced settings ----
        with gr.Column(scale=1):
            input_image = gr.Image(
                type="pil",
                label="📷 Upload a face photo",
                height=400,
            )
            prompt = gr.Textbox(
                value=DEFAULT_PROMPT,
                label="✨ Prompt",
                lines=2,
                placeholder="Describe the subject (e.g., a pirate captain, a wizard)...",
            )
            generate_btn = gr.Button(
                "🎮 Generate LucasArts Style",
                size="lg",
                variant="primary",
            )

            with gr.Accordion("⚙️ Advanced Settings", open=False):
                negative_prompt = gr.Textbox(
                    value=DEFAULT_NEGATIVE,
                    label="Negative Prompt",
                    lines=2,
                )
                face_strength = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    step=0.01,
                    value=DEFAULT_FACE_STRENGTH,
                    label="Face Identity Strength",
                    info="Higher = more face likeness, less creative freedom",
                )
                image_strength = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.01,
                    value=DEFAULT_IMAGE_STRENGTH,
                    label="Image Strength",
                    info="Higher = more similarity to original photo structure/colors",
                )
                depth_strength = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.01,
                    value=DEFAULT_DEPTH_STRENGTH,
                    label="Depth ControlNet Strength",
                    info="Higher = more structural preservation from depth map",
                )
                guidance_scale = gr.Slider(
                    minimum=1.0,
                    maximum=15.0,
                    step=0.1,
                    value=DEFAULT_GUIDANCE_SCALE,
                    label="Guidance Scale",
                    info="Higher = stronger prompt adherence",
                )
                num_steps = gr.Slider(
                    minimum=10,
                    maximum=50,
                    step=1,
                    value=DEFAULT_STEPS,
                    label="Inference Steps",
                    info="More steps = higher quality but slower",
                )

        # ---- Right column: generated image and run summary ----
        with gr.Column(scale=1):
            output_image = gr.Image(
                type="pil",
                label="🖼️ LucasArts Style Result",
                height=400,
            )
            gen_info = gr.Textbox(
                label="📋 Generation Info",
                interactive=False,
                lines=4,
            )

    # ---- Prompt suggestions: clicking one fills the prompt box ----
    gr.Markdown("### 💡 Prompt Ideas")
    example_prompts = [
        [idea]
        for idea in (
            "a pirate captain",
            "a wizard in a dark tower",
            "a detective in a noir city",
            "a space adventurer",
            "a medieval knight",
        )
    ]
    gr.Examples(
        examples=example_prompts,
        inputs=[prompt],
        label="Click to use",
    )

    # ---- Footer ----
    footer_markdown = (
        "---\n"
        "**Architecture:** SDXL + InstantID + ZoeDepth ControlNet + LucasArts LoRA \n"
        "**Scheduler:** DPMSolver++ (Karras) \n"
        "**Inspired by:** fofr's [face-to-many](https://github.com/fofr/cog-face-to-many)"
    )
    gr.Markdown(footer_markdown)

    # ---- Wire up: the component order must match generate()'s parameters ----
    generation_inputs = [
        input_image,
        prompt,
        negative_prompt,
        face_strength,
        image_strength,
        depth_strength,
        guidance_scale,
        num_steps,
    ]
    generate_btn.click(
        fn=generate,
        inputs=generation_inputs,
        outputs=[output_image, gen_info],
    )
if __name__ == "__main__":
    # Enable the request queue, then start the server with a public share
    # link requested; queue() hands back the app so the calls can be chained.
    demo.queue().launch(share=True)