Upload 4 files

8e2062d verified 3 months ago

12.5 kB

	"""
	LucasArts Pixel Art Style — Hugging Face Space

	Transform face photos into LucasArts adventure-game pixel art using:
	- SDXL base (AlbedoBase XL v2.1)
	- InstantID ControlNet for face identity preservation
	- ZoeDepth ControlNet for structural preservation
	- LucasArts LoRA (primerz/pixagram → lucasart.safetensors)
	- DPMSolver++ scheduler (traditional SDXL, not LCM)

	Architecture inspired by fofr's face-to-many.
	"""

	import spaces
	import gradio as gr
	import torch
	import time
	import cv2
	import numpy as np
	from PIL import Image

	torch.jit.script = lambda f: f # Disable JIT for compatibility

	from huggingface_hub import hf_hub_download, snapshot_download
	from diffusers.models import ControlNetModel
	from diffusers import AutoencoderKL, DPMSolverMultistepScheduler
	from controlnet_aux import ZoeDetector
	from insightface.app import FaceAnalysis

	from pipeline_stable_diffusion_xl_instantid_img2img import (
	StableDiffusionXLInstantIDImg2ImgPipeline,
	draw_kps,
	)

	# ============================================================
	# CONFIGURATION
	# ============================================================

	# Heading and intro text rendered at the top of the Gradio page.
	TITLE = "LucasArts Pixel Art Style"
	DESCRIPTION = """Transform any face photo into LucasArts adventure-game pixel art.
	Uses InstantID for face identity + ZoeDepth for structure + LucasArts LoRA style."""

	# Hugging Face Hub repositories fetched during model loading.
	# BASE_MODEL_REPO: checkpoint the img2img pipeline is built from.
	BASE_MODEL_REPO = "frankjoshua/albedobaseXL_v21"
	# VAE_REPO: VAE loaded separately and passed to the pipeline as `vae=`.
	VAE_REPO = "madebyollin/sdxl-vae-fp16-fix"
	# INSTANTID_REPO: source of the ControlNetModel files and ip-adapter.bin.
	INSTANTID_REPO = "InstantX/InstantID"
	# ZOEDEPTH_CN_REPO: second ControlNet given to the pipeline.
	ZOEDEPTH_CN_REPO = "diffusers/controlnet-zoe-depth-sdxl-1.0"
	# ANNOTATOR_REPO: weights for the ZoeDetector that produces depth images.
	ANNOTATOR_REPO = "lllyasviel/Annotators"
	# ANTELOPE_REPO: snapshot downloaded to /data/models/antelopev2 for InsightFace.
	ANTELOPE_REPO = "DIAMONIK7777/antelopev2"

	# LucasArts LoRA: repo + weight file loaded into the pipeline at startup.
	LORA_REPO = "primerz/pixagram"
	LORA_FILENAME = "lucasart.safetensors"
	# Value handed to pipe.fuse_lora() when the LoRA is fused.
	LORA_STRENGTH = 0.9
	# Prepended to every user prompt in generate().
	TRIGGER_WORD = "lucasarts style"

	# Generation defaults: initial values of the corresponding UI controls.
	DEFAULT_PROMPT = "a person"
	DEFAULT_NEGATIVE = (
	"ugly, artifacts, blurry, deformed, disfigured, low quality, "
	"watermark, text, photo-realistic, photography, realistic"
	)
	DEFAULT_GUIDANCE_SCALE = 7.0
	DEFAULT_STEPS = 20
	# Conditioning scale for the first (InstantID) ControlNet.
	DEFAULT_FACE_STRENGTH = 0.85
	# generate() passes `1.0 - image_strength` as the pipeline's `strength`.
	DEFAULT_IMAGE_STRENGTH = 0.15
	# Conditioning scale for the second (ZoeDepth) ControlNet.
	DEFAULT_DEPTH_STRENGTH = 0.8

	# Device the detector and pipeline are moved to, and the dtype used when
	# loading the ControlNets, VAE and pipeline.
	DEVICE = "cuda"
	DTYPE = torch.float16

	# ============================================================
	# MODEL LOADING (runs once at startup)
	# ============================================================

	# Startup banner: three lines on stdout.
	print("=" * 60, "Loading LucasArts Pixel Art Space", "=" * 60, sep="\n")

	# Step 1 of 6: InsightFace, used later for face detection and embeddings.
	print("\n[1/6] Loading InsightFace (antelopev2)...")
	st = time.time()
	# The antelopev2 snapshot is written to /data/models/antelopev2; FaceAnalysis
	# is then pointed at root="/data" with name="antelopev2".
	snapshot_download(ANTELOPE_REPO, local_dir="/data/models/antelopev2")
	face_app = FaceAnalysis(
	    root="/data",
	    name="antelopev2",
	    providers=["CPUExecutionProvider"],
	)
	face_app.prepare(det_size=(640, 640), ctx_id=0)
	print(" [OK] InsightFace loaded ({:.1f}s)".format(time.time() - st))

	# Step 2 of 6: InstantID ControlNet.
	print("\n[2/6] Loading InstantID ControlNet...")
	st = time.time()
	# Three files are pulled from the InstantID repo into /data/checkpoints:
	# the ControlNet config, the ControlNet weights, and the IP-adapter checkpoint
	# (the latter is loaded into the pipeline later).
	hf_hub_download(
	    INSTANTID_REPO, "ControlNetModel/config.json", local_dir="/data/checkpoints"
	)
	hf_hub_download(
	    INSTANTID_REPO,
	    "ControlNetModel/diffusion_pytorch_model.safetensors",
	    local_dir="/data/checkpoints",
	)
	hf_hub_download(INSTANTID_REPO, "ip-adapter.bin", local_dir="/data/checkpoints")
	# Build the ControlNet from the directory that was just populated.
	identitynet = ControlNetModel.from_pretrained(
	    "/data/checkpoints/ControlNetModel",
	    torch_dtype=DTYPE,
	)
	print(" [OK] InstantID ControlNet loaded ({:.1f}s)".format(time.time() - st))

	# Step 3 of 6: ZoeDepth ControlNet, loaded straight from its Hub repo.
	print("\n[3/6] Loading ZoeDepth ControlNet...")
	st = time.time()
	zoedepthnet = ControlNetModel.from_pretrained(ZOEDEPTH_CN_REPO, torch_dtype=DTYPE)
	print(" [OK] ZoeDepth ControlNet loaded ({:.1f}s)".format(time.time() - st))

	# Step 4 of 6: assemble the img2img pipeline around both ControlNets.
	print("\n[4/6] Loading SDXL Pipeline...")
	st = time.time()
	vae = AutoencoderKL.from_pretrained(VAE_REPO, torch_dtype=DTYPE)
	# ControlNet order is [identity, depth]; generate() supplies its control
	# images and conditioning scales in that same order.
	pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_pretrained(
	    BASE_MODEL_REPO,
	    torch_dtype=DTYPE,
	    controlnet=[identitynet, zoedepthnet],
	    vae=vae,
	)
	# Replace the default scheduler with DPMSolverMultistep (Karras sigmas on),
	# reusing the existing scheduler's config.
	pipe.scheduler = DPMSolverMultistepScheduler.from_config(
	    pipe.scheduler.config,
	    use_karras_sigmas=True,
	)
	# IP-adapter checkpoint downloaded earlier into /data/checkpoints.
	pipe.load_ip_adapter_instantid("/data/checkpoints/ip-adapter.bin")
	pipe.set_ip_adapter_scale(0.8)
	print(" [OK] Pipeline loaded ({:.1f}s)".format(time.time() - st))

	# 5. Load the LucasArts LoRA and fuse it into the pipeline weights.
	print("\n[5/6] Loading LucasArts LoRA...")
	st = time.time()
	pipe.load_lora_weights(LORA_REPO, weight_name=LORA_FILENAME)
	# The scale must be passed by keyword: in diffusers' LoRA loader mixins the
	# first positional parameter of fuse_lora() is not `lora_scale`, so a bare
	# positional 0.9 would never be applied as the fusion strength.
	pipe.fuse_lora(lora_scale=LORA_STRENGTH)
	print(f" [OK] LoRA fused at strength {LORA_STRENGTH} ({time.time() - st:.1f}s)")

	# Step 6 of 6: ZoeDetector, which generate() calls to produce depth images.
	print("\n[6/6] Loading ZoeDetector...")
	st = time.time()
	zoe = ZoeDetector.from_pretrained(ANNOTATOR_REPO)
	zoe.to(DEVICE)
	print(" [OK] ZoeDetector loaded ({:.1f}s)".format(time.time() - st))

	# The pipeline is moved to the target device last, after every component
	# (ControlNets, VAE, IP-adapter, LoRA) has been attached.
	pipe.to(DEVICE)

	# Closing banner: blank line, rule, message, rule, blank line.
	print(
	    "\n" + "=" * 60,
	    "All models loaded — ready to generate!",
	    "=" * 60 + "\n",
	    sep="\n",
	)


	# ============================================================
	# HELPERS
	# ============================================================

	def center_crop_square(img: Image.Image) -> Image.Image:
	    """Center-crop an image to a square whose side is its shorter dimension.

	    Args:
	        img: Source image. Only ``width``, ``height`` and ``crop`` are used.

	    Returns:
	        The result of ``img.crop`` on a box that is exactly
	        ``min(width, height)`` pixels wide and tall, centred in the image
	        (biased one pixel toward the top-left when the margin is odd).
	    """
	    side = min(img.width, img.height)
	    # Use integer offsets and derive the far edges from the near ones.
	    # Fractional edges such as 1.5 and 4.5 get rounded half-to-even by Pillow
	    # (to 2 and 4), which silently yields a non-square crop.
	    left = (img.width - side) // 2
	    top = (img.height - side) // 2
	    return img.crop((left, top, left + side, top + side))


	def extract_face(image: Image.Image):
	    """Find the most prominent face and return its identity data.

	    The image is converted from RGB to BGR before being handed to the
	    module-level ``face_app`` detector. When several faces are found, the
	    one with the largest bounding-box area is used.

	    Returns:
	        A ``(embedding, keypoints_image)`` pair: the chosen face's
	        ``"embedding"`` entry and the output of ``draw_kps`` for its
	        ``"kps"`` entry drawn against ``image``.

	    Raises:
	        gr.Error: If the detector returns no faces.
	    """

	    def _bbox_area(face):
	        # bbox is indexed as (x0, y0, x1, y1).
	        box = face["bbox"]
	        return (box[2] - box[0]) * (box[3] - box[1])

	    rgb_pixels = np.array(image)
	    detections = face_app.get(cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR))
	    if not detections:
	        raise gr.Error(
	            "No face detected in your image. Please upload a clear face photo."
	        )

	    # Ascending sort by area; the last entry is the largest face.
	    largest = sorted(detections, key=_bbox_area)[-1]
	    return largest["embedding"], draw_kps(image, largest["kps"])


	# ============================================================
	# GENERATION
	# ============================================================

	@spaces.GPU(duration=90)
	def generate(
	    face_image: Image.Image,
	    prompt: str,
	    negative_prompt: str,
	    face_strength: float,
	    image_strength: float,
	    depth_strength: float,
	    guidance_scale: float,
	    num_steps: int,
	) -> tuple:
	    """Generate LucasArts-style pixel art from a face photo.

	    Args:
	        face_image: Uploaded photo, or ``None`` when nothing was uploaded.
	        prompt: Subject description; ``TRIGGER_WORD`` is always prepended.
	        negative_prompt: Negative prompt; an empty value is sent as ``None``.
	        face_strength: Conditioning scale for the first (InstantID) ControlNet.
	        image_strength: How much of the source photo to keep; the pipeline
	            receives ``strength = 1.0 - image_strength``.
	        depth_strength: Conditioning scale for the second (depth) ControlNet.
	        guidance_scale: Guidance scale forwarded to the pipeline.
	        num_steps: Number of inference steps (coerced to ``int``).

	    Returns:
	        ``(image, info_text)``. When no photo was supplied a warning is shown
	        and ``(None, "No image provided")`` is returned.

	    Raises:
	        gr.Error: If no face is detected, or if any other step fails (the
	            original exception is chained as the cause).
	    """
	    if face_image is None:
	        gr.Warning("Please upload a face photo first!")
	        return None, "No image provided"

	    # Slider values may arrive as floats; the step count must be an integer.
	    steps = int(num_steps)

	    # Build prompt with trigger word
	    full_prompt = f"{TRIGGER_WORD}, {prompt}" if prompt else TRIGGER_WORD
	    neg = negative_prompt if negative_prompt else None

	    try:
	        # Prepare image (square crop, 1024x1024)
	        face_image = center_crop_square(face_image)
	        face_image = face_image.resize((1024, 1024), Image.LANCZOS)

	        # Extract face embedding + keypoints
	        face_emb, face_kps = extract_face(face_image)

	        # Generate depth map
	        with torch.no_grad():
	            depth_image = zoe(face_image)

	        # Dual control images, in the same order as the pipeline's
	        # ControlNets: [InstantID keypoints, ZoeDepth]. The depth image is
	        # resized to match the keypoint image.
	        control_images = [face_kps, depth_image.resize(face_kps.size)]

	        result = pipe(
	            prompt=full_prompt,
	            negative_prompt=neg,
	            image_embeds=face_emb,
	            image=face_image,
	            control_image=control_images,
	            strength=1.0 - image_strength,
	            num_inference_steps=steps,
	            guidance_scale=guidance_scale,
	            controlnet_conditioning_scale=[face_strength, depth_strength],
	            width=1024,
	            height=1024,
	        ).images[0]
	    except gr.Error:
	        # Already user-facing (e.g. "No face detected"); pass through untouched.
	        raise
	    except Exception as e:
	        # gr.Error only reaches the UI when it is raised; merely constructing
	        # it (as before) silently discarded the message.
	        raise gr.Error(f"Generation failed: {e}") from e

	    # Plain "|" separators: "\|" is an invalid escape sequence and printed a
	    # literal backslash in the info box.
	    info = (
	        f"Prompt: {full_prompt}\n"
	        f"Steps: {steps} | Guidance: {guidance_scale}\n"
	        f"Face: {face_strength} | Image: {image_strength} | Depth: {depth_strength}"
	    )
	    return result, info


	# ============================================================
	# GRADIO UI
	# ============================================================

	# Build the Gradio Blocks app. `demo` is the object queued and launched at the
	# bottom of the file.
	# NOTE(review): indentation was lost in this listing, so the exact nesting of
	# the Row / Column / Accordion contexts below cannot be confirmed from here —
	# verify against the original file before restructuring.
	with gr.Blocks(
	title=TITLE,
	theme=gr.themes.Soft(primary_hue="amber", secondary_hue="orange"),
	) as demo:

	# Page header: title and description from the configuration constants.
	gr.Markdown(f"# 🎮 {TITLE}")
	gr.Markdown(DESCRIPTION)

	with gr.Row():
	# Input side: photo, prompt, generate button and advanced settings.
	with gr.Column(scale=1):
	# type="pil": generate() treats this value as a PIL image
	# (it crops and resizes it).
	input_image = gr.Image(
	label="📷 Upload a face photo",
	type="pil",
	height=400,
	)

	prompt = gr.Textbox(
	label="✨ Prompt",
	value=DEFAULT_PROMPT,
	placeholder="Describe the subject (e.g., a pirate captain, a wizard)...",
	lines=2,
	)

	generate_btn = gr.Button(
	"🎮 Generate LucasArts Style",
	variant="primary",
	size="lg",
	)

	# Tuning controls; each is forwarded to the generate() parameter of the
	# same name (see the click wiring at the end of this block).
	with gr.Accordion("⚙️ Advanced Settings", open=False):
	negative_prompt = gr.Textbox(
	label="Negative Prompt",
	value=DEFAULT_NEGATIVE,
	lines=2,
	)

	# Conditioning scale for the InstantID ControlNet.
	face_strength = gr.Slider(
	label="Face Identity Strength",
	minimum=0.0,
	maximum=2.0,
	value=DEFAULT_FACE_STRENGTH,
	step=0.01,
	info="Higher = more face likeness, less creative freedom",
	)

	# generate() passes 1.0 - image_strength as the pipeline `strength`.
	image_strength = gr.Slider(
	label="Image Strength",
	minimum=0.0,
	maximum=1.0,
	value=DEFAULT_IMAGE_STRENGTH,
	step=0.01,
	info="Higher = more similarity to original photo structure/colors",
	)

	# Conditioning scale for the depth ControlNet.
	depth_strength = gr.Slider(
	label="Depth ControlNet Strength",
	minimum=0.0,
	maximum=1.0,
	value=DEFAULT_DEPTH_STRENGTH,
	step=0.01,
	info="Higher = more structural preservation from depth map",
	)

	guidance_scale = gr.Slider(
	label="Guidance Scale",
	minimum=1.0,
	maximum=15.0,
	value=DEFAULT_GUIDANCE_SCALE,
	step=0.1,
	info="Higher = stronger prompt adherence",
	)

	num_steps = gr.Slider(
	label="Inference Steps",
	minimum=10,
	maximum=50,
	value=DEFAULT_STEPS,
	step=1,
	info="More steps = higher quality but slower",
	)

	# Output side: generated image plus the text summary returned by generate().
	with gr.Column(scale=1):
	output_image = gr.Image(
	label="🖼️ LucasArts Style Result",
	type="pil",
	height=400,
	)

	gen_info = gr.Textbox(
	label="📋 Generation Info",
	lines=4,
	interactive=False,
	)

	# Example prompts; they are bound to the prompt textbox only.
	gr.Markdown("### 💡 Prompt Ideas")
	gr.Examples(
	examples=[
	["a pirate captain"],
	["a wizard in a dark tower"],
	["a detective in a noir city"],
	["a space adventurer"],
	["a medieval knight"],
	],
	inputs=[prompt],
	label="Click to use",
	)

	# Footer with architecture credits.
	gr.Markdown(
	"---\n"
	"Architecture: SDXL + InstantID + ZoeDepth ControlNet + LucasArts LoRA \n"
	"Scheduler: DPMSolver++ (Karras) \n"
	"Inspired by: fofr's [face-to-many](https://github.com/fofr/cog-face-to-many)"
	)

	# Wire up: the `inputs` order must match generate()'s positional parameters,
	# and the two outputs receive the (image, info) pair it returns.
	generate_btn.click(
	fn=generate,
	inputs=[
	input_image,
	prompt,
	negative_prompt,
	face_strength,
	image_strength,
	depth_strength,
	guidance_scale,
	num_steps,
	],
	outputs=[output_image, gen_info],
	)


	if __name__ == "__main__":
	    # Enable the request queue, then start the server with a share link.
	    # queue() returns the Blocks instance, so the two calls are chained.
	    demo.queue().launch(share=True)