Spaces:

CREATORJD
/

Testroom

Build error

App Files Files Community

Testroom / app.py

CREATORJD

Upload 2 files

41f4e26 verified 17 days ago

Raw

History Blame

13.5 kB

	"""
	DARKROOM HandRefiner — Hugging Face ZeroGPU Space
	=================================================
	Standard Gradio Interface (the pattern ZeroGPU actually supports): upload an
	image, optionally paint a mask, get the hands structurally fixed on a free
	on-demand GPU. This is the reliable shape — the previous "custom FastAPI route"
	build failed with "No @spaces.GPU function detected" because ZeroGPU only
	detects GPU functions wired into a normal Gradio app.

	PIPELINE: MeshGraphormer hand-mesh -> depth map -> depth ControlNet ->
	Stable Diffusion inpainting (HandRefiner). Fixes only the hand region.

	--------------------------------------------------------------------------
	DEPLOY (needs a HF PRO account to CREATE a ZeroGPU Space — $9/mo)
	--------------------------------------------------------------------------
	1. huggingface.co -> New Space -> SDK: Gradio -> Hardware: ZeroGPU
	2. Upload: app.py, requirements.txt, README.md
	3. Wait for build, then use the Space UI (or call it from the DARKROOM tool
	via the gradio_client endpoint shown on the Space's "View API" page).

	HONEST LIMITS:
	* Creating a ZeroGPU Space requires PRO. Using one is free within a daily quota
	(resets 24h after first use); each fix is a few GPU-seconds.
	* GPU duration is capped (~120s max). Both GPU functions request the full 120s (@spaces.GPU(duration=120)).
	* Stock depth ControlNet is okay-not-perfect; swap CONTROLNET_ID to
	hr16/ControlNet-HandRefiner-pruned for finetuned quality.
	* MeshGraphormer can't fix unreadable hands or crossed fingers.
	"""

	import spaces # must precede torch for ZeroGPU
	import torch
	from PIL import Image, ImageFilter
	import gradio as gr

	# ---------------------------------------------------------------------------
	# transformers compatibility shim (fixes MeshGraphormer import on new transformers)
	# Newer transformers removed prune_linear_layer / Conv1D from transformers.modeling_utils,
	# which is exactly what breaks the vendored MeshGraphormer (ComfyUI issue #578).
	# Re-expose them so the legacy import succeeds.
	# ---------------------------------------------------------------------------
	def _patch_transformers():
	    """Re-expose legacy helpers on ``transformers.modeling_utils``.

	    Copies ``prune_linear_layer``, ``Conv1D`` and ``prune_layer`` from
	    ``transformers.pytorch_utils`` onto ``transformers.modeling_utils`` when
	    they are absent there, so code that still imports them from the old
	    location keeps working.

	    Best-effort by design: any failure (including ``transformers`` not being
	    importable) is logged and swallowed so the app can still start.

	    Returns:
	        None. The only effects are the attributes set on
	        ``transformers.modeling_utils`` and the log lines printed.
	    """
	    need = ("prune_linear_layer", "Conv1D", "prune_layer")
	    try:
	        import transformers.modeling_utils as mu

	        missing = [n for n in need if not hasattr(mu, n)]
	        if not missing:
	            return
	        from transformers import pytorch_utils as pu

	        patched = []
	        for n in missing:
	            if hasattr(pu, n):
	                setattr(mu, n, getattr(pu, n))
	                patched.append(n)
	        unresolved = [n for n in missing if n not in patched]
	        # Report what actually happened instead of always claiming success.
	        if patched:
	            print("[shim] transformers symbols patched:", ", ".join(patched), flush=True)
	        if unresolved:
	            print("[shim] transformers symbols still missing:", ", ".join(unresolved), flush=True)
	    except Exception as e:
	        print("[shim] transformers patch skipped:", e, flush=True)


	# Apply the shim at import time, before anything imports the legacy names.
	_patch_transformers()

	# Model repo ids handed to the various `from_pretrained` loaders in this file.
	SD_INPAINT_ID = "runwayml/stable-diffusion-inpainting"  # base of the hand-fix inpaint pipeline
	CONTROLNET_ID = "lllyasviel/control_v11f1p_sd15_depth"  # depth ControlNet used by the hand-fix pipeline
	TILE_CN_ID = "lllyasviel/control_v11f1e_sd15_tile"  # detail-regeneration ControlNet
	SD_BASE_ID = "runwayml/stable-diffusion-v1-5"  # base SD for img2img detail pass
	MESHGRAPHORMER_ID = "hr16/ControlNet-HandRefiner-pruned"  # repo passed to MeshGraphormerDetector.from_pretrained
	# Longest-side limits (pixels) applied before inference.
	MAX_SIDE = 768  # hand-fix working size, used by _fit
	DETAIL_MAX_SIDE = 1280  # detail pass can work larger since it's tiled-friendly
	# Prompts: DEFAULT_PROMPT is the hand-fix fallback when the user supplies none;
	# NEG / DETAIL_NEG are the negative prompts of the hand-fix and detail passes.
	DEFAULT_PROMPT = "a detailed, anatomically correct hand with five fingers, natural proportions, same art style and lighting"
	NEG = "extra fingers, fused fingers, missing fingers, deformed, mutated, blurry, low quality"
	DETAIL_NEG = "blurry, soft, out of focus, jpeg artifacts, low quality, smudged, messy lines"

	# Lazily-populated module state (written by _load / _load_detail).
	_PIPE = None  # ControlNet inpaint pipeline, set by _load
	_MESH = None  # MeshGraphormer detector, or None when it could not be loaded
	_DETAIL = None  # tile-ControlNet img2img pipeline, set by _load_detail
	_MESH_OK = False  # True only after the detector loaded successfully
	_MESH_ERR = None  # str() of the exception that prevented the detector from loading

	def _make_mesh_detector():
	    """Build the MeshGraphormer hand detector from ``MESHGRAPHORMER_ID``.

	    The detector class is imported from the top level of ``controlnet_aux``.
	    Per the original author's note, ``controlnet_aux==0.0.6`` ships it there
	    and newer releases dropped it, which is why the version pin matters.
	    """
	    from controlnet_aux import MeshGraphormerDetector

	    detector = MeshGraphormerDetector.from_pretrained(MESHGRAPHORMER_ID)
	    return detector

	def _load():
	    """Populate the hand-fix globals: the inpaint pipeline and, if possible, the mesh detector.

	    Idempotent: returns immediately once ``_PIPE`` is set. The MeshGraphormer
	    detector is optional; if building it fails, the error text is kept in
	    ``_MESH_ERR``, ``_MESH_OK`` stays False, and loading continues so the
	    manual-mask path remains usable. Failures while building the diffusion
	    pipeline itself are not caught here.
	    """
	    global _PIPE, _MESH, _MESH_OK, _MESH_ERR
	    if _PIPE is not None:
	        return

	    import time
	    from diffusers import (
	        ControlNetModel,
	        StableDiffusionControlNetInpaintPipeline,
	        UniPCMultistepScheduler,
	    )

	    started = time.time()
	    print("[load] starting model load on CPU…", flush=True)

	    # Optional component first, fenced off so a failure cannot abort the load.
	    try:
	        _MESH = _make_mesh_detector()
	        _MESH_OK = True
	        print(f"[load] meshgraphormer ok ({time.time()-started:.0f}s)", flush=True)
	    except Exception as err:
	        _MESH = None
	        _MESH_OK = False
	        _MESH_ERR = str(err)
	        print("[load] meshgraphormer UNAVAILABLE (manual mask still works):", err, flush=True)

	    controlnet = ControlNetModel.from_pretrained(CONTROLNET_ID, torch_dtype=torch.float16)
	    inpaint = StableDiffusionControlNetInpaintPipeline.from_pretrained(
	        SD_INPAINT_ID,
	        controlnet=controlnet,
	        torch_dtype=torch.float16,
	        safety_checker=None,
	    )
	    inpaint.scheduler = UniPCMultistepScheduler.from_config(inpaint.scheduler.config)

	    # Memory savers are nice-to-have: log and carry on if either is unsupported.
	    optional_steps = (
	        ("enable_attention_slicing", "[load] attn-slicing skip:"),
	        ("enable_vae_tiling", "[load] vae-tiling skip:"),
	    )
	    for method_name, skip_msg in optional_steps:
	        try:
	            getattr(inpaint, method_name)()
	        except Exception as err:
	            print(skip_msg, err, flush=True)

	    _PIPE = inpaint
	    print(f"[load] pipeline ready on CPU ({time.time()-started:.0f}s total)", flush=True)

	# Warm the hand-fix models once at import time (i.e. when the container
	# boots) so the load cost is paid OUTSIDE any GPU-timed window. A failure is
	# only logged here; fix_hands calls _load() again on the first request.
	try:
	    _load()
	except Exception as _preload_err:
	    print("[load] preload deferred:", _preload_err, flush=True)

	def _load_detail():
	    """Build the tile-ControlNet img2img pipeline on first use and cache it in ``_DETAIL``.

	    Idempotent: a second call returns immediately. Used for the detail /
	    lineart recovery pass.
	    """
	    global _DETAIL
	    if _DETAIL is not None:
	        return

	    import time
	    from diffusers import (
	        ControlNetModel,
	        StableDiffusionControlNetImg2ImgPipeline,
	        UniPCMultistepScheduler,
	    )

	    started = time.time()
	    print("[load] detail pipeline (tile CN) on CPU…", flush=True)

	    tile_cn = ControlNetModel.from_pretrained(TILE_CN_ID, torch_dtype=torch.float16)
	    img2img = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
	        SD_BASE_ID,
	        controlnet=tile_cn,
	        torch_dtype=torch.float16,
	        safety_checker=None,
	    )
	    img2img.scheduler = UniPCMultistepScheduler.from_config(img2img.scheduler.config)

	    # Memory savers are optional: log and continue if either is unsupported.
	    optional_steps = (
	        ("enable_attention_slicing", "[load] attn-slicing skip:"),
	        ("enable_vae_tiling", "[load] vae-tiling skip:"),
	    )
	    for method_name, skip_msg in optional_steps:
	        try:
	            getattr(img2img, method_name)()
	        except Exception as err:
	            print(skip_msg, err, flush=True)

	    _DETAIL = img2img
	    print(f"[load] detail pipeline ready ({time.time()-started:.0f}s)", flush=True)

	def _fit_to(img, max_side):
	    """Shrink *img* so its longest side is at most *max_side*, snapped to multiples of 8.

	    The image is never enlarged (the scale factor is capped at 1.0). Each
	    output dimension is the scaled dimension rounded to the nearest multiple
	    of 8, with a floor of 8 — presumably because the diffusion pipelines
	    want sizes divisible by 8 (TODO confirm).

	    Args:
	        img: PIL image (anything exposing ``.size`` and ``.resize``).
	        max_side: upper bound for the longest side, in pixels.

	    Returns:
	        ``(resized_image, (orig_w, orig_h))`` — the original size is returned
	        so callers can resize the result back afterwards.
	    """
	    w, h = img.size
	    s = min(1.0, max_side / max(w, h))

	    def _snap(v):
	        # scale, then round to the nearest multiple of 8 (never below 8)
	        return max(8, int(round(v * s / 8)) * 8)

	    return img.resize((_snap(w), _snap(h)), Image.LANCZOS), (w, h)


	def _fit(img):
	    """Fit *img* to the hand-fix working size: ``_fit_to(img, MAX_SIDE)``."""
	    return _fit_to(img, MAX_SIDE)

	@spaces.GPU(duration=120)
	def fix_hands(image, mask_layers, prompt, strength):
	    """Repair hands in an image with the depth-ControlNet inpainting pipeline.

	    Runs inside a ZeroGPU allocation: models are loaded on CPU by ``_load()``
	    and moved onto CUDA here. The inpaint mask is the hand-painted mask when
	    one was drawn, otherwise the mask produced by MeshGraphormer (if that
	    detector is available).

	    Args:
	        image: a PIL image, or the dict value emitted by ``gr.ImageMask``
	            (its ``"background"`` entry is used). The UI wires the same
	            ImageMask component to both ``image`` and ``mask_layers``.
	        mask_layers: optional ``gr.ImageMask`` dict; the first entry of
	            ``"layers"`` is read as the painted mask. Anything else is ignored.
	        prompt: text prompt; ``DEFAULT_PROMPT`` is used when empty.
	        strength: denoise strength forwarded to the pipeline (cast to float).

	    Returns:
	        The repaired PIL image, resized back to the input's original size.

	    Raises:
	        gr.Error: no image supplied, no usable mask, or any failure during
	            loading / inference (reported as ``"Fix failed: ..."``).
	    """
	    import time
	    import traceback

	    # gr.ImageMask delivers a dict; plain PIL images are accepted as well.
	    src = image.get("background") if isinstance(image, dict) else image
	    if src is None:
	        raise gr.Error("Upload an image first.")
	    try:
	        t0 = time.time()
	        _load()  # no-op if already loaded
	        _PIPE.to("cuda")
	        # The detector is optional: only touch it when it actually loaded.
	        mesh_ready = _MESH_OK and _MESH is not None
	        if mesh_ready:
	            try:
	                _MESH.to("cuda")
	            except Exception as e:
	                print("[fix] could not move mesh detector to GPU:", e, flush=True)
	        print(f"[fix] models on GPU, t={time.time()-t0:.0f}s (mesh={_MESH_OK})", flush=True)
	        init, (ow, oh) = _fit(src.convert("RGB"))
	        W, H = init.size
	        print(f"[fix] input fitted to {W}x{H}", flush=True)

	        # optional hand-drawn mask from the ImageMask component
	        sent_mask = None
	        if isinstance(mask_layers, dict):
	            layers = mask_layers.get("layers") or []
	            if layers:
	                layer = layers[0]
	                m = None
	                if "A" in layer.getbands():
	                    # Strokes on a transparent layer are visible in alpha
	                    # regardless of brush colour; a luminance conversion of a
	                    # black brush would be all-zero (default ImageMask brush is
	                    # assumed to be black — TODO confirm for the pinned Gradio).
	                    alpha = layer.getchannel("A")
	                    if alpha.getextrema() != (255, 255):
	                        m = alpha
	                if m is None:
	                    # No alpha, or fully opaque layer: treat it as a plain mask.
	                    m = layer.convert("L")
	                m = m.resize((W, H), Image.LANCZOS)
	                if m.getbbox() is not None:
	                    sent_mask = m

	        depth_img = None
	        auto_mask = None
	        if mesh_ready:
	            print("[fix] running MeshGraphormer…", flush=True)
	            try:
	                mg = _MESH(init)
	                if isinstance(mg, tuple):
	                    depth_img = mg[0]
	                    auto_mask = mg[1] if len(mg) > 1 else None
	                else:
	                    depth_img = mg
	                if depth_img is not None:
	                    depth_img = depth_img.convert("RGB").resize((W, H), Image.LANCZOS)
	            except Exception as e:
	                print("[fix] mesh inference failed, falling back to mask:", e, flush=True)

	        if sent_mask is not None:
	            mask_img = sent_mask
	        elif auto_mask is not None:
	            mask_img = auto_mask.convert("L").resize((W, H), Image.LANCZOS)
	            if mask_img.getbbox() is None:
	                # an all-black auto mask means nothing was detected
	                mask_img = None
	        else:
	            mask_img = None

	        if mask_img is None:
	            if not _MESH_OK:
	                raise gr.Error("Auto hand-detection isn't available on this Space build. "
	                               "Paint a mask over the bad hand (use the brush on the image) and run again.")
	            raise gr.Error("No hands detected. Paint a mask over the hand and try again.")

	        # if we have no depth (no mesh), use the source image itself as a soft control
	        if depth_img is None:
	            depth_img = init  # tile/identity-style guidance keeps structure from the source

	        mask_img = mask_img.filter(ImageFilter.GaussianBlur(2))
	        print("[fix] running diffusion…", flush=True)
	        out = _PIPE(
	            prompt=prompt or DEFAULT_PROMPT, negative_prompt=NEG, image=init, mask_image=mask_img,
	            control_image=depth_img, num_inference_steps=25, strength=float(strength),
	            guidance_scale=7.5, controlnet_conditioning_scale=0.7,
	        ).images[0]
	        print(f"[fix] done, total {time.time()-t0:.0f}s", flush=True)
	        return out.resize((ow, oh), Image.LANCZOS)
	    except gr.Error:
	        # user-facing errors raised above must reach the UI unwrapped
	        raise
	    except Exception as e:
	        print("[fix] ERROR:\n" + traceback.format_exc(), flush=True)
	        raise gr.Error(f"Fix failed: {e}") from e

	@spaces.GPU(duration=120)
	def detail_pass(image, strength, scale):
	    """Detail/lineart recovery via Tile-ControlNet img2img at low denoise.

	    The image itself is used as the ControlNet control signal and the prompt
	    is left empty, so the pass sharpens rather than redraws the subject.

	    Args:
	        image: a PIL image, or an editor-style dict whose ``"background"``
	            entry holds the image.
	        strength: denoise strength forwarded to the pipeline (cast to float).
	        scale: enlargement factor applied with Lanczos before the pass;
	            values up to 1.01 leave the size unchanged.

	    Returns:
	        The processed PIL image at the (possibly enlarged) source size.

	    Raises:
	        gr.Error: no image supplied, or any failure during loading /
	            inference (reported as ``"Detail pass failed: ..."``).
	    """
	    import time
	    import traceback

	    src = image.get("background") if isinstance(image, dict) else image
	    if src is None:
	        raise gr.Error("Upload an image first.")
	    try:
	        t0 = time.time()
	        _load_detail()
	        _DETAIL.to("cuda")
	        src = src.convert("RGB")
	        # optionally enlarge first (Lanczos) — the model then fills in real detail at the higher res
	        scale = float(scale)
	        if scale > 1.01:
	            src = src.resize((int(src.width * scale), int(src.height * scale)), Image.LANCZOS)
	        work, (ow, oh) = _fit_to(src, DETAIL_MAX_SIDE)
	        print(f"[detail] working at {work.size}, denoise={strength}", flush=True)
	        # tile controlnet uses the image itself as the control signal
	        out = _DETAIL(
	            prompt="", negative_prompt=DETAIL_NEG,
	            image=work, control_image=work,
	            num_inference_steps=30, strength=float(strength),
	            guidance_scale=6.0, controlnet_conditioning_scale=1.0,
	        ).images[0]
	        # restore the pre-fit size if the working copy had to be shrunk/snapped
	        if out.size != (ow, oh):
	            out = out.resize((ow, oh), Image.LANCZOS)
	        print(f"[detail] done, total {time.time()-t0:.0f}s", flush=True)
	        return out
	    except gr.Error:
	        # keep user-facing errors unwrapped
	        raise
	    except Exception as e:
	        print("[detail] ERROR:\n" + traceback.format_exc(), flush=True)
	        raise gr.Error(f"Detail pass failed: {e}") from e

	# Gradio UI: two tabs, each a two-column row (inputs left, result right) with
	# one button wired to the matching GPU function.
	with gr.Blocks(title="DARKROOM", theme=gr.themes.Base()) as demo:
	    gr.Markdown(
	        value="## 🎨 DARKROOM\nAI-art repair on GPU. Fix hands regenerates malformed hands "
	        "with correct geometry. Add detail uses Tile-ControlNet img2img to recover real "
	        "sharpness and clean lineart while keeping your original style."
	    )

	    with gr.Tab(label="Fix hands"):
	        with gr.Row():
	            with gr.Column():
	                inp = gr.ImageMask(
	                    type="pil",
	                    label="Image (optionally paint over the bad hand)",
	                )
	                prompt = gr.Textbox(value=DEFAULT_PROMPT, label="Prompt", lines=2)
	                strength = gr.Slider(
	                    minimum=0.3,
	                    maximum=1.0,
	                    value=0.75,
	                    step=0.05,
	                    label="Fix strength (denoise)",
	                )
	                btn = gr.Button(value="Fix hands", variant="primary")
	            with gr.Column():
	                out = gr.Image(type="pil", label="Result")
	        # The ImageMask value is passed twice: once as the image, once as the mask source.
	        btn.click(
	            fn=fix_hands,
	            inputs=[inp, inp, prompt, strength],
	            outputs=out,
	            api_name="fix_hands",
	        )

	    with gr.Tab(label="Add detail"):
	        with gr.Row():
	            with gr.Column():
	                dinp = gr.Image(type="pil", label="Image to sharpen / add detail")
	                dstrength = gr.Slider(
	                    minimum=0.15,
	                    maximum=0.6,
	                    value=0.3,
	                    step=0.05,
	                    label="Detail strength (low = safe & on-style, high = more new detail / more drift)",
	                )
	                dscale = gr.Slider(
	                    minimum=1.0,
	                    maximum=2.0,
	                    value=1.0,
	                    step=0.5,
	                    label="Enlarge first (×)",
	                )
	                dbtn = gr.Button(value="Add detail", variant="primary")
	            with gr.Column():
	                dout = gr.Image(type="pil", label="Result")
	        dbtn.click(
	            fn=detail_pass,
	            inputs=[dinp, dstrength, dscale],
	            outputs=dout,
	            api_name="detail_pass",
	        )

	# Script entry point: enable the request queue, then start the server.
	if __name__ == "__main__":
	    queued_app = demo.queue()
	    queued_app.launch()