"""
DARKROOM HandRefiner — Hugging Face ZeroGPU Space
=================================================
Standard Gradio Interface (the pattern ZeroGPU actually supports): upload an
image, optionally paint a mask, get the hands structurally fixed on a free
on-demand GPU. This is the reliable shape — the previous "custom FastAPI
route" build failed with "No @spaces.GPU function detected" because ZeroGPU
only detects GPU functions wired into a normal Gradio app.

PIPELINE: MeshGraphormer hand-mesh -> depth map -> depth ControlNet ->
Stable Diffusion inpainting (HandRefiner). Fixes only the hand region.

--------------------------------------------------------------------------
DEPLOY (needs a HF PRO account to CREATE a ZeroGPU Space — $9/mo)
--------------------------------------------------------------------------
1. huggingface.co -> New Space -> SDK: Gradio -> Hardware: ZeroGPU
2. Upload: app.py, requirements.txt, README.md
3. Wait for build, then use the Space UI (or call it from the DARKROOM tool
   via the gradio_client endpoint shown on the Space's "View API" page).

HONEST LIMITS:
* Creating a ZeroGPU Space requires PRO. Using one is free within a daily
  quota (resets 24h after first use); each fix is a few GPU-seconds.
* GPU duration is capped (~120s max). We request 120s (see the
  @spaces.GPU decorator on fix_hands).
* Stock depth ControlNet is okay-not-perfect; swap CONTROLNET_ID to
  hr16/ControlNet-HandRefiner-pruned for finetuned quality.
* MeshGraphormer can't fix unreadable hands or crossed fingers.
"""
import time
import traceback

import spaces  # must precede torch for ZeroGPU
import torch
from PIL import Image, ImageFilter
import gradio as gr

SD_INPAINT_ID = "runwayml/stable-diffusion-inpainting"
CONTROLNET_ID = "lllyasviel/control_v11f1p_sd15_depth"  # -> hr16/ControlNet-HandRefiner-pruned for best
MESHGRAPHORMER_ID = "hr16/ControlNet-HandRefiner-pruned"
MAX_SIDE = 768
DEFAULT_PROMPT = "a detailed, anatomically correct hand with five fingers, natural proportions, same art style and lighting"
NEG = "extra fingers, fused fingers, missing fingers, deformed, mutated, blurry, low quality"

# Lazily-initialised singletons, populated by _load().
_PIPE = None
_MESH = None


def _load():
    """Load the hand detector and the inpainting pipeline once, on CPU.

    Called at import time (see the preload below) and again, as a no-op,
    from fix_hands. Models are moved to GPU inside the @spaces.GPU call, so
    the timed GPU window is spent on inference, not on multi-GB model
    loading — which is what caused first-call stalls/timeouts.
    """
    global _PIPE, _MESH
    if _PIPE is not None:
        return

    # Heavy third-party imports stay local so that an import failure is
    # reported by the guarded preload instead of killing module import.
    from diffusers import (
        ControlNetModel,
        StableDiffusionControlNetInpaintPipeline,
        UniPCMultistepScheduler,
    )
    from controlnet_aux import MeshGraphormerDetector

    t0 = time.time()
    print("[load] starting model load on CPU…", flush=True)
    _MESH = MeshGraphormerDetector.from_pretrained(MESHGRAPHORMER_ID)
    print(f"[load] meshgraphormer ok ({time.time()-t0:.0f}s)", flush=True)

    cn = ControlNetModel.from_pretrained(CONTROLNET_ID, torch_dtype=torch.float16)
    pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
        SD_INPAINT_ID, controlnet=cn, torch_dtype=torch.float16, safety_checker=None
    )
    pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

    # Both memory optimisations are best-effort: log and continue if the
    # installed diffusers build does not support them.
    try:
        pipe.enable_attention_slicing()
    except Exception as e:
        print("[load] attn-slicing skip:", e, flush=True)
    try:
        pipe.enable_vae_tiling()
    except Exception as e:
        print("[load] vae-tiling skip:", e, flush=True)

    # Publish last, so a failure above leaves _PIPE as None and the next
    # call retries the whole load.
    _PIPE = pipe
    print(f"[load] pipeline ready on CPU ({time.time()-t0:.0f}s total)", flush=True)


# preload at import — runs once when the container boots, OUTSIDE any GPU-timed window
try:
    _load()
except Exception as _e:
    print("[load] preload deferred:", _e, flush=True)


def _fit(img):
    """Downscale *img* so its longest side is at most MAX_SIDE.

    Both output dimensions are rounded to a multiple of 8 (minimum 8).
    Images already within MAX_SIDE are never upscaled, only snapped to the
    8-pixel grid.

    Returns:
        (resized_image, (original_width, original_height))
    """
    w, h = img.size
    scale = min(1.0, MAX_SIDE / max(w, h))
    new_w = max(8, int(round(w * scale / 8)) * 8)
    new_h = max(8, int(round(h * scale / 8)) * 8)
    return img.resize((new_w, new_h), Image.LANCZOS), (w, h)


def _mask_from_layer(layer, size):
    """Turn one painted editor layer into an "L" mask of *size*, or None.

    Brush strokes sit on a transparent canvas, so the alpha channel is the
    reliable record of where the user painted; luminance would read a dark
    brush as "nothing painted". Luminance is used only when the layer has no
    alpha channel or the alpha is fully opaque (e.g. a plain white-on-black
    mask supplied by an API client).

    Returns None when the resulting mask is completely empty.
    """
    mask = None
    if "A" in layer.getbands():
        alpha = layer.getchannel("A")
        if alpha.getextrema()[0] < 255:
            mask = alpha
    if mask is None:
        mask = layer.convert("L")
    mask = mask.resize(size, Image.LANCZOS)
    return mask if mask.getbbox() is not None else None


@spaces.GPU(duration=120)
def fix_hands(image, mask_layers, prompt, strength):
    """ZeroGPU-allocated worker: regenerate the hand region of *image*.

    Models are already loaded (CPU) at import; here we move them onto the
    GPU that ZeroGPU just attached, then infer.

    Args:
        image: A PIL image, or the dict value emitted by the ImageMask
            component (the UI wires the same component to both *image* and
            *mask_layers*). For a dict, the "background" entry is used.
        mask_layers: ImageMask dict value; the first entry of "layers" is
            used as an optional hand-drawn mask. Anything else is ignored.
        prompt: Text prompt; DEFAULT_PROMPT is used when empty.
        strength: Denoising strength, converted with float().

    Returns:
        The fixed image, resized back to the input's original dimensions.

    Raises:
        gr.Error: If no image was supplied, if neither a painted mask nor a
            detected hand is available, or if any step of the run fails.
    """
    # The ImageMask component delivers a dict, not a bare image; unwrap the
    # untouched upload so the painted strokes are not part of the input.
    if isinstance(image, dict):
        image = image.get("background")
    if image is None:
        raise gr.Error("Upload an image first.")

    try:
        t0 = time.time()
        _load()  # no-op if already loaded
        _MESH.to("cuda")
        _PIPE.to("cuda")
        print(f"[fix] models on GPU, t={time.time()-t0:.0f}s", flush=True)

        init, (ow, oh) = _fit(image.convert("RGB"))
        W, H = init.size
        print(f"[fix] input fitted to {W}x{H}", flush=True)

        # optional hand-drawn mask from the ImageMask component
        sent_mask = None
        if isinstance(mask_layers, dict):
            layers = mask_layers.get("layers") or []
            if layers:
                sent_mask = _mask_from_layer(layers[0], (W, H))

        print("[fix] running MeshGraphormer…", flush=True)
        mg = _MESH(init)
        if isinstance(mg, tuple):
            depth_img = mg[0]
            auto_mask = mg[1] if len(mg) > 1 else None
        else:
            depth_img, auto_mask = mg, None
        depth_img = depth_img.convert("RGB").resize((W, H), Image.LANCZOS)

        mask_img = sent_mask
        if mask_img is None and auto_mask is not None:
            detected = auto_mask.convert("L").resize((W, H), Image.LANCZOS)
            # An all-black detector mask means nothing was found; treat it
            # like a missing mask instead of inpainting nothing.
            if detected.getbbox() is not None:
                mask_img = detected
        if mask_img is None:
            raise gr.Error("No hands detected. Paint a mask over the hand and try again.")

        # Soften the mask edge before inpainting.
        mask_img = mask_img.filter(ImageFilter.GaussianBlur(2))

        print("[fix] running diffusion…", flush=True)
        out = _PIPE(
            prompt=prompt or DEFAULT_PROMPT,
            negative_prompt=NEG,
            image=init,
            mask_image=mask_img,
            control_image=depth_img,
            num_inference_steps=25,
            strength=float(strength),
            guidance_scale=7.5,
            controlnet_conditioning_scale=0.7,
        ).images[0]
        print(f"[fix] done, total {time.time()-t0:.0f}s", flush=True)
        return out.resize((ow, oh), Image.LANCZOS)
    except gr.Error:
        # Already a user-facing message; do not wrap it in "Fix failed: …".
        raise
    except Exception as e:
        print("[fix] ERROR:\n" + traceback.format_exc(), flush=True)
        raise gr.Error(f"Fix failed: {e}") from e


with gr.Blocks(title="DARKROOM HandRefiner", theme=gr.themes.Base()) as demo:
    gr.Markdown("## 🖐️ DARKROOM HandRefiner\nUpload AI art with bad hands. It auto-detects hands "
                "(MeshGraphormer) and regenerates them with correct geometry. Optionally paint a mask "
                "to target a specific hand. Free GPU runs a few seconds per fix.")
    with gr.Row():
        with gr.Column():
            inp = gr.ImageMask(type="pil", label="Image (optionally paint over the bad hand)")
            prompt = gr.Textbox(value=DEFAULT_PROMPT, label="Prompt", lines=2)
            strength = gr.Slider(0.3, 1.0, value=0.75, step=0.05, label="Fix strength (denoise)")
            btn = gr.Button("Fix hands", variant="primary")
        with gr.Column():
            out = gr.Image(type="pil", label="Result")
    # The same ImageMask value feeds both `image` and `mask_layers`;
    # fix_hands unwraps the background and the painted layer itself.
    btn.click(fix_hands, inputs=[inp, inp, prompt, strength], outputs=out, api_name="fix_hands")


if __name__ == "__main__":
    demo.queue().launch()