Spaces:
Build error
Build error
| """ | |
| DARKROOM HandRefiner — Hugging Face ZeroGPU Space | |
| ================================================= | |
| Standard Gradio Interface (the pattern ZeroGPU actually supports): upload an | |
| image, optionally paint a mask, get the hands structurally fixed on a free | |
| on-demand GPU. This is the reliable shape — the previous "custom FastAPI route" | |
| build failed with "No @spaces.GPU function detected" because ZeroGPU only | |
| detects GPU functions wired into a normal Gradio app. | |
| PIPELINE: MeshGraphormer hand-mesh -> depth map -> depth ControlNet -> | |
| Stable Diffusion inpainting (HandRefiner). Fixes only the hand region. | |
| -------------------------------------------------------------------------- | |
| DEPLOY (needs a HF PRO account to CREATE a ZeroGPU Space — $9/mo) | |
| -------------------------------------------------------------------------- | |
| 1. huggingface.co -> New Space -> SDK: Gradio -> Hardware: ZeroGPU | |
| 2. Upload: app.py, requirements.txt, README.md | |
| 3. Wait for build, then use the Space UI (or call it from the DARKROOM tool | |
| via the gradio_client endpoint shown on the Space's "View API" page). | |
| HONEST LIMITS: | |
| * Creating a ZeroGPU Space requires PRO. Using one is free within a daily quota | |
| (resets 24h after first use); each fix is a few GPU-seconds. | |
| * GPU duration is capped (~120s max). We request 90s. | |
| * Stock depth ControlNet is okay-not-perfect; swap CONTROLNET_ID to | |
| hr16/ControlNet-HandRefiner-pruned for finetuned quality. | |
| * MeshGraphormer can't fix unreadable hands or crossed fingers. | |
| """ | |
| import spaces # must precede torch for ZeroGPU | |
| import torch | |
| from PIL import Image, ImageFilter | |
| import gradio as gr | |
| # --------------------------------------------------------------------------- | |
| # transformers compatibility shim (fixes MeshGraphormer import on new transformers) | |
| # Newer transformers removed prune_linear_layer / Conv1D from transformers.modeling_utils, | |
| # which is exactly what breaks the vendored MeshGraphormer (ComfyUI issue #578). | |
| # Re-expose them so the legacy import succeeds. | |
| # --------------------------------------------------------------------------- | |
def _patch_transformers():
    """Re-expose legacy helper symbols on ``transformers.modeling_utils``.

    For each of ``prune_linear_layer``, ``Conv1D`` and ``prune_layer`` that
    ``transformers.modeling_utils`` lacks, copy the attribute over from
    ``transformers.pytorch_utils`` when it exists there. Does nothing if all
    three are already present. Never raises: any failure (including
    ``transformers`` not being importable) is only logged.
    """
    wanted = ("prune_linear_layer", "Conv1D", "prune_layer")
    try:
        import transformers.modeling_utils as legacy_mod

        missing = [name for name in wanted if not hasattr(legacy_mod, name)]
        if not missing:
            return  # everything the legacy import needs is already there

        from transformers import pytorch_utils as current_mod

        for name in missing:
            if hasattr(current_mod, name):
                setattr(legacy_mod, name, getattr(current_mod, name))
        print("[shim] transformers symbols patched", flush=True)
    except Exception as err:
        # Best-effort shim: report and carry on so the app can still boot.
        print("[shim] transformers patch skipped:", err, flush=True)


# Apply the shim once at import time, before anything imports the mesh detector.
_patch_transformers()
# --- Hugging Face model repositories ----------------------------------------
SD_INPAINT_ID = "runwayml/stable-diffusion-inpainting"    # inpainting checkpoint used by the hand fixer
CONTROLNET_ID = "lllyasviel/control_v11f1p_sd15_depth"    # ControlNet paired with the inpainting pipeline
TILE_CN_ID = "lllyasviel/control_v11f1e_sd15_tile"        # detail-regeneration ControlNet
SD_BASE_ID = "runwayml/stable-diffusion-v1-5"             # base SD for img2img detail pass
MESHGRAPHORMER_ID = "hr16/ControlNet-HandRefiner-pruned"  # repo the mesh detector is loaded from

# --- Working resolutions (longest side, in pixels) ---------------------------
MAX_SIDE = 768
DETAIL_MAX_SIDE = 1280  # detail pass can work larger since it's tiled-friendly

# --- Prompts -----------------------------------------------------------------
DEFAULT_PROMPT = (
    "a detailed, anatomically correct hand with five fingers, "
    "natural proportions, same art style and lighting"
)
NEG = "extra fingers, fused fingers, missing fingers, deformed, mutated, blurry, low quality"
DETAIL_NEG = "blurry, soft, out of focus, jpeg artifacts, low quality, smudged, messy lines"

# --- Lazily populated model caches (filled by _load / _load_detail) ----------
_PIPE = None      # inpainting + ControlNet pipeline
_MESH = None      # MeshGraphormer detector, or None when unavailable
_DETAIL = None    # tile-ControlNet img2img pipeline
_MESH_OK = False  # True once the mesh detector loaded successfully
_MESH_ERR = None  # str() of the exception that prevented the mesh detector from loading
def _make_mesh_detector():
    """Instantiate the MeshGraphormer hand detector from ``MESHGRAPHORMER_ID``.

    The import is done inside the function so that a missing or incompatible
    ``controlnet_aux`` only fails this call, not the whole module.
    controlnet_aux==0.0.6 ships MeshGraphormerDetector at the top level
    (newer versions dropped it — that's why the pin matters).

    Returns:
        The detector returned by ``MeshGraphormerDetector.from_pretrained``.
    """
    from controlnet_aux import MeshGraphormerDetector as detector_cls

    detector = detector_cls.from_pretrained(MESHGRAPHORMER_ID)
    return detector
def _load():
    """Populate the module-level inpainting pipeline and, if possible, the mesh detector.

    No-op when ``_PIPE`` is already set. Otherwise:

    1. Tries to build the MeshGraphormer detector. This step is optional: on
       failure ``_MESH`` stays ``None``, ``_MESH_OK`` is ``False`` and the
       error text is kept in ``_MESH_ERR``; loading continues.
    2. Loads the ControlNet (``CONTROLNET_ID``) and the SD inpainting pipeline
       (``SD_INPAINT_ID``) in float16 with the safety checker disabled, swaps
       in a UniPC scheduler, and enables attention slicing / VAE tiling on a
       best-effort basis.

    Nothing is moved to a GPU here; failures in step 2 propagate to the caller.
    """
    global _PIPE, _MESH, _MESH_OK, _MESH_ERR
    if _PIPE is not None:
        return  # already loaded

    import time
    from diffusers import (
        ControlNetModel,
        StableDiffusionControlNetInpaintPipeline,
        UniPCMultistepScheduler,
    )

    started = time.time()
    print("[load] starting model load on CPU…", flush=True)

    # MeshGraphormer is optional — isolate it so it can't crash the container.
    try:
        detector = _make_mesh_detector()
    except Exception as err:
        _MESH, _MESH_OK, _MESH_ERR = None, False, str(err)
        print("[load] meshgraphormer UNAVAILABLE (manual mask still works):", err, flush=True)
    else:
        _MESH, _MESH_OK = detector, True
        print(f"[load] meshgraphormer ok ({time.time()-started:.0f}s)", flush=True)

    controlnet = ControlNetModel.from_pretrained(CONTROLNET_ID, torch_dtype=torch.float16)
    inpaint = StableDiffusionControlNetInpaintPipeline.from_pretrained(
        SD_INPAINT_ID,
        controlnet=controlnet,
        torch_dtype=torch.float16,
        safety_checker=None,
    )
    inpaint.scheduler = UniPCMultistepScheduler.from_config(inpaint.scheduler.config)

    # Memory savers are nice-to-have: skip (and log) any the pipeline rejects.
    optional_steps = (
        ("enable_attention_slicing", "[load] attn-slicing skip:"),
        ("enable_vae_tiling", "[load] vae-tiling skip:"),
    )
    for method_name, skip_msg in optional_steps:
        try:
            getattr(inpaint, method_name)()
        except Exception as err:
            print(skip_msg, err, flush=True)

    _PIPE = inpaint
    print(f"[load] pipeline ready on CPU ({time.time()-started:.0f}s total)", flush=True)
# Preload at import: runs once when the container boots, OUTSIDE any GPU-timed
# window. A failure here is not fatal — it is logged, and fix_hands() calls
# _load() again on first use.
try:
    _load()
except Exception as preload_err:
    print("[load] preload deferred:", preload_err, flush=True)
def _load_detail():
    """Lazily build the tile-ControlNet img2img pipeline used by the detail pass.

    No-op when ``_DETAIL`` is already set. Otherwise loads the tile ControlNet
    (``TILE_CN_ID``) and the base SD img2img pipeline (``SD_BASE_ID``) in
    float16 with the safety checker disabled, swaps in a UniPC scheduler,
    enables attention slicing / VAE tiling on a best-effort basis, and stores
    the result in ``_DETAIL``. Nothing is moved to a GPU here.
    """
    global _DETAIL
    if _DETAIL is not None:
        return  # already loaded

    import time
    from diffusers import (
        ControlNetModel,
        StableDiffusionControlNetImg2ImgPipeline,
        UniPCMultistepScheduler,
    )

    started = time.time()
    print("[load] detail pipeline (tile CN) on CPU…", flush=True)

    tile_cn = ControlNetModel.from_pretrained(TILE_CN_ID, torch_dtype=torch.float16)
    detail = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
        SD_BASE_ID,
        controlnet=tile_cn,
        torch_dtype=torch.float16,
        safety_checker=None,
    )
    detail.scheduler = UniPCMultistepScheduler.from_config(detail.scheduler.config)

    # Memory savers are nice-to-have: skip (and log) any the pipeline rejects.
    optional_steps = (
        ("enable_attention_slicing", "[load] attn-slicing skip:"),
        ("enable_vae_tiling", "[load] vae-tiling skip:"),
    )
    for method_name, skip_msg in optional_steps:
        try:
            getattr(detail, method_name)()
        except Exception as err:
            print(skip_msg, err, flush=True)

    _DETAIL = detail
    print(f"[load] detail pipeline ready ({time.time()-started:.0f}s)", flush=True)
def _fit_to(img, max_side):
    """Shrink ``img`` towards ``max_side`` and snap both sides to multiples of 8.

    The scale factor is ``max_side / longest_side`` capped at 1.0, so the image
    is never enlarged. Each scaled side is rounded to the nearest multiple of
    8, with a floor of 8 pixels.

    Args:
        img: image exposing ``.size`` and ``.resize`` (a PIL image in this app).
        max_side: target length for the longer side, in pixels.

    Returns:
        ``(resized_image, (original_width, original_height))``.
    """
    orig_w, orig_h = img.size
    factor = min(1.0, max_side / max(orig_w, orig_h))

    def snap(side):
        # nearest multiple of 8, never below 8
        return max(8, int(round(side * factor / 8)) * 8)

    resized = img.resize((snap(orig_w), snap(orig_h)), Image.LANCZOS)
    return resized, (orig_w, orig_h)
def _fit(img):
    """Fit ``img`` to the hand-fix working resolution (``MAX_SIDE``).

    Thin wrapper around ``_fit_to`` so the resize/snap-to-8 rule lives in one
    place instead of being duplicated here.

    Args:
        img: image exposing ``.size`` and ``.resize`` (a PIL image in this app).

    Returns:
        ``(resized_image, (original_width, original_height))``.
    """
    return _fit_to(img, MAX_SIDE)
def _painted_mask(mask_layers, size):
    """Return the user-painted mask as an "L" image of ``size``, or None if nothing was painted.

    ``mask_layers`` is the ImageMask value (a dict with a ``"layers"`` list);
    anything else yields None. Only the first layer is considered.
    """
    if not isinstance(mask_layers, dict):
        return None
    layers = mask_layers.get("layers") or []
    if not layers or layers[0] is None:
        return None
    layer = layers[0]

    painted = None
    if "A" in layer.getbands():
        alpha = layer.getchannel("A")
        # NOTE(review): Gradio's ImageMask is believed to paint black strokes on
        # a transparent layer by default, in which case luminance is all-zero
        # and only alpha marks the strokes — confirm against the pinned Gradio.
        # A fully opaque alpha carries no stroke information, so fall through.
        if alpha.getextrema()[0] < 255:
            painted = alpha
    if painted is None:
        painted = layer.convert("L")

    painted = painted.resize(size, Image.LANCZOS)
    return painted if painted.getbbox() is not None else None


@spaces.GPU(duration=90)  # ZeroGPU attaches a GPU only inside decorated functions
def fix_hands(image, mask_layers, prompt, strength):
    """Repair the hand region of an image with ControlNet-guided inpainting.

    ZeroGPU-allocated worker. Models are already loaded (CPU) at import; here
    we move them onto the GPU that ZeroGPU just attached, then infer.

    Args:
        image: a PIL image, or the ImageMask dict (its ``"background"`` is used).
        mask_layers: the ImageMask dict; its first layer, if painted, is the mask.
        prompt: text prompt; ``DEFAULT_PROMPT`` is used when empty.
        strength: denoise strength passed to the pipeline (converted to float).

    Returns:
        The repaired image, resized back to the input's original dimensions.

    Raises:
        gr.Error: no image supplied, no usable mask (neither painted nor
            auto-detected), or any failure during loading/inference (reported
            as ``"Fix failed: …"``).
    """
    import time
    import traceback

    # The UI wires the same ImageMask component to both `image` and
    # `mask_layers`, so `image` normally arrives as a dict, not a PIL image.
    source = image.get("background") if isinstance(image, dict) else image
    if source is None:
        raise gr.Error("Upload an image first.")

    try:
        t0 = time.time()
        _load()  # no-op if already loaded
        _PIPE.to("cuda")
        # The mesh detector is optional: only touch it when it actually loaded.
        if _MESH_OK and _MESH is not None:
            try:
                _MESH.to("cuda")
            except Exception as e:
                print("[fix] mesh detector stays on its current device:", e, flush=True)
        print(f"[fix] models on GPU, t={time.time()-t0:.0f}s (mesh={_MESH_OK})", flush=True)

        init, (ow, oh) = _fit(source.convert("RGB"))
        W, H = init.size
        print(f"[fix] input fitted to {W}x{H}", flush=True)

        # optional hand-drawn mask from the ImageMask component
        sent_mask = _painted_mask(mask_layers, (W, H))

        depth_img = None
        auto_mask = None
        if _MESH_OK and _MESH is not None:
            print("[fix] running MeshGraphormer…", flush=True)
            try:
                mg = _MESH(init)
                if isinstance(mg, tuple):
                    depth_img = mg[0]
                    auto_mask = mg[1] if len(mg) > 1 else None
                else:
                    depth_img = mg
                if depth_img is not None:
                    depth_img = depth_img.convert("RGB").resize((W, H), Image.LANCZOS)
            except Exception as e:
                print("[fix] mesh inference failed, falling back to mask:", e, flush=True)

        # A painted mask wins over the auto-detected one.
        mask_img = sent_mask
        if mask_img is None and auto_mask is not None:
            mask_img = auto_mask.convert("L").resize((W, H), Image.LANCZOS)
        if mask_img is None:
            if not _MESH_OK:
                raise gr.Error("Auto hand-detection isn't available on this Space build. "
                               "Paint a mask over the bad hand (use the brush on the image) and run again.")
            raise gr.Error("No hands detected. Paint a mask over the hand and try again.")

        # if we have no depth (no mesh), use the source image itself as a soft control
        if depth_img is None:
            depth_img = init  # tile/identity-style guidance keeps structure from the source

        mask_img = mask_img.filter(ImageFilter.GaussianBlur(2))  # soften the mask edge
        print("[fix] running diffusion…", flush=True)
        out = _PIPE(
            prompt=prompt or DEFAULT_PROMPT, negative_prompt=NEG, image=init, mask_image=mask_img,
            control_image=depth_img, num_inference_steps=25, strength=float(strength),
            guidance_scale=7.5, controlnet_conditioning_scale=0.7,
        ).images[0]
        print(f"[fix] done, total {time.time()-t0:.0f}s", flush=True)
        return out.resize((ow, oh), Image.LANCZOS)
    except gr.Error:
        # Already a user-facing message (e.g. "No hands detected…"): don't re-wrap it.
        raise
    except Exception as e:
        print("[fix] ERROR:\n" + traceback.format_exc(), flush=True)
        raise gr.Error(f"Fix failed: {e}") from e
@spaces.GPU(duration=90)  # ZeroGPU attaches a GPU only inside decorated functions
def detail_pass(image, strength, scale):
    """Detail/lineart recovery via Tile-ControlNet img2img at low denoise.

    The image itself is used as the ControlNet control signal and the prompt is
    left empty (per ControlNet-tile guidance) so the subject is not redrawn.

    Args:
        image: a PIL image, or an editor-style dict (its ``"background"`` is used).
        strength: denoise strength passed to the pipeline (converted to float).
        scale: enlargement factor applied with Lanczos before the pass; values
            at or below 1.01 leave the size unchanged.

    Returns:
        The processed image at the (possibly enlarged) source size.

    Raises:
        gr.Error: no image supplied, or any failure during loading/inference
            (reported as ``"Detail pass failed: …"``).
    """
    import time
    import traceback

    # Unwrap first so a dict with an empty background is reported as "no image".
    src = image.get("background") if isinstance(image, dict) else image
    if src is None:
        raise gr.Error("Upload an image first.")

    try:
        t0 = time.time()
        _load_detail()
        _DETAIL.to("cuda")
        src = src.convert("RGB")

        # optionally enlarge first (Lanczos) — the model then fills in real detail at the higher res
        scale = float(scale)
        if scale > 1.01:
            src = src.resize((int(src.width*scale), int(src.height*scale)), Image.LANCZOS)

        work, (ow, oh) = _fit_to(src, DETAIL_MAX_SIDE)
        print(f"[detail] working at {work.size}, denoise={strength}", flush=True)

        # tile controlnet uses the image itself as the control signal
        out = _DETAIL(
            prompt="", negative_prompt=DETAIL_NEG,
            image=work, control_image=work,
            num_inference_steps=30, strength=float(strength),
            guidance_scale=6.0, controlnet_conditioning_scale=1.0,
        ).images[0]

        # Return at the requested size even if the working copy was shrunk or snapped.
        if out.size != (ow, oh):
            out = out.resize((ow, oh), Image.LANCZOS)
        print(f"[detail] done, total {time.time()-t0:.0f}s", flush=True)
        return out
    except gr.Error:
        # Already a user-facing message: don't re-wrap it.
        raise
    except Exception as e:
        print("[detail] ERROR:\n" + traceback.format_exc(), flush=True)
        raise gr.Error(f"Detail pass failed: {e}") from e
# ---------------------------------------------------------------------------
# Gradio UI: one tab per tool, each wired to its worker function.
# ---------------------------------------------------------------------------
with gr.Blocks(title="DARKROOM", theme=gr.themes.Base()) as demo:
    gr.Markdown(
        "## 🎨 DARKROOM\nAI-art repair on GPU. **Fix hands** regenerates malformed hands "
        "with correct geometry. **Add detail** uses Tile-ControlNet img2img to recover real "
        "sharpness and clean lineart while keeping your original style."
    )

    # --- Tab 1: hand repair (inputs on the left, result on the right) ---
    with gr.Tab("Fix hands"):
        with gr.Row():
            with gr.Column():
                inp = gr.ImageMask(type="pil", label="Image (optionally paint over the bad hand)")
                prompt = gr.Textbox(value=DEFAULT_PROMPT, label="Prompt", lines=2)
                strength = gr.Slider(
                    minimum=0.3, maximum=1.0, value=0.75, step=0.05,
                    label="Fix strength (denoise)",
                )
                btn = gr.Button("Fix hands", variant="primary")
            with gr.Column():
                out = gr.Image(type="pil", label="Result")
        # The ImageMask value is passed twice: once as the image, once as the mask source.
        btn.click(
            fn=fix_hands,
            inputs=[inp, inp, prompt, strength],
            outputs=out,
            api_name="fix_hands",
        )

    # --- Tab 2: detail / sharpening pass ---
    with gr.Tab("Add detail"):
        with gr.Row():
            with gr.Column():
                dinp = gr.Image(type="pil", label="Image to sharpen / add detail")
                dstrength = gr.Slider(
                    minimum=0.15, maximum=0.6, value=0.3, step=0.05,
                    label="Detail strength (low = safe & on-style, high = more new detail / more drift)",
                )
                dscale = gr.Slider(
                    minimum=1.0, maximum=2.0, value=1.0, step=0.5,
                    label="Enlarge first (×)",
                )
                dbtn = gr.Button("Add detail", variant="primary")
            with gr.Column():
                dout = gr.Image(type="pil", label="Result")
        dbtn.click(
            fn=detail_pass,
            inputs=[dinp, dstrength, dscale],
            outputs=dout,
            api_name="detail_pass",
        )

# Script entry point: enable the request queue, then start the server.
if __name__ == "__main__":
    demo.queue().launch()