"""Gradio demo: foreground segmentation, background blur, depth map and lens blur.

An uploaded image is resized to 512x512, segmented to find the "person"
foreground, and passed through a depth-estimation model. The demo shows the
foreground mask, a background-blurred composite, the normalised depth map, and
a blur whose strength varies per pixel with the depth value.
"""

import gradio as gr
import numpy as np
import torch
from PIL import Image
from scipy.ndimage import gaussian_filter
from transformers import pipeline

# transformers pipelines take a CUDA device index, or -1 to run on the CPU.
device = 0 if torch.cuda.is_available() else -1

# Both models are loaded once at import time and shared by every request.
seg_pipe = pipeline(
    task="image-segmentation",
    model="nvidia/segformer-b5-finetuned-ade-640-640",
    device=device,
)

depth_pipe = pipeline(
    task="depth-estimation",
    model="Intel/dpt-hybrid-midas",
    device=device,
)


def _blur_spatial(img_np, sigma):
    """Gaussian-blur an (H, W, C) array along its two spatial axes only."""
    # A sigma of 0 on the channel axis keeps the colour channels independent,
    # which matches blurring each channel separately.
    return gaussian_filter(img_np, sigma=(sigma, sigma, 0))


def _resize_map(values, height, width):
    """Return the 2-D float map *values* bilinearly resized to (height, width)."""
    if values.shape == (height, width):
        return values
    # A float32 2-D array becomes a mode "F" PIL image, which supports resize.
    as_img = Image.fromarray(np.asarray(values, dtype=np.float32))
    return np.asarray(as_img.resize((width, height), Image.BILINEAR))


def pil_mask_to_np(mask_pil):
    """Convert a PIL mask to a binary ``uint8`` array (1 where the mask is non-zero).

    Multi-channel masks are reduced to their first channel before thresholding.
    """
    mask = np.array(mask_pil)
    if mask.ndim == 3:
        mask = mask[..., 0]
    return (mask > 0).astype(np.uint8)


def build_foreground_mask(seg_results, target_label="person"):
    """Build a binary foreground mask from segmentation pipeline results.

    Args:
        seg_results: Iterable of dicts, each with a ``"label"`` string and a
            ``"mask"`` PIL image.
        target_label: Label (case-insensitive) whose masks form the foreground.

    Returns:
        A ``uint8`` array holding the union of all masks labelled
        ``target_label``. If no segment carries that label, the mask with the
        largest area is returned instead (the first one on ties).

    Raises:
        ValueError: If ``seg_results`` is empty, so no mask can be built.
    """
    seg_results = list(seg_results)
    if not seg_results:
        raise ValueError("seg_results is empty; cannot build a foreground mask")

    wanted = target_label.lower()
    matching = [
        pil_mask_to_np(item["mask"])
        for item in seg_results
        if item["label"].lower() == wanted
    ]
    if matching:
        combined = matching[0]
        for extra in matching[1:]:
            combined = np.maximum(combined, extra)
    else:
        # Fallback: treat the largest segment as the foreground.
        all_masks = [pil_mask_to_np(item["mask"]) for item in seg_results]
        combined = max(all_masks, key=lambda m: m.sum())

    return combined.astype(np.uint8)


def apply_background_blur(pil_img, fg_mask, sigma=15):
    """Blur everything outside the foreground mask.

    Args:
        pil_img: RGB PIL image.
        fg_mask: 2-D array with the image's height and width; 1 marks
            foreground pixels (kept sharp), 0 marks background (blurred).
        sigma: Standard deviation of the Gaussian blur, in pixels.

    Returns:
        A PIL image compositing the sharp foreground over the blurred image.
    """
    img_np = np.array(pil_img).astype(np.float32) / 255.0
    blurred = _blur_spatial(img_np, sigma)

    # Add a trailing axis so the mask broadcasts across the colour channels.
    alpha = fg_mask.astype(np.float32)[..., np.newaxis]
    out = alpha * img_np + (1.0 - alpha) * blurred
    out = np.clip(out * 255, 0, 255).astype(np.uint8)
    return Image.fromarray(out)


def apply_depth_lens_blur(pil_img, depth_norm, num_bins=6, max_sigma=12):
    """Blur each pixel by an amount chosen from its normalised depth value.

    The depth range [0, 1] is quantised into ``num_bins`` levels; level ``k``
    is blurred with the ``k``-th value of ``linspace(0, max_sigma, num_bins)``,
    so depth 0 stays sharp and depth 1 receives ``max_sigma``.

    Args:
        pil_img: RGB PIL image.
        depth_norm: 2-D array of depth values normalised to [0, 1]. If its
            shape differs from the image it is bilinearly resized to match.
        num_bins: Number of discrete blur levels (must be >= 1).
        max_sigma: Gaussian sigma, in pixels, used for the highest level.

    Returns:
        A PIL image with depth-dependent blur applied.

    Raises:
        ValueError: If ``num_bins`` is smaller than 1.
    """
    if num_bins < 1:
        raise ValueError("num_bins must be at least 1")

    img_np = np.array(pil_img).astype(np.float32) / 255.0
    height, width = img_np.shape[:2]

    # The depth model may emit a map at its own resolution; align it with the
    # image so every image pixel has a depth value.
    depth_norm = _resize_map(np.asarray(depth_norm), height, width)

    bin_idx = np.floor(depth_norm * (num_bins - 1)).astype(np.int32)
    bin_idx = np.clip(bin_idx, 0, num_bins - 1)

    sigma_values = np.linspace(0, max_sigma, num_bins)
    out = np.zeros_like(img_np)
    for level, sigma in enumerate(sigma_values):
        selected = bin_idx == level
        if not selected.any():
            # No pixel falls in this bin, so skip the (expensive) blur.
            continue
        layer = img_np if sigma == 0 else _blur_spatial(img_np, sigma)
        out[selected] = layer[selected]

    out = np.clip(out * 255, 0, 255).astype(np.uint8)
    return Image.fromarray(out)


def process_image(image):
    """Run the full demo pipeline on one uploaded image.

    Args:
        image: Image as a NumPy array as delivered by Gradio, or ``None`` when
            nothing was uploaded.

    Returns:
        A 4-tuple of arrays: foreground mask, background-blurred image, depth
        visualisation and depth-based lens blur. All four are ``None`` when
        ``image`` is ``None``.
    """
    if image is None:
        return None, None, None, None

    pil_img = Image.fromarray(image).convert("RGB").resize((512, 512))

    # Segmentation
    seg_results = seg_pipe(pil_img)
    fg_mask = build_foreground_mask(seg_results, target_label="person")
    mask_vis = (fg_mask * 255).astype(np.uint8)

    # Background blur
    bg_blur = apply_background_blur(pil_img, fg_mask, sigma=15)

    # Depth
    depth_result = depth_pipe(pil_img)
    predicted_depth = depth_result["predicted_depth"]
    if torch.is_tensor(predicted_depth):
        depth_map = predicted_depth.squeeze().cpu().numpy()
    else:
        depth_map = np.array(predicted_depth)

    # Min-max normalise to [0, 1]; the epsilon guards against a constant map.
    depth_min, depth_max = depth_map.min(), depth_map.max()
    depth_norm = (depth_map - depth_min) / (depth_max - depth_min + 1e-8)
    depth_vis = (depth_norm * 255).astype(np.uint8)

    # Lens blur
    # NOTE(review): if this model outputs inverse relative depth (larger =
    # nearer), nearer pixels receive the stronger blur - confirm that this is
    # the intended focal behaviour.
    lens_blur = apply_depth_lens_blur(pil_img, depth_norm, num_bins=6, max_sigma=12)

    return np.array(mask_vis), np.array(bg_blur), depth_vis, np.array(lens_blur)


demo = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="numpy", label="Upload Image"),
    outputs=[
        gr.Image(type="numpy", label="Foreground Mask"),
        gr.Image(type="numpy", label="Background Blur (σ=15)"),
        gr.Image(type="numpy", label="Depth Map"),
        gr.Image(type="numpy", label="Depth-Based Lens Blur"),
    ],
    title="Segmentation and Depth-Based Blur Demo",
    description="Upload an image to generate a foreground mask, background blur, depth map, and depth-based lens blur.",
)

if __name__ == "__main__":
    demo.launch()