ICLight

Paused

App Files Files Community

daKhosa commited on 6 days ago

Commit

2fc70fd

1 Parent(s): 3901f61

Replace exec stub with IC-Light app

Browse files

Files changed (4) hide show

README.md +28 -1
app.py +497 -1
briarmbg.py +462 -0
requirements.txt +3 -1

README.md CHANGED Viewed

@@ -8,6 +8,33 @@ sdk_version: 4.44.1
 app_file: app.py
 pinned: false
 license: other
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 app_file: app.py
 pinned: false
 license: other
+suggested_hardware: zero-a10g
 ---
+# IC-Light Relighting
+This Space replaces the previous `EXEC` environment-variable stub with a real Gradio app and a ZeroGPU-decorated inference function.
+The implementation is based on the public `lllyasviel/ic-light` SD1.5 foreground-conditioned IC-Light model:
+- code reference: https://github.com/lllyasviel/IC-Light
+- weights: https://huggingface.co/lllyasviel/ic-light
+- model file used here: `iclight_sd15_fc.safetensors`
+## V2-Vary provenance
+`lllyasviel` announced IC-Light V2-Vary in GitHub discussion #109 as an alternative model for stronger illumination variations:
+https://github.com/lllyasviel/IC-Light/discussions/109
+The linked official Space is:
+https://huggingface.co/spaces/lllyasviel/iclight-v2-vary
+As of this repo update, that Space's public git tree contains only:
+```python
+import os; exec(os.getenv('EXEC'))
+```
+The public `lllyasviel/ic-light` model repository only exposes the SD1.5 IC-Light weights, not Flux/V2-Vary weights. Because the V2-Vary app source and weights are not public in the Space git tree or the public model repo, this Space uses the available upstream IC-Light integration rather than preserving the unsafe hidden-exec stub.

app.py CHANGED Viewed

	@@ -1 +1,497 @@
1	- import ~~os; exec(os.getenv('EXEC'))~~

+import math
+import random
+from enum import Enum
+import gradio as gr
+import numpy as np
+import safetensors.torch as sf
+import torch
+from diffusers import (
+    AutoencoderKL,
+    DPMSolverMultistepScheduler,
+    StableDiffusionImg2ImgPipeline,
+    StableDiffusionPipeline,
+    UNet2DConditionModel,
+)
+from diffusers.models.attention_processor import AttnProcessor2_0
+from huggingface_hub import hf_hub_download
+from PIL import Image
+from transformers import CLIPTextModel, CLIPTokenizer
+from briarmbg import BriaRMBG
+# Import the optional `spaces` package; when it is not installed, define a
+# no-op stand-in so `@spaces.GPU(duration=...)` below still works.
+try:
+    import spaces
+except ImportError:
+    class spaces:
+        # Stub exposing only the `GPU(duration=...)` decorator factory.
+        @staticmethod
+        def GPU(duration=120):
+            # `duration` is accepted but ignored; the function is returned unchanged.
+            def decorator(fn):
+                return fn
+            return decorator
+# Hub repo id from which the tokenizer, text encoder, VAE and UNet are loaded.
+BASE_MODEL = "stablediffusionapi/realistic-vision-v51"
+# Hub repo id and file name of the IC-Light tensors that are added onto the base UNet weights.
+ICLIGHT_REPO = "lllyasviel/ic-light"
+MODEL_FILE = "iclight_sd15_fc.safetensors"
+# Negative prompt used for every generation.
+NEGATIVE_PROMPT = "lowres, bad anatomy, bad hands, cropped, worst quality"
+# Text appended (after ", ") to the user's prompt.
+ADDED_PROMPT = "best quality"
+# Cached ICLightEngine instance; populated on first call to get_engine().
+_ENGINE = None
+class BGSource(Enum):
+    """Lighting-direction choices; each value is the label shown in the UI radio group."""
+    NONE = "None"
+    LEFT = "Left Light"
+    RIGHT = "Right Light"
+    TOP = "Top Light"
+    BOTTOM = "Bottom Light"
+def ensure_rgb(image):
+    if image is None:
+        raise gr.Error("Upload an image first.")
+    if isinstance(image, Image.Image):
+        return np.array(image.convert("RGB"))
+    if image.ndim == 2:
+        image = np.stack([image, image, image], axis=-1)
+    if image.shape[-1] == 4:
+        image = np.array(Image.fromarray(image).convert("RGB"))
+    return image[:, :, :3].astype(np.uint8)
+def resize_and_center_crop(image, target_width, target_height):
+    pil_image = Image.fromarray(image)
+    original_width, original_height = pil_image.size
+    scale_factor = max(target_width / original_width, target_height / original_height)
+    resized_width = int(round(original_width * scale_factor))
+    resized_height = int(round(original_height * scale_factor))
+    resized_image = pil_image.resize((resized_width, resized_height), Image.LANCZOS)
+    left = (resized_width - target_width) / 2
+    top = (resized_height - target_height) / 2
+    right = (resized_width + target_width) / 2
+    bottom = (resized_height + target_height) / 2
+    return np.array(resized_image.crop((left, top, right, bottom)))
+def resize_without_crop(image, target_width, target_height):
+    return np.array(Image.fromarray(image).resize((target_width, target_height), Image.LANCZOS))
+def numpy2pytorch(imgs):
+    h = torch.from_numpy(np.stack(imgs, axis=0)).float() / 127.0 - 1.0
+    return h.movedim(-1, 1)
+def pytorch2numpy(imgs):
+    results = []
+    for x in imgs:
+        y = x.movedim(0, -1)
+        y = y * 127.5 + 127.5
+        y = y.detach().float().cpu().numpy().clip(0, 255).astype(np.uint8)
+        results.append(y)
+    return results
+class ICLightEngine:
+    def __init__(self):
+        if not torch.cuda.is_available():
+            raise gr.Error("IC-Light inference requires a CUDA GPU. On Hugging Face, enable ZeroGPU hardware.")
+        self.device = torch.device("cuda")
+        self.tokenizer = CLIPTokenizer.from_pretrained(BASE_MODEL, subfolder="tokenizer")
+        self.text_encoder = CLIPTextModel.from_pretrained(BASE_MODEL, subfolder="text_encoder")
+        self.vae = AutoencoderKL.from_pretrained(BASE_MODEL, subfolder="vae")
+        self.unet = UNet2DConditionModel.from_pretrained(BASE_MODEL, subfolder="unet")
+        self.rmbg = BriaRMBG.from_pretrained("briaai/RMBG-1.4")
+        self._patch_unet_input()
+        self._load_iclight_weights()
+        self._move_to_gpu()
+        self._build_pipelines()
+    def _patch_unet_input(self):
+        with torch.no_grad():
+            new_conv_in = torch.nn.Conv2d(
+                8,
+                self.unet.conv_in.out_channels,
+                self.unet.conv_in.kernel_size,
+                self.unet.conv_in.stride,
+                self.unet.conv_in.padding,
+            )
+            new_conv_in.weight.zero_()
+            new_conv_in.weight[:, :4, :, :].copy_(self.unet.conv_in.weight)
+            new_conv_in.bias = self.unet.conv_in.bias
+            self.unet.conv_in = new_conv_in
+        unet_original_forward = self.unet.forward
+        def hooked_unet_forward(sample, timestep, encoder_hidden_states, **kwargs):
+            c_concat = kwargs["cross_attention_kwargs"]["concat_conds"].to(sample)
+            c_concat = torch.cat([c_concat] * (sample.shape[0] // c_concat.shape[0]), dim=0)
+            new_sample = torch.cat([sample, c_concat], dim=1)
+            kwargs["cross_attention_kwargs"] = {}
+            return unet_original_forward(new_sample, timestep, encoder_hidden_states, **kwargs)
+        self.unet.forward = hooked_unet_forward
+    def _load_iclight_weights(self):
+        model_path = hf_hub_download(ICLIGHT_REPO, MODEL_FILE)
+        sd_offset = sf.load_file(model_path, device="cpu")
+        sd_origin = self.unet.state_dict()
+        sd_merged = {
+            key: sd_origin[key] + sd_offset[key].to(dtype=sd_origin[key].dtype)
+            for key in sd_origin.keys()
+        }
+        self.unet.load_state_dict(sd_merged, strict=True)
+        del sd_offset, sd_origin, sd_merged
+    def _move_to_gpu(self):
+        self.text_encoder = self.text_encoder.to(device=self.device, dtype=torch.float16)
+        self.vae = self.vae.to(device=self.device, dtype=torch.bfloat16)
+        self.unet = self.unet.to(device=self.device, dtype=torch.float16)
+        self.rmbg = self.rmbg.to(device=self.device, dtype=torch.float32)
+        self.unet.set_attn_processor(AttnProcessor2_0())
+        self.vae.set_attn_processor(AttnProcessor2_0())
+    def _build_pipelines(self):
+        scheduler = DPMSolverMultistepScheduler(
+            num_train_timesteps=1000,
+            beta_start=0.00085,
+            beta_end=0.012,
+            algorithm_type="sde-dpmsolver++",
+            use_karras_sigmas=True,
+            steps_offset=1,
+        )
+        pipe_kwargs = dict(
+            vae=self.vae,
+            text_encoder=self.text_encoder,
+            tokenizer=self.tokenizer,
+            unet=self.unet,
+            scheduler=scheduler,
+            safety_checker=None,
+            requires_safety_checker=False,
+            feature_extractor=None,
+            image_encoder=None,
+        )
+        self.t2i_pipe = StableDiffusionPipeline(**pipe_kwargs)
+        self.i2i_pipe = StableDiffusionImg2ImgPipeline(**pipe_kwargs)
+    @torch.inference_mode()
+    def encode_prompt_inner(self, txt):
+        max_length = self.tokenizer.model_max_length
+        chunk_length = self.tokenizer.model_max_length - 2
+        id_start = self.tokenizer.bos_token_id
+        id_end = self.tokenizer.eos_token_id
+        id_pad = id_end
+        def pad(x, p, i):
+            return x[:i] if len(x) >= i else x + [p] * (i - len(x))
+        tokens = self.tokenizer(txt, truncation=False, add_special_tokens=False)["input_ids"]
+        chunks = [
+            [id_start] + tokens[i: i + chunk_length] + [id_end]
+            for i in range(0, len(tokens), chunk_length)
+        ]
+        chunks = [pad(chunk, id_pad, max_length) for chunk in chunks]
+        token_ids = torch.tensor(chunks).to(device=self.device, dtype=torch.int64)
+        return self.text_encoder(token_ids).last_hidden_state
+    @torch.inference_mode()
+    def encode_prompt_pair(self, positive_prompt, negative_prompt):
+        c = self.encode_prompt_inner(positive_prompt)
+        uc = self.encode_prompt_inner(negative_prompt)
+        c_len = float(len(c))
+        uc_len = float(len(uc))
+        max_count = max(c_len, uc_len)
+        c_repeat = int(math.ceil(max_count / c_len))
+        uc_repeat = int(math.ceil(max_count / uc_len))
+        max_chunk = max(len(c), len(uc))
+        c = torch.cat([c] * c_repeat, dim=0)[:max_chunk]
+        uc = torch.cat([uc] * uc_repeat, dim=0)[:max_chunk]
+        c = torch.cat([p[None, ...] for p in c], dim=1)
+        uc = torch.cat([p[None, ...] for p in uc], dim=1)
+        return c, uc
+    @torch.inference_mode()
+    def run_rmbg(self, img):
+        height, width, channels = img.shape
+        if channels != 3:
+            raise gr.Error("Input image must be RGB.")
+        k = (256.0 / float(height * width)) ** 0.5
+        feed = resize_without_crop(img, int(64 * round(width * k)), int(64 * round(height * k)))
+        feed = numpy2pytorch([feed]).to(device=self.device, dtype=torch.float32)
+        alpha = self.rmbg(feed)[0][0]
+        alpha = torch.nn.functional.interpolate(alpha, size=(height, width), mode="bilinear")
+        alpha = alpha.movedim(1, -1)[0]
+        alpha = alpha.detach().float().cpu().numpy().clip(0, 1)
+        result = 127 + (img.astype(np.float32) - 127) * alpha
+        return result.clip(0, 255).astype(np.uint8)
+    def make_initial_background(self, bg_source, image_width, image_height):
+        bg_source = BGSource(bg_source)
+        if bg_source == BGSource.NONE:
+            return None
+        if bg_source == BGSource.LEFT:
+            gradient = np.linspace(255, 0, image_width)
+            image = np.tile(gradient, (image_height, 1))
+        elif bg_source == BGSource.RIGHT:
+            gradient = np.linspace(0, 255, image_width)
+            image = np.tile(gradient, (image_height, 1))
+        elif bg_source == BGSource.TOP:
+            gradient = np.linspace(255, 0, image_height)[:, None]
+            image = np.tile(gradient, (1, image_width))
+        elif bg_source == BGSource.BOTTOM:
+            gradient = np.linspace(0, 255, image_height)[:, None]
+            image = np.tile(gradient, (1, image_width))
+        else:
+            raise gr.Error("Invalid lighting preference.")
+        return np.stack((image,) * 3, axis=-1).astype(np.uint8)
+    @torch.inference_mode()
+    def relight(
+        self,
+        input_fg,
+        prompt,
+        image_width,
+        image_height,
+        num_samples,
+        seed,
+        steps,
+        cfg,
+        highres_scale,
+        highres_denoise,
+        lowres_denoise,
+        bg_source,
+    ):
+        input_fg = ensure_rgb(input_fg)
+        input_fg = self.run_rmbg(input_fg)
+        input_bg = self.make_initial_background(bg_source, image_width, image_height)
+        if seed is None or int(seed) < 0:
+            seed = random.randint(0, 2**31 - 1)
+        rng = torch.Generator(device=self.device).manual_seed(int(seed))
+        fg = resize_and_center_crop(input_fg, image_width, image_height)
+        concat_conds = numpy2pytorch([fg]).to(device=self.vae.device, dtype=self.vae.dtype)
+        concat_conds = self.vae.encode(concat_conds).latent_dist.mode() * self.vae.config.scaling_factor
+        conds, unconds = self.encode_prompt_pair(
+            positive_prompt=f"{prompt}, {ADDED_PROMPT}",
+            negative_prompt=NEGATIVE_PROMPT,
+        )
+        if input_bg is None:
+            latents = self.t2i_pipe(
+                prompt_embeds=conds,
+                negative_prompt_embeds=unconds,
+                width=image_width,
+                height=image_height,
+                num_inference_steps=steps,
+                num_images_per_prompt=num_samples,
+                generator=rng,
+                output_type="latent",
+                guidance_scale=cfg,
+                cross_attention_kwargs={"concat_conds": concat_conds},
+            ).images.to(self.vae.dtype) / self.vae.config.scaling_factor
+        else:
+            bg = resize_and_center_crop(input_bg, image_width, image_height)
+            bg_latent = numpy2pytorch([bg]).to(device=self.vae.device, dtype=self.vae.dtype)
+            bg_latent = self.vae.encode(bg_latent).latent_dist.mode() * self.vae.config.scaling_factor
+            latents = self.i2i_pipe(
+                image=bg_latent,
+                strength=lowres_denoise,
+                prompt_embeds=conds,
+                negative_prompt_embeds=unconds,
+                width=image_width,
+                height=image_height,
+                num_inference_steps=int(round(steps / lowres_denoise)),
+                num_images_per_prompt=num_samples,
+                generator=rng,
+                output_type="latent",
+                guidance_scale=cfg,
+                cross_attention_kwargs={"concat_conds": concat_conds},
+            ).images.to(self.vae.dtype) / self.vae.config.scaling_factor
+        pixels = self.vae.decode(latents).sample
+        pixels = pytorch2numpy(pixels)
+        highres_width = int(round(image_width * highres_scale / 64.0) * 64)
+        highres_height = int(round(image_height * highres_scale / 64.0) * 64)
+        pixels = [
+            resize_without_crop(image=p, target_width=highres_width, target_height=highres_height)
+            for p in pixels
+        ]
+        pixels = numpy2pytorch(pixels).to(device=self.vae.device, dtype=self.vae.dtype)
+        latents = self.vae.encode(pixels).latent_dist.mode() * self.vae.config.scaling_factor
+        latents = latents.to(device=self.unet.device, dtype=self.unet.dtype)
+        image_height, image_width = latents.shape[2] * 8, latents.shape[3] * 8
+        fg = resize_and_center_crop(input_fg, image_width, image_height)
+        concat_conds = numpy2pytorch([fg]).to(device=self.vae.device, dtype=self.vae.dtype)
+        concat_conds = self.vae.encode(concat_conds).latent_dist.mode() * self.vae.config.scaling_factor
+        latents = self.i2i_pipe(
+            image=latents,
+            strength=highres_denoise,
+            prompt_embeds=conds,
+            negative_prompt_embeds=unconds,
+            width=image_width,
+            height=image_height,
+            num_inference_steps=int(round(steps / highres_denoise)),
+            num_images_per_prompt=num_samples,
+            generator=rng,
+            output_type="latent",
+            guidance_scale=cfg,
+            cross_attention_kwargs={"concat_conds": concat_conds},
+        ).images.to(self.vae.dtype) / self.vae.config.scaling_factor
+        pixels = self.vae.decode(latents).sample
+        return input_fg, pytorch2numpy(pixels)
+def get_engine():
+    global _ENGINE
+    if _ENGINE is None:
+        _ENGINE = ICLightEngine()
+    return _ENGINE
+@spaces.GPU(duration=180)
+def generate(
+    image,
+    prompt,
+    lighting,
+    width,
+    height,
+    samples,
+    seed,
+    steps,
+    cfg,
+    highres_scale,
+    highres_denoise,
+    lowres_denoise,
+):
+    if not prompt or not prompt.strip():
+        raise gr.Error("Enter a prompt.")
+    engine = get_engine()
+    return engine.relight(
+        image,
+        prompt.strip(),
+        int(width),
+        int(height),
+        int(samples),
+        int(seed),
+        int(steps),
+        float(cfg),
+        float(highres_scale),
+        float(highres_denoise),
+        float(lowres_denoise),
+        lighting,
+    )
+# Sample prompts for the "Prompt Quick List" dataset; each row is a
+# one-element list holding the prompt text.
+quick_prompts = [
+    ["beautiful woman, detailed face, sunshine from window"],
+    ["handsome man, detailed face, neon light, city"],
+    ["portrait, cinematic lighting"],
+    ["product photo, soft studio lighting"],
+    ["character art, dramatic light and shadow"],
+]
+# Gradio UI: input controls in the left column, results in the right column.
+with gr.Blocks(title="IC-Light Relighting") as demo:
+    gr.Markdown("## IC-Light Relighting")
+    with gr.Row():
+        with gr.Column():
+            input_image = gr.Image(sources=["upload"], type="numpy", label="Image", height=440)
+            prompt = gr.Textbox(label="Prompt", value="portrait, cinematic lighting")
+            # Radio choices are the BGSource values.
+            lighting = gr.Radio(
+                choices=[e.value for e in BGSource],
+                value=BGSource.NONE.value,
+                label="Lighting Preference",
+            )
+            prompt_examples = gr.Dataset(
+                samples=quick_prompts,
+                label="Prompt Quick List",
+                components=[prompt],
+                samples_per_page=20,
+            )
+            # Clicking a sample copies its first (only) field into the prompt box.
+            prompt_examples.click(
+                lambda x: x[0],
+                inputs=prompt_examples,
+                outputs=prompt,
+                show_progress=False,
+                queue=False,
+            )
+            run_button = gr.Button("Relight", variant="primary")
+            with gr.Row():
+                samples = gr.Slider(label="Images", minimum=1, maximum=4, value=1, step=1)
+                seed = gr.Number(label="Seed", value=12345, precision=0)
+            with gr.Row():
+                # Width and height move in steps of 64 pixels.
+                width = gr.Slider(label="Width", minimum=256, maximum=1024, value=512, step=64)
+                height = gr.Slider(label="Height", minimum=256, maximum=1024, value=640, step=64)
+            with gr.Accordion("Advanced", open=False):
+                steps = gr.Slider(label="Steps", minimum=1, maximum=80, value=25, step=1)
+                cfg = gr.Slider(label="CFG Scale", minimum=1.0, maximum=16.0, value=2.0, step=0.1)
+                # Both denoise sliders have a minimum of 0.1; relight divides `steps` by them.
+                lowres_denoise = gr.Slider(
+                    label="Lowres Denoise",
+                    minimum=0.1,
+                    maximum=1.0,
+                    value=0.9,
+                    step=0.01,
+                )
+                highres_scale = gr.Slider(
+                    label="Highres Scale",
+                    minimum=1.0,
+                    maximum=2.0,
+                    value=1.5,
+                    step=0.05,
+                )
+                highres_denoise = gr.Slider(
+                    label="Highres Denoise",
+                    minimum=0.1,
+                    maximum=1.0,
+                    value=0.5,
+                    step=0.01,
+                )
+        with gr.Column():
+            foreground = gr.Image(type="numpy", label="Preprocessed Foreground", height=360)
+            gallery = gr.Gallery(label="Outputs", height=720, object_fit="contain")
+    # Order must match the positional parameters of generate().
+    inputs = [
+        input_image,
+        prompt,
+        lighting,
+        width,
+        height,
+        samples,
+        seed,
+        steps,
+        cfg,
+        highres_scale,
+        highres_denoise,
+        lowres_denoise,
+    ]
+    run_button.click(fn=generate, inputs=inputs, outputs=[foreground, gallery])
+# Start the app with a request queue (at most 20 waiting) when run as a script.
+if __name__ == "__main__":
+    demo.queue(max_size=20).launch(server_name="0.0.0.0")

briarmbg.py ADDED Viewed

	@@ -0,0 +1,462 @@

+# RMBG1.4 (diffusers implementation)
+# Found on huggingface space of several projects
+# Not sure which project is the source of this file
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from huggingface_hub import PyTorchModelHubMixin
+class REBNCONV(nn.Module):
+    def __init__(self, in_ch=3, out_ch=3, dirate=1, stride=1):
+        super(REBNCONV, self).__init__()
+        self.conv_s1 = nn.Conv2d(
+            in_ch, out_ch, 3, padding=1 * dirate, dilation=1 * dirate, stride=stride
+        )
+        self.bn_s1 = nn.BatchNorm2d(out_ch)
+        self.relu_s1 = nn.ReLU(inplace=True)
+    def forward(self, x):
+        hx = x
+        xout = self.relu_s1(self.bn_s1(self.conv_s1(hx)))
+        return xout
+def _upsample_like(src, tar):
+    src = F.interpolate(src, size=tar.shape[2:], mode="bilinear")
+    return src
+### RSU-7 ###
+class RSU7(nn.Module):
+    def __init__(self, in_ch=3, mid_ch=12, out_ch=3, img_size=512):
+        super(RSU7, self).__init__()
+        self.in_ch = in_ch
+        self.mid_ch = mid_ch
+        self.out_ch = out_ch
+        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)  ## 1 -> 1/2
+        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.rebnconv7 = REBNCONV(mid_ch, mid_ch, dirate=2)
+        self.rebnconv6d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+    def forward(self, x):
+        b, c, h, w = x.shape
+        hx = x
+        hxin = self.rebnconvin(hx)
+        hx1 = self.rebnconv1(hxin)
+        hx = self.pool1(hx1)
+        hx2 = self.rebnconv2(hx)
+        hx = self.pool2(hx2)
+        hx3 = self.rebnconv3(hx)
+        hx = self.pool3(hx3)
+        hx4 = self.rebnconv4(hx)
+        hx = self.pool4(hx4)
+        hx5 = self.rebnconv5(hx)
+        hx = self.pool5(hx5)
+        hx6 = self.rebnconv6(hx)
+        hx7 = self.rebnconv7(hx6)
+        hx6d = self.rebnconv6d(torch.cat((hx7, hx6), 1))
+        hx6dup = _upsample_like(hx6d, hx5)
+        hx5d = self.rebnconv5d(torch.cat((hx6dup, hx5), 1))
+        hx5dup = _upsample_like(hx5d, hx4)
+        hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
+        hx4dup = _upsample_like(hx4d, hx3)
+        hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
+        hx3dup = _upsample_like(hx3d, hx2)
+        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
+        hx2dup = _upsample_like(hx2d, hx1)
+        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
+        return hx1d + hxin
+### RSU-6 ###
+class RSU6(nn.Module):
+    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+        super(RSU6, self).__init__()
+        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=2)
+        self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+    def forward(self, x):
+        hx = x
+        hxin = self.rebnconvin(hx)
+        hx1 = self.rebnconv1(hxin)
+        hx = self.pool1(hx1)
+        hx2 = self.rebnconv2(hx)
+        hx = self.pool2(hx2)
+        hx3 = self.rebnconv3(hx)
+        hx = self.pool3(hx3)
+        hx4 = self.rebnconv4(hx)
+        hx = self.pool4(hx4)
+        hx5 = self.rebnconv5(hx)
+        hx6 = self.rebnconv6(hx5)
+        hx5d = self.rebnconv5d(torch.cat((hx6, hx5), 1))
+        hx5dup = _upsample_like(hx5d, hx4)
+        hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
+        hx4dup = _upsample_like(hx4d, hx3)
+        hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
+        hx3dup = _upsample_like(hx3d, hx2)
+        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
+        hx2dup = _upsample_like(hx2d, hx1)
+        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
+        return hx1d + hxin
+### RSU-5 ###
+class RSU5(nn.Module):
+    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+        super(RSU5, self).__init__()
+        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=2)
+        self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+    def forward(self, x):
+        hx = x
+        hxin = self.rebnconvin(hx)
+        hx1 = self.rebnconv1(hxin)
+        hx = self.pool1(hx1)
+        hx2 = self.rebnconv2(hx)
+        hx = self.pool2(hx2)
+        hx3 = self.rebnconv3(hx)
+        hx = self.pool3(hx3)
+        hx4 = self.rebnconv4(hx)
+        hx5 = self.rebnconv5(hx4)
+        hx4d = self.rebnconv4d(torch.cat((hx5, hx4), 1))
+        hx4dup = _upsample_like(hx4d, hx3)
+        hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
+        hx3dup = _upsample_like(hx3d, hx2)
+        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
+        hx2dup = _upsample_like(hx2d, hx1)
+        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
+        return hx1d + hxin
+### RSU-4 ###
+class RSU4(nn.Module):
+    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+        super(RSU4, self).__init__()
+        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+        self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
+        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=2)
+        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
+        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+    def forward(self, x):
+        hx = x
+        hxin = self.rebnconvin(hx)
+        hx1 = self.rebnconv1(hxin)
+        hx = self.pool1(hx1)
+        hx2 = self.rebnconv2(hx)
+        hx = self.pool2(hx2)
+        hx3 = self.rebnconv3(hx)
+        hx4 = self.rebnconv4(hx3)
+        hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
+        hx3dup = _upsample_like(hx3d, hx2)
+        hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
+        hx2dup = _upsample_like(hx2d, hx1)
+        hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
+        return hx1d + hxin
+### RSU-4F ###
+class RSU4F(nn.Module):
+    def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
+        super(RSU4F, self).__init__()
+        self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
+        self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
+        self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=2)
+        self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=4)
+        self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=8)
+        self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=4)
+        self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=2)
+        self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
+    def forward(self, x):
+        hx = x
+        hxin = self.rebnconvin(hx)
+        hx1 = self.rebnconv1(hxin)
+        hx2 = self.rebnconv2(hx1)
+        hx3 = self.rebnconv3(hx2)
+        hx4 = self.rebnconv4(hx3)
+        hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
+        hx2d = self.rebnconv2d(torch.cat((hx3d, hx2), 1))
+        hx1d = self.rebnconv1d(torch.cat((hx2d, hx1), 1))
+        return hx1d + hxin
+class myrebnconv(nn.Module):
+    def __init__(
+        self,
+        in_ch=3,
+        out_ch=1,
+        kernel_size=3,
+        stride=1,
+        padding=1,
+        dilation=1,
+        groups=1,
+    ):
+        super(myrebnconv, self).__init__()
+        self.conv = nn.Conv2d(
+            in_ch,
+            out_ch,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            dilation=dilation,
+            groups=groups,
+        )
+        self.bn = nn.BatchNorm2d(out_ch)
+        self.rl = nn.ReLU(inplace=True)
+    def forward(self, x):
+        return self.rl(self.bn(self.conv(x)))
+class BriaRMBG(nn.Module, PyTorchModelHubMixin):
+    def __init__(self, config: dict = {"in_ch": 3, "out_ch": 1}):
+        super(BriaRMBG, self).__init__()
+        in_ch = config["in_ch"]
+        out_ch = config["out_ch"]
+        self.conv_in = nn.Conv2d(in_ch, 64, 3, stride=2, padding=1)
+        self.pool_in = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.stage1 = RSU7(64, 32, 64)
+        self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.stage2 = RSU6(64, 32, 128)
+        self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.stage3 = RSU5(128, 64, 256)
+        self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.stage4 = RSU4(256, 128, 512)
+        self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.stage5 = RSU4F(512, 256, 512)
+        self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
+        self.stage6 = RSU4F(512, 256, 512)
+        # decoder
+        self.stage5d = RSU4F(1024, 256, 512)
+        self.stage4d = RSU4(1024, 128, 256)
+        self.stage3d = RSU5(512, 64, 128)
+        self.stage2d = RSU6(256, 32, 64)
+        self.stage1d = RSU7(128, 16, 64)
+        self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
+        self.side2 = nn.Conv2d(64, out_ch, 3, padding=1)
+        self.side3 = nn.Conv2d(128, out_ch, 3, padding=1)
+        self.side4 = nn.Conv2d(256, out_ch, 3, padding=1)
+        self.side5 = nn.Conv2d(512, out_ch, 3, padding=1)
+        self.side6 = nn.Conv2d(512, out_ch, 3, padding=1)
+        # self.outconv = nn.Conv2d(6*out_ch,out_ch,1)
+    def forward(self, x):
+        hx = x
+        hxin = self.conv_in(hx)
+        # hx = self.pool_in(hxin)
+        # stage 1
+        hx1 = self.stage1(hxin)
+        hx = self.pool12(hx1)
+        # stage 2
+        hx2 = self.stage2(hx)
+        hx = self.pool23(hx2)
+        # stage 3
+        hx3 = self.stage3(hx)
+        hx = self.pool34(hx3)
+        # stage 4
+        hx4 = self.stage4(hx)
+        hx = self.pool45(hx4)
+        # stage 5
+        hx5 = self.stage5(hx)
+        hx = self.pool56(hx5)
+        # stage 6
+        hx6 = self.stage6(hx)
+        hx6up = _upsample_like(hx6, hx5)
+        # -------------------- decoder --------------------
+        hx5d = self.stage5d(torch.cat((hx6up, hx5), 1))
+        hx5dup = _upsample_like(hx5d, hx4)
+        hx4d = self.stage4d(torch.cat((hx5dup, hx4), 1))
+        hx4dup = _upsample_like(hx4d, hx3)
+        hx3d = self.stage3d(torch.cat((hx4dup, hx3), 1))
+        hx3dup = _upsample_like(hx3d, hx2)
+        hx2d = self.stage2d(torch.cat((hx3dup, hx2), 1))
+        hx2dup = _upsample_like(hx2d, hx1)
+        hx1d = self.stage1d(torch.cat((hx2dup, hx1), 1))
+        # side output
+        d1 = self.side1(hx1d)
+        d1 = _upsample_like(d1, x)
+        d2 = self.side2(hx2d)
+        d2 = _upsample_like(d2, x)
+        d3 = self.side3(hx3d)
+        d3 = _upsample_like(d3, x)
+        d4 = self.side4(hx4d)
+        d4 = _upsample_like(d4, x)
+        d5 = self.side5(hx5d)
+        d5 = _upsample_like(d5, x)
+        d6 = self.side6(hx6)
+        d6 = _upsample_like(d6, x)
+        return [
+            F.sigmoid(d1),
+            F.sigmoid(d2),
+            F.sigmoid(d3),
+            F.sigmoid(d4),
+            F.sigmoid(d5),
+            F.sigmoid(d6),
+        ], [hx1d, hx2d, hx3d, hx4d, hx5d, hx6]

requirements.txt CHANGED Viewed

@@ -10,5 +10,7 @@ safetensors
 pillow
 einops
 peft
-pyzipper
 python-multipart==0.0.12

 pillow
 einops
 peft
+protobuf==3.20.*
+huggingface_hub
+spaces
 python-multipart==0.0.12