TRELLIS-Boceto3D

Runtime error

App Files Files Community

cavargas10 committed on Apr 9, 2025

Commit

a65fb48

verified ·

1 Parent(s): ea3c5de

Update app.py

Browse files

Files changed (1) hide show

app.py +143 -51

app.py CHANGED Viewed

@@ -1,35 +1,60 @@
-import gradio as gr
 import spaces
 import os
 import shutil
-import random
 os.environ['SPCONV_ALGO'] = 'native'
 from typing import *
 import torch
 import numpy as np
 import imageio
-import cv2
-import torchvision.transforms.functional as TF
-from gradio_litmodel3d import LitModel3D
 from easydict import EasyDict as edict
 from PIL import Image, ImageOps
 from trellis.pipelines import TrellisImageTo3DPipeline
 from trellis.representations import Gaussian, MeshExtractResult
 from trellis.utils import render_utils, postprocessing_utils
 from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
 from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
 from controlnet_aux import PidiNetDetector, HEDdetector
 from diffusers.utils import load_image
 from huggingface_hub import HfApi
 from pathlib import Path
 from gradio_imageslider import ImageSlider
 # Prompt presets for the sketch-to-image stage. Each entry has a display name, a prompt
 # template and a negative prompt. The "{prompt}" marker is presumably replaced with the
 # user's text by apply_style (not visible here) - confirm against that helper.
 style_list = [
     {
         "name": "3D Model",
         "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting",
         "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting",
     },
 ]
 # Lookup table: style name -> (prompt template, negative prompt).
 styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
 # Style names in declaration order.
 STYLE_NAMES = list(styles.keys())
@@ -50,36 +75,31 @@ def start_session(req: gr.Request):
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     os.makedirs(user_dir, exist_ok=True)
 def end_session(req: gr.Request):
     """Delete the per-session working directory, TMP_DIR/<session_hash>, with all its contents."""
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     shutil.rmtree(user_dir)
 @spaces.GPU
-def preprocess_image(
-    image: Image.Image,
-    prompt: str = "",
-    negative_prompt: str = "",
-    style_name: str = "",
-    num_steps: int = 25,
-    guidance_scale: float = 5,
-    controlnet_conditioning_scale: float = 1.0,
-    req: gr.Request = None
-) -> Tuple[Image.Image, Image.Image]:
-    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
-    os.makedirs(user_dir, exist_ok=True)
-    width, height = image['composite'].size
     ratio = np.sqrt(1024. * 1024. / (width * height))
     new_width, new_height = int(width * ratio), int(height * ratio)
     image = image['composite'].resize((new_width, new_height))
     image = ImageOps.invert(image)
-    print("image:", type(image))
     prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
     print("params:", prompt, negative_prompt, style_name, num_steps, guidance_scale, controlnet_conditioning_scale)
     output = pipe_control(
         prompt=prompt,
         negative_prompt=negative_prompt,
@@ -88,15 +108,15 @@ def preprocess_image(
         controlnet_conditioning_scale=controlnet_conditioning_scale,
         guidance_scale=guidance_scale,
         width=new_width,
-        height=new_height
-    ).images[0]
-    processed_image_path = os.path.join(user_dir, 'processed_image.png')
-    output.save(processed_image_path)
     processed_image = pipeline.preprocess_image(output)
-    return image, processed_image
 def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
     return {
@@ -142,28 +162,48 @@ def get_seed(randomize_seed: bool, seed: int) -> int:
 @spaces.GPU
 def image_to_3d(
     image: Image.Image,
     seed: int,
     ss_guidance_strength: float,
     ss_sampling_steps: int,
     slat_guidance_strength: float,
     slat_sampling_steps: int,
     req: gr.Request,
 ) -> Tuple[dict, str]:
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
-    outputs = pipeline.run(
-        image[1],
-        seed=seed,
-        formats=["gaussian", "mesh"],
-        preprocess_image=False,
-        sparse_structure_sampler_params={
-            "steps": ss_sampling_steps,
-            "cfg_strength": ss_guidance_strength,
-        },
-        slat_sampler_params={
-            "steps": slat_sampling_steps,
-            "cfg_strength": slat_guidance_strength,
-        },
-    )
     video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
     video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
     video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
@@ -200,7 +240,38 @@ def extract_gaussian(state: dict, req: gr.Request) -> Tuple[str, str]:
     torch.cuda.empty_cache()
     return gaussian_path, gaussian_path
 with gr.Blocks(delete_cache=(600, 600)) as demo:
     with gr.Row():
         with gr.Column():
             with gr.Column():
@@ -245,10 +316,14 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
                             ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
                             ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
                         gr.Markdown("Stage 2: Structured Latent Generation")
-                    with gr.Row():
                             slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
                             slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
             with gr.Accordion(label="GLB Extraction Settings", open=False):
                 mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)
                 texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
@@ -269,6 +344,7 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
                 download_glb = gr.DownloadButton(label="Download GLB", interactive=False)
                 download_gs = gr.DownloadButton(label="Download Gaussian", interactive=False)
     do_preprocess = gr.State(True)
     output_buf = gr.State()
@@ -284,10 +360,23 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
             run_on_click=True,
             examples_per_page=64,
         )
     demo.load(start_session)
     demo.unload(end_session)
     image_prompt.clear(
         fn=reset_canvas,
         outputs = [image_prompt]
@@ -302,6 +391,11 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
         inputs=[image_prompt, prompt, negative_prompt, style, num_steps, guidance_scale, controlnet_conditioning_scale],
         outputs=[image_prompt_processed],
     )
     generate_btn.click(
         get_seed,
@@ -309,7 +403,7 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
         outputs=[seed],
     ).then(
         image_to_3d,
-        inputs=[image_prompt_processed, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
         outputs=[output_buf, video_output],
     ).then(
         lambda: tuple([gr.Button(interactive=True), gr.Button(interactive=True)]),
@@ -343,15 +437,13 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
         lambda: gr.Button(interactive=False),
         outputs=[download_glb],
     )
-# Launch the Gradio app
 if __name__ == "__main__":
-    pipeline = TrellisImageTo3DPipeline.from_pretrained("cavargas10/TRELLIS")
     pipeline.cuda()
     device = "cuda" if torch.cuda.is_available() else "cpu"
-    #scribble controlnet
     controlnet = ControlNetModel.from_pretrained(
     "xinsir/controlnet-scribble-sdxl-1.0",
     torch_dtype=torch.float16
@@ -371,4 +463,4 @@ if __name__ == "__main__":
         pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))    # Preload rembg
     except:
         pass
-    demo.launch(show_error=True)

+import gradio as gr
 import spaces
+from gradio_litmodel3d import LitModel3D
 import os
 import shutil
 os.environ['SPCONV_ALGO'] = 'native'
 from typing import *
 import torch
 import numpy as np
 import imageio
 from easydict import EasyDict as edict
 from PIL import Image, ImageOps
 from trellis.pipelines import TrellisImageTo3DPipeline
 from trellis.representations import Gaussian, MeshExtractResult
 from trellis.utils import render_utils, postprocessing_utils
+import os
+import random
+import torch
+import torchvision.transforms.functional as TF
 from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
 from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
 from controlnet_aux import PidiNetDetector, HEDdetector
 from diffusers.utils import load_image
 from huggingface_hub import HfApi
 from pathlib import Path
+from PIL import Image, ImageOps
+import torch
+import numpy as np
+import cv2
+import os
+import random
 from gradio_imageslider import ImageSlider
 # Prompt presets for the sketch-to-image stage. Each entry has a display name, a prompt
 # template and a negative prompt. The "{prompt}" marker is presumably replaced with the
 # user's text by apply_style (not visible here) - confirm against that helper.
 style_list = [
+    {
+        "name": "(No style)",
+        "prompt": "{prompt}",
+        "negative_prompt": "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
+    },
+    {
+        "name": "Cinematic",
+        "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
+        "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured",
+    },
     {
         "name": "3D Model",
         "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting",
         "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting",
     },
+    {
+        "name": "Anime",
+        "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime,  highly detailed",
+        "negative_prompt": "photo, deformed, black and white, realism, disfigured, low contrast",
+    },
 ]
 # Lookup table: style name -> (prompt template, negative prompt).
 styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
 # Style names in declaration order.
 STYLE_NAMES = list(styles.keys())
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     os.makedirs(user_dir, exist_ok=True)
 def end_session(req: gr.Request):
     """Delete the per-session working directory, TMP_DIR/<session_hash>, with all its contents."""
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     shutil.rmtree(user_dir)
 @spaces.GPU
+def preprocess_image(image: Image.Image,
+                    prompt: str = "",
+                    negative_prompt: str = "",
+                    style_name: str = "",
+                    num_steps: int = 25,
+                    guidance_scale: float = 5,
+                    controlnet_conditioning_scale: float = 1.0,
+                    ) -> Image.Image:
+    width, height  = image['composite'].size
     ratio = np.sqrt(1024. * 1024. / (width * height))
     new_width, new_height = int(width * ratio), int(height * ratio)
     image = image['composite'].resize((new_width, new_height))
     image = ImageOps.invert(image)
+    print("image:",type(image))
     prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
     print("params:", prompt, negative_prompt, style_name, num_steps, guidance_scale, controlnet_conditioning_scale)
     output = pipe_control(
         prompt=prompt,
         negative_prompt=negative_prompt,
         controlnet_conditioning_scale=controlnet_conditioning_scale,
         guidance_scale=guidance_scale,
         width=new_width,
+        height=new_height).images[0]
     processed_image = pipeline.preprocess_image(output)
+    return (image, processed_image)
+def preprocess_images(images: List[Tuple[Image.Image, str]]) -> List[Image.Image]:
+    """
+    Preprocess every entry of a multi-image gallery value.
+
+    Args:
+        images: Gallery entries; only the first element of each entry (the image) is used.
+
+    Returns:
+        The `pipeline.preprocess_image` result for each entry, in input order.
+    """
+    # Pull out all images first, then preprocess them one by one.
+    extracted = [entry[0] for entry in images]
+    results = []
+    for picture in extracted:
+        results.append(pipeline.preprocess_image(picture))
+    return results
 def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
     return {
 @spaces.GPU
 def image_to_3d(
     image: Image.Image,
+    multiimages: List[Tuple[Image.Image, str]],
+    is_multiimage: bool,
     seed: int,
     ss_guidance_strength: float,
     ss_sampling_steps: int,
     slat_guidance_strength: float,
     slat_sampling_steps: int,
+    multiimage_algo: Literal["multidiffusion", "stochastic"],
     req: gr.Request,
 ) -> Tuple[dict, str]:
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+    if not is_multiimage:
+        outputs = pipeline.run(
+            image[1],
+            seed=seed,
+            formats=["gaussian", "mesh"],
+            preprocess_image=False,
+            sparse_structure_sampler_params={
+                "steps": ss_sampling_steps,
+                "cfg_strength": ss_guidance_strength,
+            },
+            slat_sampler_params={
+                "steps": slat_sampling_steps,
+                "cfg_strength": slat_guidance_strength,
+            },
+        )
+    else:
+        outputs = pipeline.run_multi_image(
+            [image[0] for image in multiimages],
+            seed=seed,
+            formats=["gaussian", "mesh"],
+            preprocess_image=False,
+            sparse_structure_sampler_params={
+                "steps": ss_sampling_steps,
+                "cfg_strength": ss_guidance_strength,
+            },
+            slat_sampler_params={
+                "steps": slat_sampling_steps,
+                "cfg_strength": slat_guidance_strength,
+            },
+            mode=multiimage_algo,
+        )
     video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
     video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
     video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
     torch.cuda.empty_cache()
     return gaussian_path, gaussian_path
+def prepare_multi_example() -> List[Image.Image]:
+    """
+    Build one preview strip per multi-image example case.
+
+    Files in `assets/example_multi_image` are grouped by the part of their name before the
+    first underscore. For each case, `{case}_1.png` to `{case}_3.png` are resized to a height
+    of 512 (keeping the aspect ratio) and concatenated horizontally.
+
+    Returns:
+        One concatenated image per case, ordered by case name.
+    """
+    example_dir = "assets/example_multi_image"
+    # Sorted so the example order is stable across runs; iterating a set of strings is not.
+    cases = sorted({name.split('_')[0] for name in os.listdir(example_dir)})
+    images = []
+    for case in cases:
+        views = []
+        for i in range(1, 4):
+            # The context manager closes the file handle once the pixels are copied out.
+            with Image.open(f'{example_dir}/{case}_{i}.png') as img:
+                W, H = img.size
+                views.append(np.array(img.resize((int(W / H * 512), 512))))
+        images.append(Image.fromarray(np.concatenate(views, axis=1)))
+    return images
+def split_image(image: Image.Image) -> List[Image.Image]:
+    """
+    Split a horizontally concatenated image into its views and preprocess each one.
+
+    A view is a run of adjacent columns that each contain at least one pixel with
+    non-zero alpha.
+
+    Args:
+        image: Image whose array form has an alpha channel at index 3.
+
+    Returns:
+        The `pipeline.preprocess_image` result for every view, left to right.
+    """
+    image = np.array(image)
+    alpha = np.any(image[..., 3] > 0, axis=0)
+    # Pad with a transparent column on each side so a view touching the left or right border
+    # still yields a start/end pair; otherwise the lists go out of step and zip() mis-pairs them.
+    padded = np.concatenate(([False], alpha, [False]))
+    first_cols = np.where(~padded[:-1] & padded[1:])[0].tolist()  # first opaque column of each view
+    stop_cols = np.where(padded[:-1] & ~padded[1:])[0].tolist()   # one past the last opaque column
+    images = []
+    for first, stop in zip(first_cols, stop_cols):
+        # Keep one column of left margin when there is one, matching the previous crop.
+        images.append(Image.fromarray(image[:, max(first - 1, 0):stop]))
+    # The sketch `preprocess_image` indexes image['composite'] and cannot take a plain image,
+    # so call the TRELLIS preprocessor directly, as `preprocess_images` does.
+    return [pipeline.preprocess_image(image) for image in images]
 with gr.Blocks(delete_cache=(600, 600)) as demo:
+    gr.Markdown("""
+    ## Sketch to 3D with TRELLIS
+    1. Fast sketch to image with SDXL Flash, using [@xinsir](https://huggingface.co/xinsir) [scribble sdxl controlnet](https://huggingface.co/xinsir/controlnet-scribble-sdxl-1.0) and [sdxl flash](https://huggingface.co/sd-community/sdxl-flash)
+    2. Scalable and versatile image to 3D generation using [TRELLIS](https://trellis3d.github.io/)
+    ### 🎨🖌️ draw or upload a sketch and click "Generate" to create a 3D asset ✨
+    """)
     with gr.Row():
         with gr.Column():
             with gr.Column():
                             ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
                             ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
                         gr.Markdown("Stage 2: Structured Latent Generation")
+                        with gr.Row():
                             slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
                             slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
+                        multiimage_algo = gr.Radio(["stochastic", "multidiffusion"], label="Multi-image Algorithm", value="stochastic")
+            with gr.Tab(label="Multiple Images", id=1, visible=False) as multiimage_input_tab:
+                    multiimage_prompt = gr.Gallery(label="Image Prompt", format="png", type="pil", height=300, columns=3)
             with gr.Accordion(label="GLB Extraction Settings", open=False):
                 mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)
                 texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
                 download_glb = gr.DownloadButton(label="Download GLB", interactive=False)
                 download_gs = gr.DownloadButton(label="Download Gaussian", interactive=False)
+    is_multiimage = gr.State(False)
     do_preprocess = gr.State(True)
     output_buf = gr.State()
             run_on_click=True,
             examples_per_page=64,
         )
+    with gr.Row(visible=False) as multiimage_example:
+        examples_multi = gr.Examples(
+            examples=prepare_multi_example(),
+            inputs=[image_prompt],
+            fn=split_image,
+            outputs=[multiimage_prompt],
+            run_on_click=True,
+            examples_per_page=8,
+        )
     demo.load(start_session)
     demo.unload(end_session)
+    multiimage_input_tab.select(
+        lambda: tuple([True, gr.Row.update(visible=False), gr.Row.update(visible=True)]),
+        outputs=[is_multiimage, single_image_example, multiimage_example]
+    )
     image_prompt.clear(
         fn=reset_canvas,
         outputs = [image_prompt]
         inputs=[image_prompt, prompt, negative_prompt, style, num_steps, guidance_scale, controlnet_conditioning_scale],
         outputs=[image_prompt_processed],
     )
+    multiimage_prompt.upload(
+        preprocess_images,
+        inputs=[multiimage_prompt],
+        outputs=[multiimage_prompt],
+    )
     generate_btn.click(
         get_seed,
         outputs=[seed],
     ).then(
         image_to_3d,
+        inputs=[image_prompt_processed, multiimage_prompt, is_multiimage, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps, multiimage_algo],
         outputs=[output_buf, video_output],
     ).then(
         lambda: tuple([gr.Button(interactive=True), gr.Button(interactive=True)]),
         lambda: gr.Button(interactive=False),
         outputs=[download_glb],
     )
 if __name__ == "__main__":
+    pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
     pipeline.cuda()
     device = "cuda" if torch.cuda.is_available() else "cpu"
     controlnet = ControlNetModel.from_pretrained(
     "xinsir/controlnet-scribble-sdxl-1.0",
     torch_dtype=torch.float16
         pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))    # Preload rembg
     except:
         pass
+    demo.launch()