TRELLIS-Boceto3D

Runtime error

App Files Files Community

cavargas10 committed on Apr 9, 2025

Commit

ea3c5de

verified ·

1 Parent(s): 410cd67

Update app.py

Browse files

Files changed (1) hide show

app.py +104 -52

app.py CHANGED Viewed

@@ -1,23 +1,21 @@
-import gradio as gr
 import spaces
-from gradio_litmodel3d import LitModel3D
 import os
 import shutil
 os.environ['SPCONV_ALGO'] = 'native'
 from typing import *
 import torch
-import torchvision.transforms.functional as TF
 import numpy as np
-import random
 import imageio
 import cv2
 from easydict import EasyDict as edict
 from PIL import Image, ImageOps
 from trellis.pipelines import TrellisImageTo3DPipeline
 from trellis.representations import Gaussian, MeshExtractResult
 from trellis.utils import render_utils, postprocessing_utils
 from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
 from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
 from controlnet_aux import PidiNetDetector, HEDdetector
@@ -27,26 +25,16 @@ from pathlib import Path
 from gradio_imageslider import ImageSlider
 style_list = [
-    {
-        "name": "(No style)",
-        "prompt": "{prompt}",
-        "negative_prompt": "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
-    },
-    {
-        "name": "Cinematic",
-        "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy",
-        "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured",
-    },
     {
         "name": "3D Model",
         "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting",
         "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting",
     },
 ]
 styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
 STYLE_NAMES = list(styles.keys())
 DEFAULT_STYLE_NAME = "(No style)"
 MAX_SEED = np.iinfo(np.int32).max
 TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
 os.makedirs(TMP_DIR, exist_ok=True)
@@ -61,26 +49,37 @@ def apply_style(style_name: str, positive: str, negative: str = "") -> tuple[str
 def start_session(req: gr.Request):
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     os.makedirs(user_dir, exist_ok=True)
 def end_session(req: gr.Request):
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     shutil.rmtree(user_dir)
 @spaces.GPU
-def preprocess_image(image: Image.Image,
-                    prompt: str = "",
-                    negative_prompt: str = "",
-                    style_name: str = "",
-                    num_steps: int = 25,
-                    guidance_scale: float = 5,
-                    controlnet_conditioning_scale: float = 1.0,
-                    ) -> Image.Image:
-    width, height  = image['composite'].size
     ratio = np.sqrt(1024. * 1024. / (width * height))
     new_width, new_height = int(width * ratio), int(height * ratio)
     image = image['composite'].resize((new_width, new_height))
     image = ImageOps.invert(image)
     prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
     output = pipe_control(
         prompt=prompt,
         negative_prompt=negative_prompt,
@@ -89,9 +88,15 @@ def preprocess_image(image: Image.Image,
         controlnet_conditioning_scale=controlnet_conditioning_scale,
         guidance_scale=guidance_scale,
         width=new_width,
-        height=new_height).images[0]
     processed_image = pipeline.preprocess_image(output)
-    return (image, processed_image)
 def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
     return {
@@ -108,7 +113,7 @@ def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
             'faces': mesh.faces.cpu().numpy(),
         },
     }
 def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
     gs = Gaussian(
         aabb=state['gaussian']['aabb'],
@@ -123,10 +128,12 @@ def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
     gs._scaling = torch.tensor(state['gaussian']['_scaling'], device='cuda')
     gs._rotation = torch.tensor(state['gaussian']['_rotation'], device='cuda')
     gs._opacity = torch.tensor(state['gaussian']['_opacity'], device='cuda')
     mesh = edict(
         vertices=torch.tensor(state['mesh']['vertices'], device='cuda'),
         faces=torch.tensor(state['mesh']['faces'], device='cuda'),
     )
     return gs, mesh
 def get_seed(randomize_seed: bool, seed: int) -> int:
@@ -143,11 +150,10 @@ def image_to_3d(
     req: gr.Request,
 ) -> Tuple[dict, str]:
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
-    os.makedirs(user_dir, exist_ok=True)
     outputs = pipeline.run(
         image[1],
         seed=seed,
-        formats=["mesh"],
         preprocess_image=False,
         sparse_structure_sampler_params={
             "steps": ss_sampling_steps,
@@ -158,7 +164,9 @@ def image_to_3d(
             "cfg_strength": slat_guidance_strength,
         },
     )
-    video = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
     video_path = os.path.join(user_dir, 'sample.mp4')
     imageio.mimsave(video_path, video, fps=15)
     state = pack_state(outputs['gaussian'][0], outputs['mesh'][0])
@@ -182,14 +190,17 @@ def extract_glb(
 def reset_do_preprocess():
     return True
 with gr.Blocks(delete_cache=(600, 600)) as demo:
-    gr.Markdown("""
-    ## Sketch to 3D with TRELLIS
-    1. Fast sketch to image with SDXL Flash, using [@xinsir](https://huggingface.co/xinsir) [scribble sdxl controlnet](https://huggingface.co/xinsir/controlnet-scribble-sdxl-1.0) and [sdxl flash](https://huggingface.co/sd-community/sdxl-flash)
-    2. Scalable and versatile image to 3D generation using [TRELLIS](https://trellis3d.github.io/)
-    ### ð   ¨ð    ï¸   draw or upload a sketch and click "Generate" to create a 3D asset â  ¨
-    """)
     with gr.Row():
         with gr.Column():
             with gr.Column():
@@ -200,9 +211,11 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
                 with gr.Row():
                     prompt = gr.Textbox(label="Prompt")
                     style = gr.Dropdown(label="Style", choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME)
                 with gr.Accordion(label="Generation Settings", open=False):
                     with gr.Tab(label="sketch-to-image generation"):
                         negative_prompt = gr.Textbox(label="Negative prompt")
                         num_steps = gr.Slider(
                         label="Number of steps",
                         minimum=1,
@@ -232,32 +245,54 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
                             ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
                             ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
                         gr.Markdown("Stage 2: Structured Latent Generation")
-                        with gr.Row():
                             slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
                             slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
             with gr.Accordion(label="GLB Extraction Settings", open=False):
                 mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)
                 texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
             with gr.Row():
                 extract_glb_btn = gr.Button("Extract GLB", interactive=False)
             gr.Markdown("""
-                        *NOTE: GLB file can be downloaded after extraction.*
                         """)
         with gr.Column():
             video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
             image_prompt_processed = ImageSlider(label="processed sketch", interactive=False, type="pil", height=512)
-            model_output = LitModel3D(label="Extracted GLB", exposure=10.0, height=300)
             with gr.Row():
-                download_glb = gr.DownloadButton(label="Download GLB", interactive=False)
     output_buf = gr.State()
     demo.load(start_session)
     demo.unload(end_session)
     image_prompt.clear(
         fn=reset_canvas,
         outputs = [image_prompt]
     )
     sketch_btn.click(
         get_seed,
         inputs=[randomize_seed, seed],
@@ -277,13 +312,15 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
         inputs=[image_prompt_processed, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
         outputs=[output_buf, video_output],
     ).then(
-        lambda: gr.Button(interactive=True),
-        outputs=[extract_glb_btn],
     )
     video_output.clear(
-        lambda: gr.Button(interactive=False),
-        outputs=[extract_glb_btn],
     )
     extract_glb_btn.click(
         extract_glb,
         inputs=[output_buf, mesh_simplify, texture_size],
@@ -292,21 +329,35 @@ with gr.Blocks(delete_cache=(600, 600)) as demo:
         lambda: gr.Button(interactive=True),
         outputs=[download_glb],
     )
     model_output.clear(
         lambda: gr.Button(interactive=False),
         outputs=[download_glb],
     )
 if __name__ == "__main__":
-    pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
     pipeline.cuda()
     device = "cuda" if torch.cuda.is_available() else "cpu"
     #scribble controlnet
     controlnet = ControlNetModel.from_pretrained(
     "xinsir/controlnet-scribble-sdxl-1.0",
     torch_dtype=torch.float16
 )
     vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
     pipe_control = StableDiffusionXLControlNetPipeline.from_pretrained(
         "sd-community/sdxl-flash",
         controlnet=controlnet,
@@ -315,8 +366,9 @@ if __name__ == "__main__":
     )
     pipe_control.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe_control.scheduler.config)
     pipe_control.to(device)
     try:
         pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))    # Preload rembg
     except:
         pass
-    demo.launch()

+import gradio as gr
 import spaces
 import os
 import shutil
+import random
 os.environ['SPCONV_ALGO'] = 'native'
 from typing import *
 import torch
 import numpy as np
 import imageio
 import cv2
+import torchvision.transforms.functional as TF
+from gradio_litmodel3d import LitModel3D
 from easydict import EasyDict as edict
 from PIL import Image, ImageOps
 from trellis.pipelines import TrellisImageTo3DPipeline
 from trellis.representations import Gaussian, MeshExtractResult
 from trellis.utils import render_utils, postprocessing_utils
 from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
 from diffusers import DDIMScheduler, EulerAncestralDiscreteScheduler
 from controlnet_aux import PidiNetDetector, HEDdetector
 from gradio_imageslider import ImageSlider
 # Prompt templates offered in the "Style" dropdown. Each entry carries a
 # display name, a positive prompt template with a {prompt} placeholder, and
 # a negative prompt.
 style_list = [
     {
         "name": "3D Model",
         "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting",
         "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting",
     },
 ]
 # Map each style name to its (prompt template, negative prompt) pair.
 styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
 STYLE_NAMES = list(styles.keys())
 # The default must be a registered style: it is used as the initial value of
 # the Style dropdown, whose choices are STYLE_NAMES. The previous default,
 # "(No style)", is not defined in style_list, so it is derived from the
 # registered names instead (currently "3D Model").
 DEFAULT_STYLE_NAME = STYLE_NAMES[0]
 # Upper bound for seeds: the largest signed 32-bit integer.
 MAX_SEED = np.iinfo(np.int32).max
 # Working directory located next to this file; created at import time.
 TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
 os.makedirs(TMP_DIR, exist_ok=True)
 # Create the per-session working directory TMP_DIR/<session hash> for the
 # incoming request. An already existing directory is left untouched
 # (exist_ok=True).
 def start_session(req: gr.Request):
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     os.makedirs(user_dir, exist_ok=True)
 # Delete the per-session working directory TMP_DIR/<session hash> together
 # with everything stored in it.
 def end_session(req: gr.Request):
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     # NOTE(review): rmtree is called without ignore_errors, so a missing
     # directory raises here -- confirm that is the intended behavior.
     shutil.rmtree(user_dir)
 @spaces.GPU
+def preprocess_image(
+    image: Image.Image,
+    prompt: str = "",
+    negative_prompt: str = "",
+    style_name: str = "",
+    num_steps: int = 25,
+    guidance_scale: float = 5,
+    controlnet_conditioning_scale: float = 1.0,
+    req: gr.Request = None
+) -> Tuple[Image.Image, Image.Image]:
+    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+    os.makedirs(user_dir, exist_ok=True)
+    width, height = image['composite'].size
     ratio = np.sqrt(1024. * 1024. / (width * height))
     new_width, new_height = int(width * ratio), int(height * ratio)
     image = image['composite'].resize((new_width, new_height))
     image = ImageOps.invert(image)
+    print("image:", type(image))
     prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
+    print("params:", prompt, negative_prompt, style_name, num_steps, guidance_scale, controlnet_conditioning_scale)
     output = pipe_control(
         prompt=prompt,
         negative_prompt=negative_prompt,
         controlnet_conditioning_scale=controlnet_conditioning_scale,
         guidance_scale=guidance_scale,
         width=new_width,
+        height=new_height
+    ).images[0]
+    processed_image_path = os.path.join(user_dir, 'processed_image.png')
+    output.save(processed_image_path)
     processed_image = pipeline.preprocess_image(output)
+    return image, processed_image
 def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
     return {
             'faces': mesh.faces.cpu().numpy(),
         },
     }
 def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
     gs = Gaussian(
         aabb=state['gaussian']['aabb'],
     gs._scaling = torch.tensor(state['gaussian']['_scaling'], device='cuda')
     gs._rotation = torch.tensor(state['gaussian']['_rotation'], device='cuda')
     gs._opacity = torch.tensor(state['gaussian']['_opacity'], device='cuda')
     mesh = edict(
         vertices=torch.tensor(state['mesh']['vertices'], device='cuda'),
         faces=torch.tensor(state['mesh']['faces'], device='cuda'),
     )
     return gs, mesh
 def get_seed(randomize_seed: bool, seed: int) -> int:
     req: gr.Request,
 ) -> Tuple[dict, str]:
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     outputs = pipeline.run(
         image[1],
         seed=seed,
+        formats=["gaussian", "mesh"],
         preprocess_image=False,
         sparse_structure_sampler_params={
             "steps": ss_sampling_steps,
             "cfg_strength": slat_guidance_strength,
         },
     )
+    video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
+    video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
+    video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
     video_path = os.path.join(user_dir, 'sample.mp4')
     imageio.mimsave(video_path, video, fps=15)
     state = pack_state(outputs['gaussian'][0], outputs['mesh'][0])
 # Return True unconditionally.
 # NOTE(review): presumably meant to reset the `do_preprocess` state to its
 # initial value; no caller is visible in this view -- confirm usage.
 def reset_do_preprocess():
     return True
+# Export the Gaussian held in `state` to 'sample.ply' inside the session
+# directory (TMP_DIR/<session hash>) and return that path twice, once for
+# each output component the click handler is wired to (model_output and
+# download_gs).
+@spaces.GPU
+def extract_gaussian(state: dict, req: gr.Request) -> Tuple[str, str]:
+    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
+    # Only the Gaussian is needed; the mesh returned alongside it is discarded.
+    gs, _ = unpack_state(state)
+    gaussian_path = os.path.join(user_dir, 'sample.ply')
+    gs.save_ply(gaussian_path)
+    # Release cached CUDA memory held by PyTorch's allocator after the export.
+    torch.cuda.empty_cache()
+    return gaussian_path, gaussian_path
 with gr.Blocks(delete_cache=(600, 600)) as demo:
     with gr.Row():
         with gr.Column():
             with gr.Column():
                 with gr.Row():
                     prompt = gr.Textbox(label="Prompt")
                     style = gr.Dropdown(label="Style", choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME)
                 with gr.Accordion(label="Generation Settings", open=False):
                     with gr.Tab(label="sketch-to-image generation"):
                         negative_prompt = gr.Textbox(label="Negative prompt")
                         num_steps = gr.Slider(
                         label="Number of steps",
                         minimum=1,
                             ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
                             ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
                         gr.Markdown("Stage 2: Structured Latent Generation")
+                    with gr.Row():
                             slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
                             slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
             with gr.Accordion(label="GLB Extraction Settings", open=False):
                 mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify", value=0.95, step=0.01)
                 texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
             with gr.Row():
                 extract_glb_btn = gr.Button("Extract GLB", interactive=False)
+                extract_gs_btn = gr.Button("Extract Gaussian", interactive=False)
             gr.Markdown("""
+                        *NOTE: Gaussian file can be very large (~50MB), it will take a while to display and download.*
                         """)
         with gr.Column():
             video_output = gr.Video(label="Generated 3D Asset", autoplay=True, loop=True, height=300)
             image_prompt_processed = ImageSlider(label="processed sketch", interactive=False, type="pil", height=512)
+            model_output = LitModel3D(label="Extracted GLB/Gaussian", exposure=10.0, height=300)
             with gr.Row():
+                download_glb = gr.DownloadButton(label="Download GLB", interactive=False)
+                download_gs = gr.DownloadButton(label="Download Gaussian", interactive=False)
+    do_preprocess = gr.State(True)
     output_buf = gr.State()
+    with gr.Row(visible=False) as single_image_example:
+        examples = gr.Examples(
+            examples=[
+                f'assets/example_image/{image}'
+                for image in os.listdir("assets/example_image")
+            ],
+            inputs=[image_prompt],
+            fn=preprocess_image,
+            outputs=[image_prompt_processed],
+            run_on_click=True,
+            examples_per_page=64,
+        )
     demo.load(start_session)
     demo.unload(end_session)
     image_prompt.clear(
         fn=reset_canvas,
         outputs = [image_prompt]
     )
     sketch_btn.click(
         get_seed,
         inputs=[randomize_seed, seed],
         inputs=[image_prompt_processed, seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
         outputs=[output_buf, video_output],
     ).then(
+        lambda: tuple([gr.Button(interactive=True), gr.Button(interactive=True)]),
+        outputs=[extract_glb_btn, extract_gs_btn],
     )
     video_output.clear(
+        lambda: tuple([gr.Button(interactive=False), gr.Button(interactive=False)]),
+        outputs=[extract_glb_btn, extract_gs_btn],
     )
     extract_glb_btn.click(
         extract_glb,
         inputs=[output_buf, mesh_simplify, texture_size],
         lambda: gr.Button(interactive=True),
         outputs=[download_glb],
     )
+    extract_gs_btn.click(
+        extract_gaussian,
+        inputs=[output_buf],
+        outputs=[model_output, download_gs],
+    ).then(
+        lambda: gr.Button(interactive=True),
+        outputs=[download_gs],
+    )
     model_output.clear(
         lambda: gr.Button(interactive=False),
         outputs=[download_glb],
     )
+# Launch the Gradio app
 if __name__ == "__main__":
+    pipeline = TrellisImageTo3DPipeline.from_pretrained("cavargas10/TRELLIS")
     pipeline.cuda()
     device = "cuda" if torch.cuda.is_available() else "cpu"
     #scribble controlnet
     controlnet = ControlNetModel.from_pretrained(
     "xinsir/controlnet-scribble-sdxl-1.0",
     torch_dtype=torch.float16
 )
     vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
     pipe_control = StableDiffusionXLControlNetPipeline.from_pretrained(
         "sd-community/sdxl-flash",
         controlnet=controlnet,
     )
     pipe_control.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe_control.scheduler.config)
     pipe_control.to(device)
     try:
         pipeline.preprocess_image(Image.fromarray(np.zeros((512, 512, 3), dtype=np.uint8)))    # Preload rembg
     except:
         pass
+    demo.launch(show_error=True)