TRELLIS-Boceto3D

Runtime error

App Files Community

cavargas10 committed on Mar 13, 2025

Commit

945c4e6

verified ·

1 Parent(s): 6b439c8

Update app.py

Browse files

Files changed (1) hide show

app.py +179 -114

app.py CHANGED Viewed

@@ -3,9 +3,6 @@ import spaces
 from gradio_litmodel3d import LitModel3D
 import os
 import shutil
-os.environ['SPCONV_ALGO'] = 'native'
-from typing import *
-import torch
 import numpy as np
 import imageio
 from easydict import EasyDict as edict
@@ -13,59 +10,88 @@ from PIL import Image, ImageOps
 from trellis.pipelines import TrellisImageTo3DPipeline
 from trellis.representations import Gaussian, MeshExtractResult
 from trellis.utils import render_utils, postprocessing_utils
 from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
-from diffusers import EulerAncestralDiscreteScheduler
-from pathlib import Path
 style_list = [
-    {"name": "(No style)", "prompt": "{prompt}", "negative_prompt": ""},
-    {"name": "Cinematic", "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy", "negative_prompt": "anime, cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured"},
-    {"name": "3D Model", "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting", "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting"},
 ]
 styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
 STYLE_NAMES = list(styles.keys())
 DEFAULT_STYLE_NAME = "(No style)"
-MAX_SEED = np.iinfo(np.int32).max
-TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
-os.makedirs(TMP_DIR, exist_ok=True)
-def apply_style(style_name: str, positive: str, negative: str = "") -> tuple[str, str]:
     p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
-    return p.replace("{prompt}", positive), n + negative
 def start_session(req: gr.Request):
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     os.makedirs(user_dir, exist_ok=True)
 def end_session(req: gr.Request):
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     shutil.rmtree(user_dir)
-@spaces.GPU
-def preprocess_image(image: Image.Image,
-                    prompt: str,
-                    negative_prompt: str,
-                    style_name: str,
-                    num_steps: int,
-                    guidance_scale: float,
-                    controlnet_conditioning_scale: float) -> Image.Image:
-    width, height = image.size
-    ratio = np.sqrt(1024 * 1024 / (width * height))
-    new_size = (int(width * ratio), int(height * ratio))
-    image = image.resize(new_size)
-    prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
-    output = pipe_control(
         prompt=prompt,
         negative_prompt=negative_prompt,
-        image=image,
         num_inference_steps=num_steps,
         controlnet_conditioning_scale=controlnet_conditioning_scale,
         guidance_scale=guidance_scale,
     ).images[0]
-    return pipeline.preprocess_image(output)
 def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
     return {
@@ -83,7 +109,7 @@ def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
         },
     }
-def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
     gs = Gaussian(
         aabb=state['gaussian']['aabb'],
         sh_degree=state['gaussian']['sh_degree'],
@@ -105,6 +131,9 @@ def unpack_state(state: dict) -> Tuple[Gaussian, edict, str]:
     return gs, mesh
 @spaces.GPU
 def image_to_3d(
     image: Image.Image,
@@ -113,9 +142,10 @@ def image_to_3d(
     ss_sampling_steps: int,
     slat_guidance_strength: float,
     slat_sampling_steps: int,
-    req: gr.Request,
 ) -> Tuple[dict, str]:
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     outputs = pipeline.run(
         image,
         seed=seed,
@@ -129,14 +159,18 @@ def image_to_3d(
             "cfg_strength": slat_guidance_strength,
         },
     )
     video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
     video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
-    video = [np.concatenate([video[i], video_geo[i]], axis=1) for i in range(len(video))]
     video_path = os.path.join(user_dir, 'sample.mp4')
-    imageio.mimsave(video_path, video, fps=15)
     state = pack_state(outputs['gaussian'][0], outputs['mesh'][0])
     torch.cuda.empty_cache()
     return state, video_path
 @spaces.GPU(duration=90)
@@ -144,103 +178,134 @@ def extract_glb(
     state: dict,
     mesh_simplify: float,
     texture_size: int,
-    req: gr.Request,
-) -> Tuple[str, str]:
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     gs, mesh = unpack_state(state)
-    glb = postprocessing_utils.to_glb(gs, mesh, simplify=mesh_simplify, texture_size=texture_size, verbose=False)
-    glb_path = os.path.join(user_dir, 'sample.glb')
     glb.export(glb_path)
     torch.cuda.empty_cache()
-    return glb_path, glb_path
 with gr.Blocks() as demo:
-    gr.Markdown("# Sketch to 3D with TRELLIS")
     with gr.Row():
-        with gr.Column():
-            image_prompt = gr.Image(label="Sketch Input", type="pil", image_mode="RGBA", height=512)
-            prompt = gr.Textbox(label="Prompt", placeholder="Describe tu modelo 3D")
-            style = gr.Dropdown(label="Style", choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME)
-            with gr.Accordion("Generation Settings", open=False):
-                num_steps = gr.Slider(1, 20, label="Steps", value=8, step=1)
-                guidance_scale = gr.Slider(0.1, 10.0, label="Guidance Scale", value=5.0, step=0.1)
-                controlnet_scale = gr.Slider(0.5, 5.0, label="ControlNet Scale", value=0.85, step=0.01)
-                seed = gr.Slider(0, MAX_SEED, label="Seed", value=0, step=1)
-                randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
-                with gr.Group():
-                    gr.Markdown("#### Stage 1: Structure")
-                    ss_guidance = gr.Slider(0.0, 10.0, label="Guidance", value=7.5, step=0.1)
-                    ss_steps = gr.Slider(1, 50, label="Steps", value=12, step=1)
-                with gr.Group():
-                    gr.Markdown("#### Stage 2: Detail")
-                    slat_guidance = gr.Slider(0.0, 10.0, label="Guidance", value=3.0, step=0.1)
-                    slat_steps = gr.Slider(1, 50, label="Steps", value=12, step=1)
-            generate_btn = gr.Button("Generate 3D Model", variant="primary")
-            with gr.Accordion("Export Settings", open=False):
-                mesh_simplify = gr.Slider(0.9, 0.98, label="Simplify Mesh", value=0.95, step=0.01)
-                texture_size = gr.Slider(512, 2048, label="Texture Size", value=1024, step=512)
-            extract_btn = gr.Button("Export GLB", interactive=False)
-        with gr.Column():
-            video_output = gr.Video(label="3D Preview", autoplay=True, loop=True, height=300)
-            model_viewer = LitModel3D(label="3D Model Viewer", height=400)
-            download_btn = gr.DownloadButton("Download GLB", interactive=False)
-    output_state = gr.State()
-    demo.load(start_session)
-    demo.unload(end_session)
     generate_btn.click(
-        lambda rand, s: np.random.randint(0, MAX_SEED) if rand else s,
         inputs=[randomize_seed, seed],
-        outputs=[seed],
     ).then(
-        preprocess_image,
-        inputs=[image_prompt, prompt, gr.Textbox(), style, num_steps, guidance_scale, controlnet_scale],
-        outputs=[image_prompt],
     ).then(
-        image_to_3d,
-        inputs=[image_prompt, seed, ss_guidance, ss_steps, slat_guidance, slat_steps],
-        outputs=[output_state, video_output],
     ).then(
-        lambda: gr.Button(interactive=True),
-        outputs=[extract_btn],
     )
-    extract_btn.click(
-        extract_glb,
-        inputs=[output_buf, mesh_simplify, texture_size],
-        outputs=[model_output, download_glb],
-    ).then(
-        lambda: gr.Button(interactive=True),
-        outputs=[download_glb],
     )
 if __name__ == "__main__":
-    # TRELLIS pipeline
-    pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
-    pipeline.cuda()
-    # ControlNet y SDXL
-    controlnet = ControlNetModel.from_pretrained(
-        "xinsir/controlnet-scribble-sdxl-1.0",
-        torch_dtype=torch.float16
-    )
-    vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
-    pipe_control = StableDiffusionXLControlNetPipeline.from_pretrained(
-        "sd-community/sdxl-flash",
-        controlnet=controlnet,
-        vae=vae,
-        torch_dtype=torch.float16,
-    )
-    pipe_control.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe_control.scheduler.config)
-    pipe_control.to("cuda")
-    demo.launch()

 from gradio_litmodel3d import LitModel3D
 import os
 import shutil
 import numpy as np
 import imageio
 from easydict import EasyDict as edict
 from trellis.pipelines import TrellisImageTo3DPipeline
 from trellis.representations import Gaussian, MeshExtractResult
 from trellis.utils import render_utils, postprocessing_utils
+import torch
 from diffusers import ControlNetModel, StableDiffusionXLControlNetPipeline, AutoencoderKL
+from controlnet_aux import PidiNetDetector, HEDdetector
+os.environ['SPCONV_ALGO'] = 'native'
+MAX_SEED = np.iinfo(np.int32).max
+TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
+os.makedirs(TMP_DIR, exist_ok=True)
+# Configuración de estilos
 style_list = [
+    {
+        "name": "(No style)",
+        "prompt": "{prompt}",
+        "negative_prompt": "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
+    },
+    {
+        "name": "3D Model",
+        "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting",
+        "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting",
+    },
+    # ... (otros estilos)
 ]
 styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list}
 STYLE_NAMES = list(styles.keys())
 DEFAULT_STYLE_NAME = "(No style)"
+def apply_style(style_name: str, prompt: str, negative: str = "") -> tuple:
     p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
+    return p.replace("{prompt}", prompt), n + negative
 def start_session(req: gr.Request):
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     os.makedirs(user_dir, exist_ok=True)
 def end_session(req: gr.Request):
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     shutil.rmtree(user_dir)
+# Inicialización de ControlNet
+controlnet = ControlNetModel.from_pretrained(
+    "xinsir/controlnet-scribble-sdxl-1.0",
+    torch_dtype=torch.float16
+)
+vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16)
+pipe_control = StableDiffusionXLControlNetPipeline.from_pretrained(
+    "sd-community/sdxl-flash",
+    controlnet=controlnet,
+    vae=vae,
+    torch_dtype=torch.float16,
+)
+pipe_control.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe_control.scheduler.config)
+pipe_control.to("cuda")
+# Inicialización de TRELLIS
+pipeline = TrellisImageTo3DPipeline.from_pretrained("JeffreyXiang/TRELLIS-image-large")
+pipeline.cuda()
+def preprocess_image(
+    image: Image.Image,
+    prompt: str,
+    style_name: str,
+    num_steps: int = 20,
+    guidance_scale: float = 5,
+    controlnet_conditioning_scale: float = 0.85
+) -> Image.Image:
+    # Aplicar estilo
+    prompt, negative_prompt = apply_style(style_name, prompt)
+    # Procesar con ControlNet
+    processed_image = pipe_control(
         prompt=prompt,
         negative_prompt=negative_prompt,
+        image=image.convert("RGB"),
         num_inference_steps=num_steps,
         controlnet_conditioning_scale=controlnet_conditioning_scale,
         guidance_scale=guidance_scale,
+        width=512,
+        height=512
     ).images[0]
+    return processed_image
 def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
     return {
         },
     }
+def unpack_state(state: dict) -> Tuple[Gaussian, edict]:
     gs = Gaussian(
         aabb=state['gaussian']['aabb'],
         sh_degree=state['gaussian']['sh_degree'],
     return gs, mesh
+def get_seed(randomize_seed: bool, seed: int) -> int:
+    return np.random.randint(0, MAX_SEED) if randomize_seed else seed
 @spaces.GPU
 def image_to_3d(
     image: Image.Image,
     ss_sampling_steps: int,
     slat_guidance_strength: float,
     slat_sampling_steps: int,
+    req: gr.Request
 ) -> Tuple[dict, str]:
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     outputs = pipeline.run(
         image,
         seed=seed,
             "cfg_strength": slat_guidance_strength,
         },
     )
+    # Renderizar video
     video = render_utils.render_video(outputs['gaussian'][0], num_frames=120)['color']
     video_geo = render_utils.render_video(outputs['mesh'][0], num_frames=120)['normal']
+    combined_video = [np.concatenate([frame, geo], axis=1) for frame, geo in zip(video, video_geo)]
     video_path = os.path.join(user_dir, 'sample.mp4')
+    imageio.mimsave(video_path, combined_video, fps=15)
+    # Empaquetar estado
     state = pack_state(outputs['gaussian'][0], outputs['mesh'][0])
     torch.cuda.empty_cache()
     return state, video_path
 @spaces.GPU(duration=90)
     state: dict,
     mesh_simplify: float,
     texture_size: int,
+    req: gr.Request
+) -> str:
     user_dir = os.path.join(TMP_DIR, str(req.session_hash))
     gs, mesh = unpack_state(state)
+    glb = postprocessing_utils.to_glb(
+        gs,
+        mesh,
+        simplify=mesh_simplify,
+        texture_size=texture_size,
+        verbose=False
+    )
+    glb_path = os.path.join(user_dir, 'model.glb')
     glb.export(glb_path)
     torch.cuda.empty_cache()
+    return glb_path
 with gr.Blocks() as demo:
+    gr.Markdown("""
+    # Conversor de Bocetos a 3D
+    ### Carga un boceto, ajusta parámetros y genera un modelo 3D
+    """)
     with gr.Row():
+        with gr.Column(scale=1):
+            # Entrada de boceto
+            image_prompt = gr.Image(
+                label="Boceto",
+                type="pil",
+                tool="sketch",
+                image_mode="RGBA",
+                height=512
+            )
+            # Parámetros
+            with gr.Accordion("Configuración", open=True):
+                prompt = gr.Textbox(label="Prompt", value="3D model")
+                style = gr.Dropdown(
+                    label="Estilo",
+                    choices=STYLE_NAMES,
+                    value=DEFAULT_STYLE_NAME
+                )
+                with gr.Tab("ControlNet"):
+                    num_steps = gr.Slider(5, 30, value=20, label="Pasos")
+                    guidance_scale = gr.Slider(0.1, 10, value=5, label="Guidance Scale")
+                    controlnet_scale = gr.Slider(0.5, 1.5, value=0.85, label="ControlNet Scale")
+                with gr.Tab("Generación 3D"):
+                    seed = gr.Slider(0, MAX_SEED, value=42, label="Seed")
+                    randomize_seed = gr.Checkbox(True, label="Randomizar Seed")
+                    with gr.Group():
+                        gr.Markdown("Estructura (Stage 1)")
+                        ss_guidance = gr.Slider(0, 10, value=7.5, label="Guidance Strength")
+                        ss_steps = gr.Slider(5, 20, value=12, label="Pasos")
+                    with gr.Group():
+                        gr.Markdown("Detalles (Stage 2)")
+                        slat_guidance = gr.Slider(0, 10, value=3.0, label="Guidance Strength")
+                        slat_steps = gr.Slider(5, 20, value=12, label="Pasos")
+            generate_btn = gr.Button("Generar 3D", variant="primary")
+        with gr.Column(scale=2):
+            video_output = gr.Video(
+                label="Vista 3D",
+                height=400,
+                interactive=False
+            )
+            model_output = LitModel3D(
+                label="Modelo 3D",
+                height=300,
+                exposure=10.0
+            )
+            download_btn = gr.Download(
+                label="Descargar GLB",
+                interactive=False
+            )
+    # Estado interno
+    output_buf = gr.State()
+    is_processing = gr.State(False)
+    # Eventos
     generate_btn.click(
+        fn=lambda: gr.update(interactive=False),
+        outputs=[generate_btn]
+    ).then(
+        fn=get_seed,
         inputs=[randomize_seed, seed],
+        outputs=[seed]
     ).then(
+        fn=preprocess_image,
+        inputs=[
+            image_prompt,
+            prompt,
+            style,
+            num_steps,
+            guidance_scale,
+            controlnet_scale
+        ],
+        outputs=image_prompt
     ).then(
+        fn=image_to_3d,
+        inputs=[
+            image_prompt,
+            seed,
+            ss_guidance,
+            ss_steps,
+            slat_guidance,
+            slat_steps
+        ],
+        outputs=[output_buf, video_output]
     ).then(
+        fn=lambda state: extract_glb(state, 0.95, 1024),
+        inputs=[output_buf],
+        outputs=download_btn,
+        show_progress=True
+    ).then(
+        fn=lambda: gr.update(interactive=True),
+        outputs=[generate_btn]
     )
+    # Eventos de limpieza
+    video_output.clear(
+        fn=lambda: gr.update(interactive=False),
+        outputs=[download_btn]
     )
 if __name__ == "__main__":
+    demo.queue(max_size=20).launch(share=True)