import gradio as gr
import spaces
import numpy as np
import random
import os
import torch
from diffusers import DiffusionPipeline
from transformers import pipeline, AutoTokenizer
from huggingface_hub import login
from PIL import Image
import shutil
import subprocess
import sys
import importlib.util

# Environment configuration. These variables are set before the trellis
# packages are imported further down the file.
# NOTE(review): presumably selects spconv's "native" algorithm - confirm
# against the spconv / TRELLIS documentation.
os.environ['SPCONV_ALGO'] = 'native'
# Use PyTorch native scaled_dot_product_attention everywhere: xformers / flash_attn
# have no kernels for the Blackwell (sm_120) GPUs now used by ZeroGPU.
os.environ.setdefault('ATTN_BACKEND', 'sdpa')
os.environ.setdefault('SPARSE_ATTN_BACKEND', 'sdpa')
# DINOv2 (the image conditioning model loaded via torch.hub) also calls xformers
# directly; disabling it makes DINOv2 fall back to standard PyTorch attention,
# which works on Blackwell (sm_120).
os.environ.setdefault('XFORMERS_DISABLED', '1')
# Compile the runtime CUDA extensions (diff_gaussian_rasterization) for sm_120,
# with PTX so the driver can JIT for newer archs.
os.environ.setdefault('TORCH_CUDA_ARCH_LIST', '12.0+PTX')

# Workaround for a gradio_client 1.7.0 bug: boolean JSON schemas (e.g.
# additionalProperties: true, produced by gr.State) crash the /info endpoint
# with "TypeError: argument of type 'bool' is not iterable".
import gradio_client.utils as _gc_utils

# Keep references to the unpatched helpers so the wrappers can delegate to them.
_gc_orig_json_to_py = _gc_utils._json_schema_to_python_type
_gc_orig_get_type = _gc_utils.get_type


def _gc_safe_json_to_py(schema, defs=None):
    """Return "bool" for a boolean schema, else defer to the original helper."""
    return "bool" if isinstance(schema, bool) else _gc_orig_json_to_py(schema, defs)


def _gc_safe_get_type(schema):
    """Return "Any" for a non-dict schema, else defer to the original helper."""
    return _gc_orig_get_type(schema) if isinstance(schema, dict) else "Any"


# Install both wrappers on the gradio_client.utils module.
_gc_utils._json_schema_to_python_type = _gc_safe_json_to_py
_gc_utils.get_type = _gc_safe_get_type


def _pip_install_command(target: str) -> list:
    """Build the `pip install --no-build-isolation <target>` argument list."""
    return [sys.executable, "-m", "pip", "install", "--no-build-isolation", target]


def ensure_runtime_package(module_name: str, requirement: str) -> None:
    """Pip-install `requirement` unless `module_name` is already importable.

    Raises:
        subprocess.CalledProcessError: if the pip invocation fails.
    """
    if importlib.util.find_spec(module_name) is None:
        subprocess.run(_pip_install_command(requirement), check=True)


def ensure_mip_gaussian_rasterization() -> None:
    """Build and install diff_gaussian_rasterization from the Mip-Splatting repo.

    Does nothing when the module can already be found.

    Raises:
        subprocess.CalledProcessError: if the git clone or the pip build fails.
    """
    # TRELLIS needs the Mip-Splatting fork of diff-gaussian-rasterization (it exposes
    # the kernel_size / subpixel_offset rasterization settings; the upstream
    # graphdeco-inria build does not). We clone recursively (to fetch the third_party
    # glm headers) and build from source so it links against the installed torch and
    # the Blackwell (sm_120) arch.
    already_installed = importlib.util.find_spec("diff_gaussian_rasterization") is not None
    if already_installed:
        return

    repo_dir = "/tmp/mip-splatting"
    repo_url = "https://github.com/autonomousvision/mip-splatting.git"
    if not os.path.isdir(repo_dir):
        subprocess.run(["git", "clone", "--recursive", repo_url, repo_dir], check=True)

    rasterizer_dir = os.path.join(repo_dir, "submodules", "diff-gaussian-rasterization")
    subprocess.run(_pip_install_command(rasterizer_dir), check=True)


# diff_gaussian_rasterization and nvdiffrast are CUDA extensions that must be
# compiled against the installed torch version, so we build them at runtime
# instead of shipping torch-2.4 prebuilt wheels.
ensure_mip_gaussian_rasterization()
ensure_runtime_package("nvdiffrast", "git+https://github.com/NVlabs/nvdiffrast.git")

from typing import *
import imageio
from easydict import EasyDict as edict
from trellis.pipelines import TrellisImageTo3DPipeline
from trellis.representations import Gaussian, MeshExtractResult
from trellis.utils import render_utils, postprocessing_utils

hf_token = os.getenv("hf_token")
# login(token=None) falls back to an interactive prompt, which cannot work in a
# headless Space, so only log in when a token was actually provided.
if hf_token:
    login(token=hf_token)
else:
    print("Warning: 'hf_token' is not set; skipping Hugging Face login.")

# Global constants and default values
MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 2048

# Default system prompt for text generation
DEFAULT_SYSTEM_PROMPT = (
    "You are a product designer with strong knowledge in text-to-image generation. "
    "You will receive a product request in the form of a brief description, and "
    "your mission will be to imagine a new product design that meets this need. "
    "The deliverable (generated response) will be exclusively a text prompt for "
    "the FLUX.1-dev text-to-image AI. "
    "This prompt should include a visual description of the object explicitly "
    "mentioning the essential aspects of its function. "
    "Additionally, you should explicitly mention in this prompt the "
    "aesthetic/photo characteristics of the image rendering (e.g., photorealistic, "
    "high quality, focal length, grain, etc.), knowing that the image will be the "
    "main image of this object in the product catalog. "
    "The background of the generated image must be entirely white. "
    "The prompt should be without narration."
)

# Default Flux parameters
DEFAULT_SEED = 42
DEFAULT_RANDOMIZE_SEED = True
DEFAULT_WIDTH = 512
DEFAULT_HEIGHT = 512
DEFAULT_NUM_INFERENCE_STEPS = 6
DEFAULT_GUIDANCE_SCALE = 0.0
DEFAULT_TEMPERATURE = 0.9

TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
os.makedirs(TMP_DIR, exist_ok=True)

# Lazily-initialised model singletons (see the get_*_pipeline helpers below).
_text_gen_pipeline = None
_image_gen_pipeline = None
_trellis_pipeline = None


def start_session(req: gr.Request):
    """Create the per-session scratch directory under TMP_DIR."""
    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
    os.makedirs(user_dir, exist_ok=True)


def end_session(req: gr.Request):
    """Delete the per-session scratch directory under TMP_DIR, if it exists."""
    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
    # The directory may never have been created (or may already be gone);
    # cleanup is best-effort and must not raise.
    shutil.rmtree(user_dir, ignore_errors=True)


def preprocess_image(image: Image.Image) -> Image.Image:
    """Run the Trellis image preprocessing on `image`.

    Returns the image unchanged when the Trellis pipeline could not be loaded.
    """
    trellis = get_trellis_pipeline()
    if trellis is None:
        # If the pipeline is not loaded, just return the original image
        return image
    return trellis.preprocess_image(image)


@spaces.GPU()
def get_image_gen_pipeline():
    """Return the cached FLUX.1-dev pipeline, loading it on first use.

    Returns None (after printing the error) when loading fails.
    """
    global _image_gen_pipeline
    if _image_gen_pipeline is None:
        try:
            device = "cuda" if torch.cuda.is_available() else "cpu"
            dtype = torch.bfloat16
            _image_gen_pipeline = DiffusionPipeline.from_pretrained(
                "black-forest-labs/FLUX.1-dev",
                torch_dtype=dtype,
            ).to(device)
        except Exception as e:
            print(f"Error loading image generation model: {e}")
            return None
    return _image_gen_pipeline


@spaces.GPU()
def get_text_gen_pipeline():
    """Return the cached DeepSeek-R1 text-generation pipeline, loading it on first use.

    Returns None (after printing the error) when loading fails.
    """
    global _text_gen_pipeline
    if _text_gen_pipeline is None:
        try:
            device = "cuda" if torch.cuda.is_available() else "cpu"
            tokenizer = AutoTokenizer.from_pretrained(
                "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
                use_fast=True,
            )
            # Fall back to the EOS token when the tokenizer defines no pad token.
            tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token
            _text_gen_pipeline = pipeline(
                "text-generation",
                model="deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
                tokenizer=tokenizer,
                max_new_tokens=2048,
                device=device,
                pad_token_id=tokenizer.pad_token_id,
            )
        except Exception as e:
            print(f"Error loading text generation model: {e}")
            return None
    return _text_gen_pipeline


def get_trellis_pipeline():
    """Return the cached Trellis image-to-3D pipeline, loading it on first use.

    Returns None (after printing the error) when loading fails.
    """
    global _trellis_pipeline
    if _trellis_pipeline is None:
        try:
            print("Loading Trellis pipeline...")
            _trellis_pipeline = TrellisImageTo3DPipeline.from_pretrained(
                "microsoft/TRELLIS-image-large"
            )
        except Exception as e:
            print(f"Error loading Trellis pipeline: {e}")
            return None
    return _trellis_pipeline


def split_reasoning(text: str) -> Tuple[str, str]:
    """Split a DeepSeek-R1 response into (reasoning, clean_prompt).

    DeepSeek-R1 emits its chain-of-thought inside <think>...</think> tags before
    the final answer. We keep only the text after </think> for FLUX, and surface
    the reasoning separately. If the closing tag is missing (the model
    occasionally omits it), we treat the whole text as the prompt and leave the
    reasoning empty.

    Args:
        text: Raw assistant output; None is treated as an empty string.

    Returns:
        A (reasoning, prompt) tuple of stripped strings.
    """
    think_open, think_close = "<think>", "</think>"
    reasoning = ""
    prompt = (text or "").strip()
    if think_close in prompt:
        reasoning, _, prompt = prompt.partition(think_close)
        # Drop a leading <think> opener if present.
        reasoning = reasoning.replace(think_open, "").strip()
        prompt = prompt.strip()
    # Strip any stray opener (e.g. the model omitted the closing tag).
    prompt = prompt.replace(think_open, "").strip()
    # Strip a leading bold header, whether on its own line ("**Prompt for
    # FLUX:**\n...") or inline ("**Prompt:** actual text"). Only strip when the
    # bold segment looks like a header (mentions "prompt") to avoid removing a
    # legitimate bold opening word.
    stripped = prompt.lstrip()
    if stripped.startswith("**"):
        end = stripped.find("**", 2)
        if end != -1 and "prompt" in stripped[2:end].lower():
            rest = stripped[end + 2:].lstrip()
            if rest.startswith(":"):
                rest = rest[1:]
            prompt = rest.strip()
    # Remove wrapping quotation marks.
    if len(prompt) >= 2 and prompt[0] in "\"'" and prompt[-1] == prompt[0]:
        prompt = prompt[1:-1].strip()
    return reasoning, prompt


@spaces.GPU()
def refine_prompt(
    prompt,
    system_prompt=DEFAULT_SYSTEM_PROMPT,
    progress=gr.Progress(track_tqdm=True),
):
    """Turn a short product idea into a detailed FLUX prompt with DeepSeek-R1.

    Args:
        prompt: The user's short description.
        system_prompt: System instructions given to the text model.
        progress: Gradio progress tracker.

    Returns:
        A (reasoning, refined_prompt, status_message) tuple of strings. On any
        failure the first two are empty (reasoning may be kept when only the
        final prompt is empty) and the status message describes the problem.
    """
    # Same guard as generate_image: do not load or run the model for an empty request.
    if not prompt or not prompt.strip():
        return "", "", "Please provide a valid prompt."

    text_gen = get_text_gen_pipeline()
    if text_gen is None:
        return "", "", "Text generation model is unavailable."
    try:
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
        # Indicate progress started
        progress(0, desc="Generating text")
        # Generate text
        refined_prompt = text_gen(messages)
        # Indicate progress complete
        progress(1)
        # Extract just the assistant's content from the response
        try:
            messages = refined_prompt[0]['generated_text']
            # Find the last message with role 'assistant'
            assistant_messages = [msg for msg in messages if msg['role'] == 'assistant']
            if not assistant_messages:
                return "", "", "Error: No assistant response found"
            assistant_content = assistant_messages[-1]['content']
            # Separate DeepSeek-R1 reasoning from the final FLUX prompt.
            reasoning, clean_prompt = split_reasoning(assistant_content)
            if not clean_prompt:
                return reasoning, "", "Error: Model returned an empty prompt"
            return reasoning, clean_prompt, "Prompt refined successfully!"
        except (KeyError, IndexError, TypeError):
            # TypeError covers a response that is not a list of message dicts
            # (e.g. 'generated_text' coming back as a plain string).
            return "", "", "Error: Unexpected response format from the model"
    except Exception as e:
        print(f"Error in refine_prompt: {str(e)}")
        return "", "", f"Error refining prompt: {str(e)}"


def validate_dimensions(width, height):
    """Return (True, None) if width*height fits in MAX_IMAGE_SIZE², else (False, message)."""
    if width * height > MAX_IMAGE_SIZE * MAX_IMAGE_SIZE:
        return False, "Image dimensions too large"
    return True, None


@spaces.GPU()
def generate_image(
    prompt,
    seed=DEFAULT_SEED,
    randomize_seed=DEFAULT_RANDOMIZE_SEED,
    width=DEFAULT_WIDTH,
    height=DEFAULT_HEIGHT,
    num_inference_steps=DEFAULT_NUM_INFERENCE_STEPS,
    progress=gr.Progress(track_tqdm=True),
):
    """Render `prompt` to an image with the FLUX pipeline.

    Args:
        prompt: Text prompt; must be non-empty.
        seed: Seed used when `randomize_seed` is false.
        randomize_seed: Draw a fresh seed in [0, MAX_SEED] instead of using `seed`.
        width: Output width in pixels.
        height: Output height in pixels.
        num_inference_steps: Number of diffusion steps.
        progress: Gradio progress tracker.

    Returns:
        An (image, status_message) tuple; image is None on failure.
    """
    try:
        # Validate that prompt is not empty
        if not prompt or prompt.strip() == "":
            return None, "Please provide a valid prompt."

        # Reject bad sizes before paying for the model load.
        is_valid, error_msg = validate_dimensions(width, height)
        if not is_valid:
            return None, error_msg

        progress(0.1, desc="Loading model")
        pipe = get_image_gen_pipeline()
        if pipe is None:
            return None, "Image generation model is unavailable."

        # UI sliders / API callers may deliver numeric values as floats.
        width, height = int(width), int(height)
        num_inference_steps = int(num_inference_steps)
        seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

        # Use default torch generator instead of cuda-specific generator
        generator = torch.Generator().manual_seed(seed)

        progress(0.3, desc="Running inference")
        # Match the working example's parameters
        output = pipe(
            prompt=prompt,
            width=width,
            height=height,
            num_inference_steps=num_inference_steps,
            generator=generator,
            guidance_scale=DEFAULT_GUIDANCE_SCALE,
        )

        progress(0.8, desc="Processing output")
        image = output.images[0]

        progress(1.0, desc="Complete")
        return image, f"Image generated successfully with seed {seed}"
    except Exception as e:
        print(f"Error in generate_image: {str(e)}")
        return None, f"Error generating image: {str(e)}"


examples = [
    "a backpack for kids, flower style",
    "medieval flip flops",
    "cat shaped cake mold",
]

css = """
#col-container {
    margin: 0 auto;
    max-width: 720px;
}
.step-card {
    border: 1px solid var(--border-color-primary);
    border-radius: 12px;
    padding: 12px;
}
"""

# Real gallery results produced by running the pipeline on the Space, captured
# by scripts/generate_gallery.py into assets/gallery/ + manifest.json. Each
# manifest entry is {prompt, refined_prompt, image, video, glb} with repo-relative
# asset paths.
APP_DIR = os.path.dirname(os.path.abspath(__file__))
GALLERY_DIR = os.path.join(APP_DIR, 'assets', 'gallery')
GALLERY_MANIFEST = os.path.join(GALLERY_DIR, 'manifest.json')


def load_gallery():
    """Load the gallery manifest and resolve its asset paths.

    Returns:
        A list of entry dicts. The 'image', 'video' and 'glb' values are turned
        into absolute paths under APP_DIR, or None when the file is missing.
        Returns [] when the manifest is absent, unreadable, or not a JSON list.
    """
    if not os.path.exists(GALLERY_MANIFEST):
        return []
    import json
    try:
        with open(GALLERY_MANIFEST, encoding='utf-8') as f:
            items = json.load(f)
    except Exception as e:
        print(f"Error loading gallery manifest: {e}")
        return []
    # A malformed manifest must not crash UI construction at start-up.
    if not isinstance(items, list):
        print("Error loading gallery manifest: expected a JSON list of entries")
        return []

    resolved = []
    for it in items:
        if not isinstance(it, dict):
            continue
        entry = dict(it)
        for key in ('image', 'video', 'glb'):
            rel_path = it.get(key)
            if not rel_path:
                continue
            if not isinstance(rel_path, str):
                entry[key] = None
                continue
            abs_path = os.path.join(APP_DIR, rel_path)
            entry[key] = abs_path if os.path.exists(abs_path) else None
        resolved.append(entry)
    return resolved


def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
    """Convert a Gaussian and a mesh into a dict of init params and NumPy arrays."""
    return {
        'gaussian': {
            **gs.init_params,
            '_xyz': gs._xyz.cpu().numpy(),
            '_features_dc': gs._features_dc.cpu().numpy(),
            '_scaling': gs._scaling.cpu().numpy(),
            '_rotation': gs._rotation.cpu().numpy(),
            '_opacity': gs._opacity.cpu().numpy(),
        },
        'mesh': {
            'vertices': mesh.vertices.cpu().numpy(),
            'faces': mesh.faces.cpu().numpy(),
        },
    }


def unpack_state(state: dict) -> Tuple[Gaussian, edict]:
    """Rebuild the (Gaussian, mesh) pair from a dict produced by pack_state.

    All tensors are created on the 'cuda' device.
    """
    gaussian_state = state['gaussian']
    gs = Gaussian(
        aabb=gaussian_state['aabb'],
        sh_degree=gaussian_state['sh_degree'],
        mininum_kernel_size=gaussian_state['mininum_kernel_size'],
        scaling_bias=gaussian_state['scaling_bias'],
        opacity_bias=gaussian_state['opacity_bias'],
        scaling_activation=gaussian_state['scaling_activation'],
    )
    gs._xyz = torch.tensor(gaussian_state['_xyz'], device='cuda')
    gs._features_dc = torch.tensor(gaussian_state['_features_dc'], device='cuda')
    gs._scaling = torch.tensor(gaussian_state['_scaling'], device='cuda')
    gs._rotation = torch.tensor(gaussian_state['_rotation'], device='cuda')
    gs._opacity = torch.tensor(gaussian_state['_opacity'], device='cuda')

    mesh = edict(
        vertices=torch.tensor(state['mesh']['vertices'], device='cuda'),
        faces=torch.tensor(state['mesh']['faces'], device='cuda'),
    )
    return gs, mesh


def _resolve_trellis_seed(randomize_seed, seed):
    """Return a fresh random seed in [0, MAX_SEED] when requested, else `seed` as int."""
    return random.randint(0, MAX_SEED) if randomize_seed else int(seed)


@spaces.GPU
def image_to_3d(
    image: Image.Image,
    seed: int,
    ss_guidance_strength: float,
    ss_sampling_steps: int,
    slat_guidance_strength: float,
    slat_sampling_steps: int,
    req: gr.Request,
) -> Tuple[dict, str, str, str, str]:
    """Generate a 3D asset from `image` with TRELLIS.

    Writes 'sample.mp4' and 'sample.glb' into the session directory under TMP_DIR.

    Args:
        image: Input image.
        seed: Seed passed to the pipeline.
        ss_guidance_strength: cfg_strength for the sparse-structure sampler.
        ss_sampling_steps: Steps for the sparse-structure sampler.
        slat_guidance_strength: cfg_strength for the structured-latent sampler.
        slat_sampling_steps: Steps for the structured-latent sampler.
        req: Gradio request; its session_hash names the output directory.

    Returns:
        (state, video_path, glb_path, status_message, glb_path). On failure the
        paths and state are None and the status message describes the error.
    """
    try:
        # Fail with a clear message instead of an opaque preprocessing error.
        if image is None:
            return None, None, None, "Please generate or provide an image first.", None

        # Load the Trellis pipeline (local name avoids shadowing transformers.pipeline).
        trellis = get_trellis_pipeline()
        if trellis is None:
            return None, None, None, "Trellis pipeline is unavailable.", None
        trellis.cuda()

        # Preprocess image
        image = preprocess_image(image)

        # Run the pipeline; slider values may arrive as floats, so coerce counts/seeds.
        outputs = trellis.run(
            image,
            seed=int(seed),
            formats=["gaussian", "mesh"],
            preprocess_image=False,
            sparse_structure_sampler_params={
                "steps": int(ss_sampling_steps),
                "cfg_strength": ss_guidance_strength,
            },
            slat_sampler_params={
                "steps": int(slat_sampling_steps),
                "cfg_strength": slat_guidance_strength,
            },
        )

        temp_dir = os.path.join(TMP_DIR, str(req.session_hash))
        # The browser `load` event normally creates this via start_session, but
        # API callers (e.g. gradio_client) never trigger it, so ensure it exists.
        os.makedirs(temp_dir, exist_ok=True)

        gaussian = outputs['gaussian'][0]
        mesh = outputs['mesh'][0]

        # Side-by-side preview: colour render of the gaussians + normal render of the mesh.
        video = render_utils.render_video(gaussian, num_frames=120)['color']
        video_geo = render_utils.render_video(mesh, num_frames=120)['normal']
        video = [
            np.concatenate([color_frame, normal_frame], axis=1)
            for color_frame, normal_frame in zip(video, video_geo)
        ]
        video_path = os.path.join(temp_dir, 'sample.mp4')
        imageio.mimsave(video_path, video, fps=15)

        state = pack_state(gaussian, mesh)

        # Export an interactive, textured GLB for the 3D viewer and download.
        glb = postprocessing_utils.to_glb(
            gaussian,
            mesh,
            simplify=0.95,
            texture_size=1024,
            verbose=False,
        )
        glb_path = os.path.join(temp_dir, 'sample.glb')
        glb.export(glb_path)

        torch.cuda.empty_cache()
        return state, video_path, glb_path, "3D model generated successfully!", glb_path
    except Exception as e:
        print(f"Error in image_to_3d: {str(e)}")
        import traceback
        traceback.print_exc()  # Print the full stack trace for debugging
        return None, None, None, f"Error generating 3D model: {str(e)}", None


def process_example_pipeline(example_prompt):
    """Identity function used by gr.Examples to copy an example into the prompt box."""
    return example_prompt


HOW_IT_WORKS_MD = """
## Comment ça marche

Ce Space transforme une **idée en une phrase** en un **objet 3D téléchargeable**,
en enchaînant trois modèles spécialisés.

```
Prompt simple ──▶ DeepSeek-R1 ──▶ FLUX.1-dev ──▶ TRELLIS ──▶ Vidéo + GLB
   (texte)        (raisonnement   (image         (objet 3D)
                  + prompt riche) produit)
```

**Étape 1 — DeepSeek-R1-Distill-Llama-8B (texte → texte).** Le modèle joue le rôle
d'un designer produit : à partir d'une description courte, il *réfléchit* (chaîne
de raisonnement visible dans l'accordéon « Raisonnement du modèle ») puis rédige un
prompt visuel détaillé et photoréaliste pour FLUX. Seul le prompt final — sans le
raisonnement — est transmis à l'étape suivante.

**Étape 2 — FLUX.1-dev (texte → image).** Le prompt détaillé est rendu en une image
produit sur fond blanc, cadrée comme une photo de catalogue.

**Étape 3 — TRELLIS (image → 3D).** L'image est convertie en un asset 3D : une vidéo
de prévisualisation (rendu couleur + normales) et un fichier **GLB** texturé,
manipulable directement dans la visionneuse interactive et téléchargeable.

> **Pourquoi passer par une image avant la 3D ?** TRELLIS est conditionné sur une
> image. Générer d'abord une image nette et bien cadrée donne un maillage et une
> texture bien plus propres qu'une génération 3D directe depuis du texte.

### Stack technique

- **DeepSeek-R1-Distill-Llama-8B** — raisonnement + ingénierie de prompt
- **FLUX.1-dev** — diffusion texte→image
- **TRELLIS (microsoft/TRELLIS-image-large)** — génération 3D (Gaussian + mesh)
- **ZeroGPU** sur GPU Blackwell (sm_120), attention `sdpa`, extensions CUDA compilées au runtime
"""


def create_interface():
    """Build and return the Gradio Blocks app (demo, gallery and explanation tabs)."""
    theme = gr.themes.Soft(primary_hue="pink", secondary_hue="rose")

    with gr.Blocks(css=css, theme=theme, title="Text to 3D") as demo:
        # Session handlers must be registered inside the Blocks context.
        demo.load(fn=start_session)
        demo.unload(fn=end_session)

        # State for storing 3D model data
        output_state = gr.State(None)

        with gr.Column(elem_id="col-container"):
            gr.Markdown(
                "# Text to 3D\n"
                "De quelques mots à un objet 3D téléchargeable — "
                "**DeepSeek-R1 + FLUX.1-dev + TRELLIS**."
            )
            gr.Markdown(
                "> ⏳ **Démarrage à froid (ZeroGPU)** : au premier lancement, le "
                "chargement de DeepSeek-R1 (8B), FLUX et TRELLIS peut prendre "
                "plusieurs minutes. Les générations suivantes sont bien plus rapides. "
                "Pas envie d'attendre ? Voyez l'onglet **Galerie**."
            )

            with gr.Tabs():
                # ---------------------------------------------------------- Demo
                with gr.Tab("Démo en direct"):
                    prompt = gr.Text(
                        label="Votre idée",
                        max_lines=1,
                        placeholder="Ex. : a backpack for kids, flower style",
                    )
                    run_all_button = gr.Button("✨ Générer tout", variant="primary")

                    # Step 1 — DeepSeek-R1
                    with gr.Group(elem_classes="step-card"):
                        gr.Markdown("### Étape 1 — DeepSeek-R1 · prompt design")
                        status1 = gr.Markdown("Étape 1 — en attente")
                        with gr.Accordion("Raisonnement du modèle", open=False):
                            reasoning_box = gr.Textbox(
                                show_label=False,
                                max_lines=20,
                                placeholder="La chaîne de raisonnement de DeepSeek-R1 apparaîtra ici",
                                interactive=False,
                            )
                        refined_prompt = gr.Text(
                            label="Prompt détaillé (envoyé à FLUX)",
                            max_lines=10,
                            placeholder="Detailed object prompt",
                            max_length=2048,
                        )
                        prompt_button = gr.Button("Rejouer l'étape 1 — Affiner le prompt")

                    # Step 2 — FLUX
                    with gr.Group(elem_classes="step-card"):
                        gr.Markdown("### Étape 2 — FLUX.1-dev · image produit")
                        status2 = gr.Markdown("Étape 2 — en attente")
                        generated_image = gr.Image(
                            label="Image générée",
                            format="png",
                            image_mode="RGBA",
                            type="pil",
                            height=300,
                        )
                        visual_button = gr.Button("Rejouer l'étape 2 — Générer l'image")

                    # Step 3 — TRELLIS
                    with gr.Group(elem_classes="step-card"):
                        gr.Markdown("### Étape 3 — TRELLIS · objet 3D")
                        status3 = gr.Markdown("Étape 3 — en attente")
                        with gr.Row():
                            video_output = gr.Video(
                                label="Prévisualisation (couleur + normales)",
                                autoplay=True,
                                loop=True,
                                height=300,
                            )
                            model_3d = gr.Model3D(label="Modèle 3D interactif", height=300)
                        download_glb = gr.DownloadButton("⬇️ Télécharger le .glb", value=None)
                        gen3d_button = gr.Button("Rejouer l'étape 3 — Générer la 3D")

                    # NOTE(review): message_box is displayed but no event writes to it.
                    message_box = gr.Textbox(
                        label="Messages de statut",
                        interactive=False,
                        placeholder="Les messages détaillés apparaîtront ici",
                    )

                    # Accordion sections for advanced settings
                    with gr.Accordion("Réglages avancés", open=False):
                        with gr.Tab("DeepSeek-R1"):
                            # NOTE(review): this slider is not passed to refine_prompt,
                            # which takes no temperature argument; it currently has no effect.
                            temperature = gr.Slider(
                                label="Temperature",
                                value=DEFAULT_TEMPERATURE,
                                minimum=0.0,
                                maximum=1.0,
                                step=0.05,
                                info="Higher values produce more diverse outputs",
                            )
                            system_prompt = gr.Textbox(
                                label="System Prompt",
                                value=DEFAULT_SYSTEM_PROMPT,
                                lines=10,
                                info="Instructions for the DeepSeek-R1 model",
                            )
                        with gr.Tab("Flux"):
                            flux_seed = gr.Slider(
                                label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=DEFAULT_SEED
                            )
                            flux_randomize_seed = gr.Checkbox(
                                label="Randomize seed", value=DEFAULT_RANDOMIZE_SEED
                            )
                            with gr.Row():
                                width = gr.Slider(
                                    label="Width",
                                    minimum=256,
                                    maximum=MAX_IMAGE_SIZE,
                                    step=32,
                                    value=DEFAULT_WIDTH,
                                )
                                height = gr.Slider(
                                    label="Height",
                                    minimum=256,
                                    maximum=MAX_IMAGE_SIZE,
                                    step=32,
                                    value=DEFAULT_HEIGHT,
                                )
                            num_inference_steps = gr.Slider(
                                label="Number of inference steps",
                                minimum=1,
                                maximum=50,
                                step=1,
                                value=DEFAULT_NUM_INFERENCE_STEPS,
                            )
                        with gr.Tab("3D Generation Settings"):
                            trellis_seed = gr.Slider(0, MAX_SEED, label="Seed", value=0, step=1)
                            trellis_randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                            gr.Markdown("Stage 1: Sparse Structure Generation")
                            with gr.Row():
                                ss_guidance_strength = gr.Slider(
                                    0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1
                                )
                                ss_sampling_steps = gr.Slider(
                                    1, 50, label="Sampling Steps", value=12, step=1
                                )
                            gr.Markdown("Stage 2: Structured Latent Generation")
                            with gr.Row():
                                slat_guidance_strength = gr.Slider(
                                    0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1
                                )
                                slat_sampling_steps = gr.Slider(
                                    1, 50, label="Sampling Steps", value=12, step=1
                                )

                    gr.Examples(
                        examples=examples,
                        fn=process_example_pipeline,
                        inputs=[prompt],
                        outputs=[prompt],
                        cache_examples=True,
                    )

                # ------------------------------------------------------- Gallery
                with gr.Tab("Galerie"):
                    gr.Markdown(
                        "De **vrais résultats** produits par le pipeline complet "
                        "(prompt → image → 3D). Affichage instantané, même quand le "
                        "GPU est endormi."
                    )
                    gallery_items = load_gallery()
                    if not gallery_items:
                        gr.Markdown(
                            "_Galerie en cours de génération — lancez "
                            "`python scripts/generate_gallery.py` puis committez "
                            "`assets/gallery/`._"
                        )
                    for item in gallery_items:
                        with gr.Group(elem_classes="step-card"):
                            gr.Markdown(f"**Prompt :** {item.get('prompt', '')}")
                            with gr.Row():
                                if item.get("image"):
                                    gr.Image(
                                        value=item["image"],
                                        label="Image (FLUX)",
                                        height=260,
                                        interactive=False,
                                    )
                                if item.get("video"):
                                    gr.Video(
                                        value=item["video"],
                                        label="Aperçu (TRELLIS)",
                                        autoplay=True,
                                        loop=True,
                                        height=260,
                                    )
                                if item.get("glb"):
                                    gr.Model3D(
                                        value=item["glb"],
                                        label="Modèle 3D interactif",
                                        height=260,
                                    )
                            if item.get("refined_prompt"):
                                with gr.Accordion("Prompt détaillé (DeepSeek-R1)", open=False):
                                    gr.Markdown(item["refined_prompt"])

                # --------------------------------------------------- How it works
                with gr.Tab("Comment ça marche"):
                    gr.Markdown(HOW_IT_WORKS_MD)

        # ----------------------------------------------------------- Wiring
        # Shared input/output lists so the step buttons and the one-click chain agree.
        refine_inputs = [prompt, system_prompt]
        refine_outputs = [reasoning_box, refined_prompt, status1]
        image_inputs = [
            refined_prompt, flux_seed, flux_randomize_seed,
            width, height, num_inference_steps,
        ]
        image_outputs = [generated_image, status2]
        seed_inputs = [trellis_randomize_seed, trellis_seed]
        seed_outputs = [trellis_seed]
        model_inputs = [
            generated_image, trellis_seed,
            ss_guidance_strength, ss_sampling_steps,
            slat_guidance_strength, slat_sampling_steps,
        ]
        model_outputs = [output_state, video_output, model_3d, status3, download_glb]

        # Individual step buttons (replay one stage in isolation).
        gr.on(
            triggers=[prompt_button.click, prompt.submit],
            fn=refine_prompt,
            inputs=refine_inputs,
            outputs=refine_outputs,
        )
        gr.on(
            triggers=[visual_button.click],
            fn=generate_image,
            inputs=image_inputs,
            outputs=image_outputs,
        )
        # Resolve the TRELLIS seed first so the "Randomize Seed" checkbox takes
        # effect and the slider shows the seed that was actually used.
        gen3d_button.click(
            fn=_resolve_trellis_seed,
            inputs=seed_inputs,
            outputs=seed_outputs,
        ).then(
            fn=image_to_3d,
            inputs=model_inputs,
            outputs=model_outputs,
        )

        # One-click pipeline: refine → image → 3D, with a per-step status.
        run_all_button.click(
            fn=lambda: ("⏳ Étape 1 — en cours…", "Étape 2 — en attente", "Étape 3 — en attente"),
            inputs=None,
            outputs=[status1, status2, status3],
        ).then(
            fn=refine_prompt,
            inputs=refine_inputs,
            outputs=refine_outputs,
        ).then(
            fn=lambda: "⏳ Étape 2 — en cours…",
            inputs=None,
            outputs=[status2],
        ).then(
            fn=generate_image,
            inputs=image_inputs,
            outputs=image_outputs,
        ).then(
            fn=lambda: "⏳ Étape 3 — en cours…",
            inputs=None,
            outputs=[status3],
        ).then(
            fn=_resolve_trellis_seed,
            inputs=seed_inputs,
            outputs=seed_outputs,
        ).then(
            fn=image_to_3d,
            inputs=model_inputs,
            outputs=model_outputs,
        )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch()