import gradio as gr
import spaces

import numpy as np
import random
import os
import torch
from diffusers import DiffusionPipeline
from transformers import pipeline, AutoTokenizer
from huggingface_hub import login
from PIL import Image
import shutil
import subprocess
import sys
import importlib.util
# Runtime configuration through environment variables, set here before the
# `trellis` imports further down. SPCONV_ALGO is assigned unconditionally; the
# remaining variables use setdefault, so a value already present in the
# environment takes precedence.
# NOTE(review): SPCONV_ALGO presumably selects the spconv algorithm mode used by
# TRELLIS' sparse convolutions — confirm against the TRELLIS/spconv docs.
os.environ['SPCONV_ALGO'] = 'native'
# Use PyTorch native scaled_dot_product_attention everywhere: xformers / flash_attn
# have no kernels for the Blackwell (sm_120) GPUs now used by ZeroGPU.
os.environ.setdefault('ATTN_BACKEND', 'sdpa')
os.environ.setdefault('SPARSE_ATTN_BACKEND', 'sdpa')
# DINOv2 (the image conditioning model loaded via torch.hub) also calls xformers
# directly; disabling it makes DINOv2 fall back to standard PyTorch attention,
# which works on Blackwell (sm_120).
os.environ.setdefault('XFORMERS_DISABLED', '1')
# Compile the runtime CUDA extensions (diff_gaussian_rasterization) for sm_120,
# with PTX so the driver can JIT for newer archs.
os.environ.setdefault('TORCH_CUDA_ARCH_LIST', '12.0+PTX')


# Workaround for a gradio_client 1.7.0 bug: boolean JSON schemas (e.g.
# additionalProperties: true, produced by gr.State) crash the /info endpoint
# with "TypeError: argument of type 'bool' is not iterable".
import gradio_client.utils as _gc_utils

_gc_orig_json_to_py = _gc_utils._json_schema_to_python_type


def _gc_safe_json_to_py(schema, defs=None):
    if isinstance(schema, bool):
        return "bool"
    return _gc_orig_json_to_py(schema, defs)


_gc_utils._json_schema_to_python_type = _gc_safe_json_to_py

_gc_orig_get_type = _gc_utils.get_type


def _gc_safe_get_type(schema):
    if not isinstance(schema, dict):
        return "Any"
    return _gc_orig_get_type(schema)


_gc_utils.get_type = _gc_safe_get_type


def ensure_runtime_package(module_name: str, requirement: str) -> None:
    """Install ``requirement`` with pip unless ``module_name`` is already importable.

    Args:
        module_name: Module name probed with ``importlib.util.find_spec``.
        requirement: Requirement string handed to ``pip install`` (package
            name, local path or VCS URL).

    Raises:
        subprocess.CalledProcessError: If the pip invocation exits non-zero.
    """
    if importlib.util.find_spec(module_name) is not None:
        return
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "--no-build-isolation", requirement],
        check=True,
    )
    # The package was installed while this interpreter was already running;
    # drop the import system's cached finder state so the following imports
    # can see it.
    importlib.invalidate_caches()


def ensure_mip_gaussian_rasterization() -> None:
    """Build and install the Mip-Splatting rasterizer if it is not importable.

    TRELLIS needs the Mip-Splatting fork of diff-gaussian-rasterization (it
    exposes the kernel_size / subpixel_offset rasterization settings; the
    upstream graphdeco-inria build does not). We clone recursively (to fetch
    the third_party glm headers) and build from source so it links against the
    installed torch and the Blackwell (sm_120) arch.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` or ``pip install`` fails.
    """
    if importlib.util.find_spec("diff_gaussian_rasterization") is not None:
        return
    repo_dir = "/tmp/mip-splatting"
    package_dir = os.path.join(repo_dir, "submodules", "diff-gaussian-rasterization")
    if not os.path.isdir(package_dir):
        # A leftover repo directory without the package sources (e.g. from an
        # interrupted clone) would otherwise skip the clone and fail in pip on
        # every start; remove it so git can clone into a clean path.
        shutil.rmtree(repo_dir, ignore_errors=True)
        subprocess.run(
            ["git", "clone", "--recursive",
             "https://github.com/autonomousvision/mip-splatting.git", repo_dir],
            check=True,
        )
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "--no-build-isolation", package_dir],
        check=True,
    )
    # Installed into the already-running interpreter: refresh the import
    # system's caches so the new module can be found.
    importlib.invalidate_caches()


# diff_gaussian_rasterization and nvdiffrast are CUDA extensions that must be
# compiled against the installed torch version, so we build them at runtime
# instead of shipping torch-2.4 prebuilt wheels.
ensure_mip_gaussian_rasterization()
ensure_runtime_package("nvdiffrast", "git+https://github.com/NVlabs/nvdiffrast.git")

from typing import *
import imageio
from easydict import EasyDict as edict
from trellis.pipelines import TrellisImageTo3DPipeline
from trellis.representations import Gaussian, MeshExtractResult
from trellis.utils import render_utils, postprocessing_utils


# Authenticate against the Hugging Face Hub. The token is read from the
# `hf_token` environment variable, with the conventional `HF_TOKEN` accepted as
# a fallback. login() is only called when a token is present: calling it with
# token=None asks for the token interactively, which cannot work in a headless
# process.
hf_token = os.getenv("hf_token") or os.getenv("HF_TOKEN")
if hf_token:
    login(token=hf_token)
else:
    print("Warning: no Hugging Face token found (hf_token / HF_TOKEN); "
          "models that require authentication may fail to load.")

# Global constants and default values
MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 2048

# Default system prompt for text generation
DEFAULT_SYSTEM_PROMPT = """You are a product designer with strong knowledge in text-to-image generation. You will receive a product request in the form of a brief description, and your mission will be to imagine a new product design that meets this need.

The deliverable (generated response) will be exclusively a text prompt for the FLUX.1-dev text-to-image AI.

This prompt should include a visual description of the object explicitly mentioning the essential aspects of its function.
Additionally, you should explicitly mention in this prompt the aesthetic/photo characteristics of the image rendering (e.g., photorealistic, high quality, focal length, grain, etc.), knowing that the image will be the main image of this object in the product catalog. The background of the generated image must be entirely white.
The prompt should be without narration."""

# Default Flux parameters
# NOTE(review): a guidance scale of 0.0 combined with very few steps looks like
# the usual FLUX.1-schnell recipe, while get_image_gen_pipeline loads
# FLUX.1-dev — confirm these defaults are intended for the dev checkpoint.
DEFAULT_SEED = 42
DEFAULT_RANDOMIZE_SEED = True
DEFAULT_WIDTH = 512
DEFAULT_HEIGHT = 512
DEFAULT_NUM_INFERENCE_STEPS = 6
DEFAULT_GUIDANCE_SCALE = 0.0
# NOTE(review): in this file DEFAULT_TEMPERATURE only initialises the UI
# slider; it is not passed to the text-generation call — confirm intent.
DEFAULT_TEMPERATURE = 0.9

# Root of the per-session scratch directories, located next to this file and
# created at import time.
TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')
os.makedirs(TMP_DIR, exist_ok=True)

# Module-level caches for the three model pipelines. Each starts as None and is
# assigned by the matching get_*_pipeline() helper after a successful load.
_text_gen_pipeline = None
_image_gen_pipeline = None
_trellis_pipeline = None


def start_session(req: gr.Request):
    """Create the scratch directory for the session identified by ``req``.

    The directory lives under ``TMP_DIR`` and is named after the request's
    session hash; an already existing directory is left as is.
    """
    session_dir = os.path.join(TMP_DIR, str(req.session_hash))
    os.makedirs(session_dir, exist_ok=True)

def end_session(req: gr.Request):
    """Delete the scratch directory of the session identified by ``req``.

    The directory is looked up under ``TMP_DIR`` by the request's session
    hash. A missing directory is not an error.
    """
    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
    # The directory may not exist (for instance when start_session never ran
    # for this session); cleanup must not raise in that case.
    shutil.rmtree(user_dir, ignore_errors=True)

def preprocess_image(image: Image.Image) -> Image.Image:
    """Run the TRELLIS pipeline's own image preprocessing on ``image``.

    When the TRELLIS pipeline cannot be obtained the input image is returned
    unchanged.
    """
    trellis_pipeline = get_trellis_pipeline()
    # Without a pipeline there is nothing to preprocess with: hand the
    # original image back untouched.
    return image if trellis_pipeline is None else trellis_pipeline.preprocess_image(image)



@spaces.GPU()
def get_image_gen_pipeline():
    """Return the FLUX.1-dev pipeline, loading it on first use.

    The loaded pipeline is stored in the module-level ``_image_gen_pipeline``.
    If loading fails the error is printed, nothing is cached and ``None`` is
    returned, so a later call tries again.

    NOTE(review): whether this module-level cache survives between calls
    depends on how ``spaces.GPU`` executes decorated functions — confirm.
    """
    global _image_gen_pipeline
    if _image_gen_pipeline is not None:
        return _image_gen_pipeline
    try:
        target_device = "cuda" if torch.cuda.is_available() else "cpu"
        loaded = DiffusionPipeline.from_pretrained(
            "black-forest-labs/FLUX.1-dev",
            torch_dtype=torch.bfloat16,
        )
        _image_gen_pipeline = loaded.to(target_device)
    except Exception as load_error:
        print(f"Error loading image generation model: {load_error}")
        return None
    return _image_gen_pipeline

@spaces.GPU()
def get_text_gen_pipeline():
    """Return the DeepSeek-R1 text-generation pipeline, loading it on first use.

    The pipeline is stored in the module-level ``_text_gen_pipeline``. If
    loading fails the error is printed, nothing is cached and ``None`` is
    returned, so a later call tries again.
    """
    global _text_gen_pipeline
    if _text_gen_pipeline is None:
        try:
            device = "cuda" if torch.cuda.is_available() else "cpu"
            model_id = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
            tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
            # Fall back to the EOS token when the tokenizer defines no pad token.
            tokenizer.pad_token = tokenizer.pad_token or tokenizer.eos_token

            _text_gen_pipeline = pipeline(
                "text-generation",
                model=model_id,
                tokenizer=tokenizer,
                # Load the 8B weights in bfloat16, like the image pipeline,
                # instead of leaving the dtype to the library default.
                torch_dtype=torch.bfloat16,
                max_new_tokens=2048,
                device=device,
                pad_token_id=tokenizer.pad_token_id,
            )
        except Exception as e:
            print(f"Error loading text generation model: {e}")
            return None
    return _text_gen_pipeline

# @spaces.GPU()
def get_trellis_pipeline():
    """Return the TRELLIS image-to-3D pipeline, loading it on first use.

    The pipeline is stored in the module-level ``_trellis_pipeline``. If
    loading fails the error is printed, nothing is cached and ``None`` is
    returned.
    """
    global _trellis_pipeline
    if _trellis_pipeline is not None:
        return _trellis_pipeline
    try:
        print("Loading Trellis pipeline...")
        _trellis_pipeline = TrellisImageTo3DPipeline.from_pretrained("microsoft/TRELLIS-image-large")
    except Exception as load_error:
        print(f"Error loading Trellis pipeline: {load_error}")
        return None
    return _trellis_pipeline


def split_reasoning(text: str) -> Tuple[str, str]:
    """Separate a DeepSeek-R1 answer into ``(reasoning, clean_prompt)``.

    The model writes its chain-of-thought between ``<think>`` and
    ``</think>`` ahead of the final answer. Everything before the first
    ``</think>`` becomes the reasoning and everything after it the prompt.
    Without a closing tag the whole text is the prompt and the reasoning is
    empty. The prompt is then cleaned: stray ``<think>`` openers are dropped,
    a leading bold header mentioning "prompt" is removed (with an optional
    colon after it), and one pair of matching wrapping quotes is stripped.
    """
    open_tag, close_tag = "<think>", "</think>"

    head, separator, tail = text.strip().partition(close_tag)
    if separator:
        thoughts = head.replace(open_tag, "").strip()
        answer = tail.strip()
    else:
        # No closing tag: partition left the whole text in `head`.
        thoughts = ""
        answer = head
    # An opener can still be present, e.g. when the closing tag was omitted.
    answer = answer.replace(open_tag, "").strip()

    # A leading "**...**" segment is only treated as a header when it mentions
    # "prompt", so a genuinely bold first word is preserved.
    if answer.startswith("**"):
        header_end = answer.find("**", 2)
        if header_end != -1 and "prompt" in answer[2:header_end].lower():
            remainder = answer[header_end + 2:].lstrip()
            if remainder.startswith(":"):
                remainder = remainder[1:]
            answer = remainder.strip()

    # Peel off one pair of identical wrapping quotes.
    if len(answer) >= 2 and answer[0] == answer[-1] and answer[0] in ('"', "'"):
        answer = answer[1:-1].strip()

    return thoughts, answer


@spaces.GPU()
def refine_prompt(
        prompt,
        system_prompt=DEFAULT_SYSTEM_PROMPT,
        progress=gr.Progress(track_tqdm=True)
        ):
    """Turn a short product idea into a detailed image prompt.

    Args:
        prompt: The user's short description.
        system_prompt: System message sent to the text-generation model.
        progress: Gradio progress tracker.

    Returns:
        A ``(reasoning, refined_prompt, status_message)`` tuple. On any
        failure the first two items are empty strings (the reasoning may be
        kept when only the final prompt is empty) and the status message
        describes the problem.
    """
    # Reject empty input up front, as generate_image does, rather than
    # loading and running the model on nothing.
    if not prompt or not str(prompt).strip():
        return "", "", "Please provide a valid prompt."

    text_gen = get_text_gen_pipeline()
    if text_gen is None:
        return "", "", "Text generation model is unavailable."
    try:
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]

        progress(0, desc="Generating text")
        generation = text_gen(messages)
        progress(1)

        # Pull the last assistant message out of the returned conversation.
        # TypeError covers a response whose 'generated_text' is not a list of
        # message dicts (e.g. a plain string).
        try:
            conversation = generation[0]['generated_text']
            assistant_messages = [msg for msg in conversation if msg['role'] == 'assistant']
            if not assistant_messages:
                return "", "", "Error: No assistant response found"
            assistant_content = assistant_messages[-1]['content']
        except (KeyError, IndexError, TypeError):
            return "", "", "Error: Unexpected response format from the model"

        # Separate DeepSeek-R1 reasoning from the final FLUX prompt.
        reasoning, clean_prompt = split_reasoning(assistant_content)

        if not clean_prompt:
            return reasoning, "", "Error: Model returned an empty prompt"

        return reasoning, clean_prompt, "Prompt refined successfully!"
    except Exception as e:
        print(f"Error in refine_prompt: {str(e)}")  # Add debug print
        return "", "", f"Error refining prompt: {str(e)}"

def validate_dimensions(width, height):
    """Check that the requested image size is usable.

    Args:
        width: Requested image width in pixels.
        height: Requested image height in pixels.

    Returns:
        ``(True, None)`` when the size is acceptable, otherwise
        ``(False, message)`` with a short explanation.
    """
    # Checked separately: two negative values would multiply to a positive
    # area and slip through the size limit below.
    if width <= 0 or height <= 0:
        return False, "Image dimensions must be positive"
    if width * height > MAX_IMAGE_SIZE * MAX_IMAGE_SIZE:
        return False, "Image dimensions too large"
    return True, None

@spaces.GPU()
def generate_image(prompt, seed=DEFAULT_SEED,
          randomize_seed=DEFAULT_RANDOMIZE_SEED,
          width=DEFAULT_WIDTH,
          height=DEFAULT_HEIGHT,
          num_inference_steps=DEFAULT_NUM_INFERENCE_STEPS,
          progress=gr.Progress(track_tqdm=True)):
    """Render ``prompt`` into an image with the FLUX pipeline.

    Args:
        prompt: Text prompt for the image model.
        seed: Seed used when ``randomize_seed`` is false.
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` when true.
        width: Image width in pixels.
        height: Image height in pixels.
        num_inference_steps: Number of denoising steps.
        progress: Gradio progress tracker.

    Returns:
        ``(image, status_message)``; ``image`` is ``None`` on failure.
    """
    try:
        # Validate that prompt is not empty
        if not prompt or prompt.strip() == "":
            return None, "Please provide a valid prompt."

        # Numeric inputs may arrive as floats (e.g. from API callers); the
        # torch generator needs an integer seed.
        width, height = int(width), int(height)
        num_inference_steps = int(num_inference_steps)

        # Cheap input validation comes before the expensive model load.
        is_valid, error_msg = validate_dimensions(width, height)
        if not is_valid:
            return None, error_msg

        progress(0.1, desc="Loading model")
        pipe = get_image_gen_pipeline()
        if pipe is None:
            return None, "Image generation model is unavailable."

        seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)

        # Use default torch generator instead of cuda-specific generator
        generator = torch.Generator().manual_seed(seed)

        progress(0.3, desc="Running inference")
        output = pipe(
            prompt=prompt,
            width=width,
            height=height,
            num_inference_steps=num_inference_steps,
            generator=generator,
            guidance_scale=DEFAULT_GUIDANCE_SCALE,
        )

        progress(0.8, desc="Processing output")
        image = output.images[0]
        progress(1.0, desc="Complete")
        return image, f"Image generated successfully with seed {seed}"
    except Exception as e:
        print(f"Error in generate_image: {str(e)}")
        return None, f"Error generating image: {str(e)}"


examples = [
    "a backpack for kids, flower style",
    "medieval flip flops",
    "cat shaped cake mold",
]

css="""
#col-container {
    margin: 0 auto;
    max-width: 720px;
}
.step-card {
    border: 1px solid var(--border-color-primary);
    border-radius: 12px;
    padding: 12px;
}
"""

# Real gallery results produced by running the pipeline on the Space, captured
# by scripts/generate_gallery.py into assets/gallery/ + manifest.json. Each
# manifest entry is {prompt, refined_prompt, image, video, glb} with repo-relative
# asset paths.
APP_DIR = os.path.dirname(os.path.abspath(__file__))
GALLERY_DIR = os.path.join(APP_DIR, 'assets', 'gallery')
GALLERY_MANIFEST = os.path.join(GALLERY_DIR, 'manifest.json')


def load_gallery():
    """Load the gallery manifest and resolve its asset paths.

    Reads ``GALLERY_MANIFEST`` (JSON) and returns a list of entry dicts. For
    the ``image``, ``video`` and ``glb`` keys a non-empty value is joined onto
    ``APP_DIR``; the key is set to ``None`` when the resulting file does not
    exist. Loading is best-effort: a missing, unreadable or malformed manifest
    yields an empty list, and entries that are not dicts are skipped, so a bad
    manifest cannot break UI construction.
    """
    if not os.path.exists(GALLERY_MANIFEST):
        return []
    import json
    try:
        with open(GALLERY_MANIFEST, encoding='utf-8') as f:
            items = json.load(f)
    except Exception as e:
        print(f"Error loading gallery manifest: {e}")
        return []
    if not isinstance(items, list):
        print("Error loading gallery manifest: expected a JSON list of entries")
        return []

    resolved = []
    for it in items:
        if not isinstance(it, dict):
            # Valid JSON but not a manifest entry; ignore it.
            continue
        entry = dict(it)
        for key in ('image', 'video', 'glb'):
            rel_path = it.get(key)
            if not rel_path:
                continue
            if not isinstance(rel_path, str):
                # A non-string value cannot be a path; treat it as missing.
                entry[key] = None
                continue
            abs_path = os.path.join(APP_DIR, rel_path)
            entry[key] = abs_path if os.path.exists(abs_path) else None
        resolved.append(entry)
    return resolved

def pack_state(gs: Gaussian, mesh: MeshExtractResult) -> dict:
    """Convert a Gaussian and a mesh into a dict of plain values and NumPy arrays.

    The ``'gaussian'`` entry holds ``gs.init_params`` plus the five tensor
    attributes copied to CPU as NumPy arrays; the ``'mesh'`` entry holds the
    vertices and faces, likewise as NumPy arrays.
    """
    tensor_fields = ('_xyz', '_features_dc', '_scaling', '_rotation', '_opacity')

    gaussian_state = dict(gs.init_params)
    for field in tensor_fields:
        gaussian_state[field] = getattr(gs, field).cpu().numpy()

    mesh_state = {
        'vertices': mesh.vertices.cpu().numpy(),
        'faces': mesh.faces.cpu().numpy(),
    }
    return {'gaussian': gaussian_state, 'mesh': mesh_state}


def unpack_state(state: dict) -> Tuple[Gaussian, edict]:
    """Rebuild a Gaussian and a mesh from a dict shaped like pack_state's output.

    Args:
        state: Dict with a ``'gaussian'`` entry (constructor parameters plus
            the ``_xyz``, ``_features_dc``, ``_scaling``, ``_rotation`` and
            ``_opacity`` arrays) and a ``'mesh'`` entry (``vertices``,
            ``faces``).

    Returns:
        ``(gs, mesh)``: the reconstructed ``Gaussian`` and an ``edict`` with
        ``vertices`` and ``faces`` tensors. All tensors are created on the
        ``'cuda'`` device.
    """
    gaussian_state = state['gaussian']
    gs = Gaussian(
        aabb=gaussian_state['aabb'],
        sh_degree=gaussian_state['sh_degree'],
        # 'mininum' (sic) is the keyword name this code passes to Gaussian.
        mininum_kernel_size=gaussian_state['mininum_kernel_size'],
        scaling_bias=gaussian_state['scaling_bias'],
        opacity_bias=gaussian_state['opacity_bias'],
        scaling_activation=gaussian_state['scaling_activation'],
    )
    for field in ('_xyz', '_features_dc', '_scaling', '_rotation', '_opacity'):
        setattr(gs, field, torch.tensor(gaussian_state[field], device='cuda'))

    mesh = edict(
        vertices=torch.tensor(state['mesh']['vertices'], device='cuda'),
        faces=torch.tensor(state['mesh']['faces'], device='cuda'),
    )

    return gs, mesh


@spaces.GPU
def image_to_3d(
    image: Image.Image,
    seed: int,
    ss_guidance_strength: float,
    ss_sampling_steps: int,
    slat_guidance_strength: float,
    slat_sampling_steps: int,
    req: gr.Request,
) -> Tuple[dict, str, str, str, str]:
    """Generate a 3D asset from ``image`` with TRELLIS.

    Args:
        image: Input image.
        seed: Seed passed to the TRELLIS pipeline.
        ss_guidance_strength: ``cfg_strength`` of the sparse-structure sampler.
        ss_sampling_steps: ``steps`` of the sparse-structure sampler.
        slat_guidance_strength: ``cfg_strength`` of the structured-latent sampler.
        slat_sampling_steps: ``steps`` of the structured-latent sampler.
        req: Gradio request; its session hash names the output directory.

    Returns:
        ``(state, video_path, glb_path, status_message, glb_path)``. On failure
        every item except the status message is ``None``.
    """
    try:
        # Fail with a clear message when no image is available (e.g. the
        # previous step produced none) instead of an error from deep inside
        # the pipeline.
        if image is None:
            return None, None, None, "Please provide an image first.", None

        # Load the Trellis pipeline
        trellis = get_trellis_pipeline()
        if trellis is None:
            return None, None, None, "Trellis pipeline is unavailable.", None
        trellis.cuda()

        # Preprocess image
        image = preprocess_image(image)

        # Run the pipeline; preprocessing was already done above.
        outputs = trellis.run(
            image,
            seed=int(seed),
            formats=["gaussian", "mesh"],
            preprocess_image=False,
            sparse_structure_sampler_params={
                "steps": ss_sampling_steps,
                "cfg_strength": ss_guidance_strength,
            },
            slat_sampler_params={
                "steps": slat_sampling_steps,
                "cfg_strength": slat_guidance_strength,
            },
        )
        gaussian = outputs['gaussian'][0]
        mesh = outputs['mesh'][0]

        temp_dir = os.path.join(TMP_DIR, str(req.session_hash))
        # The browser `load` event normally creates this via start_session, but
        # API callers (e.g. gradio_client) never trigger it, so ensure it exists.
        os.makedirs(temp_dir, exist_ok=True)

        # Preview video: colour render and normal render side by side.
        color_frames = render_utils.render_video(gaussian, num_frames=120)['color']
        normal_frames = render_utils.render_video(mesh, num_frames=120)['normal']
        video = [
            np.concatenate([color, normal], axis=1)
            for color, normal in zip(color_frames, normal_frames)
        ]
        video_path = os.path.join(temp_dir, 'sample.mp4')
        imageio.mimsave(video_path, video, fps=15)
        state = pack_state(gaussian, mesh)

        # Export an interactive, textured GLB for the 3D viewer and download.
        glb = postprocessing_utils.to_glb(
            gaussian,
            mesh,
            simplify=0.95,
            texture_size=1024,
            verbose=False,
        )
        glb_path = os.path.join(temp_dir, 'sample.glb')
        glb.export(glb_path)

        torch.cuda.empty_cache()
        return state, video_path, glb_path, "3D model generated successfully!", glb_path
    except Exception as e:
        print(f"Error in image_to_3d: {str(e)}")
        import traceback
        traceback.print_exc()  # Print the full stack trace for debugging
        return None, None, None, f"Error generating 3D model: {str(e)}", None


def process_example_pipeline(example_prompt):
    """Identity callback for ``gr.Examples``: hand the example prompt back unchanged."""
    return example_prompt

HOW_IT_WORKS_MD = """
## Comment ça marche

Ce Space transforme une **idée en une phrase** en un **objet 3D téléchargeable**,
en enchaînant trois modèles spécialisés.

```
Prompt simple ──▶ DeepSeek-R1 ──▶ FLUX.1-dev ──▶ TRELLIS ──▶ Vidéo + GLB
   (texte)         (raisonnement     (image          (objet 3D)
                    + prompt riche)    produit)
```

**Étape 1 — DeepSeek-R1-Distill-Llama-8B (texte → texte).**
Le modèle joue le rôle d'un designer produit : à partir d'une description courte,
il *réfléchit* (chaîne de raisonnement visible dans l'accordéon « Raisonnement du
modèle ») puis rédige un prompt visuel détaillé et photoréaliste pour FLUX. Seul
le prompt final — sans le raisonnement — est transmis à l'étape suivante.

**Étape 2 — FLUX.1-dev (texte → image).**
Le prompt détaillé est rendu en une image produit sur fond blanc, cadrée comme une
photo de catalogue.

**Étape 3 — TRELLIS (image → 3D).**
L'image est convertie en un asset 3D : une vidéo de prévisualisation (rendu couleur
+ normales) et un fichier **GLB** texturé, manipulable directement dans la
visionneuse interactive et téléchargeable.

> **Pourquoi passer par une image avant la 3D ?** TRELLIS est conditionné sur une
> image. Générer d'abord une image nette et bien cadrée donne un maillage et une
> texture bien plus propres qu'une génération 3D directe depuis du texte.

### Stack technique
- **DeepSeek-R1-Distill-Llama-8B** — raisonnement + ingénierie de prompt
- **FLUX.1-dev** — diffusion texte→image
- **TRELLIS (microsoft/TRELLIS-image-large)** — génération 3D (Gaussian + mesh)
- **ZeroGPU** sur GPU Blackwell (sm_120), attention `sdpa`, extensions CUDA
  compilées au runtime
"""


def create_interface():
    """Build the Gradio Blocks UI and wire its events.

    The interface has three tabs: the live demo (prompt → refined prompt →
    image → 3D), a gallery rendered from ``load_gallery()`` and an explanatory
    page. Each pipeline step has its own button, and a single button runs all
    three steps in sequence.

    Returns:
        The configured ``gr.Blocks`` instance (not launched).
    """
    theme = gr.themes.Soft(primary_hue="pink", secondary_hue="rose")

    def _pick_trellis_seed(randomize_seed, seed):
        # Honour the "Randomize Seed" checkbox of the 3D settings: draw a new
        # seed when it is ticked, otherwise keep the slider value.
        return random.randint(0, MAX_SEED) if randomize_seed else seed

    with gr.Blocks(css=css, theme=theme, title="Text to 3D") as demo:
        # Move session handlers INSIDE the Blocks context
        demo.load(fn=start_session)
        demo.unload(fn=end_session)

        # State for storing 3D model data
        output_state = gr.State(None)

        with gr.Column(elem_id="col-container"):
            gr.Markdown(
                "# Text to 3D\n"
                "De quelques mots à un objet 3D téléchargeable — "
                "**DeepSeek-R1 + FLUX.1-dev + TRELLIS**."
            )
            gr.Markdown(
                "> ⏳ **Démarrage à froid (ZeroGPU)** : au premier lancement, le "
                "chargement de DeepSeek-R1 (8B), FLUX et TRELLIS peut prendre "
                "plusieurs minutes. Les générations suivantes sont bien plus rapides. "
                "Pas envie d'attendre ? Voyez l'onglet **Galerie**."
            )

            with gr.Tabs():
                # ---------------------------------------------------------- Demo
                with gr.Tab("Démo en direct"):
                    prompt = gr.Text(
                        label="Votre idée",
                        max_lines=1,
                        placeholder="Ex. : a backpack for kids, flower style",
                    )
                    run_all_button = gr.Button("✨ Générer tout", variant="primary")

                    # Step 1 — DeepSeek-R1
                    with gr.Group(elem_classes="step-card"):
                        gr.Markdown("### Étape 1 — DeepSeek-R1 · prompt design")
                        status1 = gr.Markdown("Étape 1 — en attente")
                        with gr.Accordion("Raisonnement du modèle", open=False):
                            reasoning_box = gr.Textbox(
                                show_label=False,
                                max_lines=20,
                                placeholder="La chaîne de raisonnement de DeepSeek-R1 apparaîtra ici",
                                interactive=False,
                            )
                        refined_prompt = gr.Text(
                            label="Prompt détaillé (envoyé à FLUX)",
                            max_lines=10,
                            placeholder="Detailed object prompt",
                            max_length=2048,
                        )
                        prompt_button = gr.Button("Rejouer l'étape 1 — Affiner le prompt")

                    # Step 2 — FLUX
                    with gr.Group(elem_classes="step-card"):
                        gr.Markdown("### Étape 2 — FLUX.1-dev · image produit")
                        status2 = gr.Markdown("Étape 2 — en attente")
                        generated_image = gr.Image(
                            label="Image générée",
                            format="png",
                            image_mode="RGBA",
                            type="pil",
                            height=300,
                        )
                        visual_button = gr.Button("Rejouer l'étape 2 — Générer l'image")

                    # Step 3 — TRELLIS
                    with gr.Group(elem_classes="step-card"):
                        gr.Markdown("### Étape 3 — TRELLIS · objet 3D")
                        status3 = gr.Markdown("Étape 3 — en attente")
                        with gr.Row():
                            video_output = gr.Video(
                                label="Prévisualisation (couleur + normales)",
                                autoplay=True, loop=True, height=300,
                            )
                            model_3d = gr.Model3D(label="Modèle 3D interactif", height=300)
                        download_glb = gr.DownloadButton("⬇️ Télécharger le .glb", value=None)
                        gen3d_button = gr.Button("Rejouer l'étape 3 — Générer la 3D")

                    message_box = gr.Textbox(
                        label="Messages de statut",
                        interactive=False,
                        placeholder="Les messages détaillés apparaîtront ici",
                    )

                    # Accordion sections for advanced settings
                    with gr.Accordion("Réglages avancés", open=False):
                        with gr.Tab("DeepSeek-R1"):
                            # NOTE(review): this slider is not listed in the
                            # inputs of any event below, so it currently has
                            # no effect on generation.
                            temperature = gr.Slider(
                                label="Temperature",
                                value=DEFAULT_TEMPERATURE,
                                minimum=0.0,
                                maximum=1.0,
                                step=0.05,
                                info="Higher values produce more diverse outputs",
                            )

                            system_prompt = gr.Textbox(
                                label="System Prompt",
                                value=DEFAULT_SYSTEM_PROMPT,
                                lines=10,
                                info="Instructions for the DeepSeek-R1 model"
                            )

                        with gr.Tab("Flux"):
                            flux_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=DEFAULT_SEED)
                            flux_randomize_seed = gr.Checkbox(label="Randomize seed", value=DEFAULT_RANDOMIZE_SEED)

                            with gr.Row():
                                width = gr.Slider(label="Width", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=DEFAULT_WIDTH)
                                height = gr.Slider(label="Height", minimum=256, maximum=MAX_IMAGE_SIZE, step=32, value=DEFAULT_HEIGHT)

                            num_inference_steps = gr.Slider(
                                label="Number of inference steps",
                                minimum=1,
                                maximum=50,
                                step=1,
                                value=DEFAULT_NUM_INFERENCE_STEPS,
                            )

                        with gr.Tab("3D Generation Settings"):
                            trellis_seed = gr.Slider(0, MAX_SEED, label="Seed", value=0, step=1)
                            trellis_randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
                            gr.Markdown("Stage 1: Sparse Structure Generation")
                            with gr.Row():
                                ss_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=7.5, step=0.1)
                                ss_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)
                            gr.Markdown("Stage 2: Structured Latent Generation")
                            with gr.Row():
                                slat_guidance_strength = gr.Slider(0.0, 10.0, label="Guidance Strength", value=3.0, step=0.1)
                                slat_sampling_steps = gr.Slider(1, 50, label="Sampling Steps", value=12, step=1)

                    gr.Examples(
                        examples=examples,
                        fn=process_example_pipeline,
                        inputs=[prompt],
                        outputs=[prompt],
                        cache_examples=True,
                    )

                # ------------------------------------------------------- Gallery
                with gr.Tab("Galerie"):
                    gr.Markdown(
                        "De **vrais résultats** produits par le pipeline complet "
                        "(prompt → image → 3D). Affichage instantané, même quand le "
                        "GPU est endormi."
                    )
                    gallery_items = load_gallery()
                    if not gallery_items:
                        gr.Markdown(
                            "_Galerie en cours de génération — lancez "
                            "`python scripts/generate_gallery.py` puis committez "
                            "`assets/gallery/`._"
                        )
                    for item in gallery_items:
                        with gr.Group(elem_classes="step-card"):
                            gr.Markdown(f"**Prompt :** {item.get('prompt', '')}")
                            with gr.Row():
                                if item.get("image"):
                                    gr.Image(
                                        value=item["image"], label="Image (FLUX)",
                                        height=260, interactive=False,
                                    )
                                if item.get("video"):
                                    gr.Video(
                                        value=item["video"], label="Aperçu (TRELLIS)",
                                        autoplay=True, loop=True, height=260,
                                    )
                                if item.get("glb"):
                                    gr.Model3D(
                                        value=item["glb"],
                                        label="Modèle 3D interactif", height=260,
                                    )
                            if item.get("refined_prompt"):
                                with gr.Accordion("Prompt détaillé (DeepSeek-R1)", open=False):
                                    gr.Markdown(item["refined_prompt"])

                # --------------------------------------------------- How it works
                with gr.Tab("Comment ça marche"):
                    gr.Markdown(HOW_IT_WORKS_MD)

        # ----------------------------------------------------------- Wiring
        # Individual step buttons (replay one stage in isolation).
        gr.on(
            triggers=[prompt_button.click, prompt.submit],
            fn=refine_prompt,
            inputs=[prompt, system_prompt],
            outputs=[reasoning_box, refined_prompt, status1],
        )

        gr.on(
            triggers=[visual_button.click],
            fn=generate_image,
            inputs=[refined_prompt, flux_seed, flux_randomize_seed, width, height, num_inference_steps],
            outputs=[generated_image, status2],
        )

        # Resolve the seed first (writing it back to the slider so the value
        # actually used is visible), then run TRELLIS with it.
        gr.on(
            triggers=[gen3d_button.click],
            fn=_pick_trellis_seed,
            inputs=[trellis_randomize_seed, trellis_seed],
            outputs=[trellis_seed],
        ).then(
            fn=image_to_3d,
            inputs=[generated_image, trellis_seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
            outputs=[output_state, video_output, model_3d, status3, download_glb],
        )

        # One-click pipeline: refine → image → 3D, with a per-step status.
        run_all_button.click(
            fn=lambda: ("⏳ Étape 1 — en cours…", "Étape 2 — en attente", "Étape 3 — en attente"),
            inputs=None,
            outputs=[status1, status2, status3],
        ).then(
            fn=refine_prompt,
            inputs=[prompt, system_prompt],
            outputs=[reasoning_box, refined_prompt, status1],
        ).then(
            fn=lambda: "⏳ Étape 2 — en cours…",
            inputs=None,
            outputs=[status2],
        ).then(
            fn=generate_image,
            inputs=[refined_prompt, flux_seed, flux_randomize_seed, width, height, num_inference_steps],
            outputs=[generated_image, status2],
        ).then(
            fn=lambda: "⏳ Étape 3 — en cours…",
            inputs=None,
            outputs=[status3],
        ).then(
            fn=_pick_trellis_seed,
            inputs=[trellis_randomize_seed, trellis_seed],
            outputs=[trellis_seed],
        ).then(
            fn=image_to_3d,
            inputs=[generated_image, trellis_seed, ss_guidance_strength, ss_sampling_steps, slat_guidance_strength, slat_sampling_steps],
            outputs=[output_state, video_output, model_3d, status3, download_glb],
        )

    return demo

if __name__ == "__main__":
    demo = create_interface()
    demo.launch()