Spaces:

notaneimu
/

video-to-colmap-for-tttlrm

Running

File size: 12,257 Bytes

from __future__ import annotations

import html
import math
from pathlib import Path
from typing import Final

import gradio as gr

from video_to_colmap import (
    ConversionOutputs,
    VideoMetadata,
    convert_video_to_colmap_archive,
    infer_target_frames,
    read_video_metadata,
)

# Directory containing this file, and the "outputs" folder located next to it.
APP_DIR: Final[Path] = Path(__file__).resolve().parent
OUTPUTS_DIR: Final[Path] = APP_DIR / "outputs"

# Import-time side effects: make sure the outputs folder exists and register
# it with Gradio as a static path.
OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)
gr.set_static_paths(paths=[str(OUTPUTS_DIR)])

# Keyframe target used when "Auto" is selected and no value can be inferred
# from the video (no upload yet, or metadata inspection failed).
DEFAULT_TARGET_FRAMES: Final[int] = 24
# Dropdown options for the keyframe limit. Every entry other than "Auto" is
# parsed with int() by update_target_settings.
TARGET_FRAME_CHOICES: Final[list[str]] = ["Auto", "16", "24", "32", "48"]

# Custom stylesheet passed to gr.Blocks(css=...). The selectors target the
# elem_id values assigned in build_demo (#main-row, #controls-panel,
# #preview-panel, #status-text, #video-summary) and the classes emitted by
# preview_placeholder_html (.preview-placeholder, .preview-inner,
# .preview-title, .preview-desc).
CSS: Final[str] = """
html { scrollbar-gutter: stable; }
body { overflow: auto; }
.gradio-container {
    max-width: none;
    width: 100%;
    margin: 0;
    padding: 0.75rem 1rem 1rem;
}
#main-row {
    gap: 1rem;
    align-items: stretch;
}
#controls-panel {
    display: flex;
    flex-direction: column;
    gap: 0.75rem;
}
#preview-panel {
    min-height: 540px;
}
.preview-placeholder {
    position: relative;
    overflow: hidden;
    width: 100%;
    min-height: 540px;
    display: flex;
    align-items: center;
    justify-content: center;
    border-radius: 14px;
    background:
        radial-gradient(circle at top left, rgba(250, 204, 21, 0.22), transparent 32%),
        linear-gradient(135deg, #0f172a 0%, #172554 48%, #1e293b 100%);
    border: 1px solid rgba(148, 163, 184, 0.24);
    color: #f8fafc;
}
.preview-placeholder::before {
    content: "";
    position: absolute;
    inset: 0;
    background: linear-gradient(180deg, rgba(15, 23, 42, 0.18), rgba(15, 23, 42, 0.4));
}
.preview-inner {
    position: relative;
    z-index: 1;
    max-width: 460px;
    padding: 32px;
    text-align: center;
    border-radius: 20px;
    background: rgba(15, 23, 42, 0.44);
    box-shadow: 0 18px 50px rgba(15, 23, 42, 0.24);
    backdrop-filter: blur(10px);
}
.preview-placeholder .preview-title {
    color: #f8fafc !important;
    font-size: 28px;
    font-weight: 600;
    margin-bottom: 8px;
    letter-spacing: 0.01em;
    text-shadow: 0 1px 2px rgba(15, 23, 42, 0.4);
}
.preview-placeholder .preview-desc {
    color: rgba(226, 232, 240, 0.96) !important;
    font-size: 16px;
    line-height: 1.5;
    text-shadow: 0 1px 2px rgba(15, 23, 42, 0.35);
}
#status-text {
    font-size: 13px;
    opacity: 0.92;
}
#video-summary {
    margin-top: -0.15rem;
    padding: 0.8rem 0.9rem;
    border-radius: 12px;
    background: rgba(15, 23, 42, 0.04);
    border: 1px solid rgba(148, 163, 184, 0.18);
}
@media (max-width: 900px) {
    #main-row {
        flex-direction: column;
    }
    #preview-panel,
    .preview-placeholder {
        min-height: 420px;
    }
}
"""


def preview_placeholder_html(title: str, description: str) -> str:
    """Return the HTML for the placeholder card shown in the preview panel.

    Both arguments are treated as plain text and HTML-escaped before being
    interpolated, so characters such as ``<``, ``>`` or ``&`` are displayed
    literally instead of being parsed as markup.

    Args:
        title: Heading text rendered in the ``preview-title`` element.
        description: Body text rendered in the ``preview-desc`` element.

    Returns:
        An HTML fragment using the ``preview-placeholder`` CSS classes.
    """
    safe_title = html.escape(title)
    safe_description = html.escape(description)
    return f"""
<div class="preview-placeholder">
  <div class="preview-inner">
    <div class="preview-title">{safe_title}</div>
    <div class="preview-desc">{safe_description}</div>
  </div>
</div>
"""


def default_video_summary() -> str:
    """Return the hint shown in the summary box when no video is loaded in Auto mode."""
    hint = "Upload a video to infer the target keyframe count automatically, or choose a manual max."
    return hint


def fallback_video_summary(target_frames: int | None = None) -> str:
    """Return the summary shown when video metadata could not be inspected.

    Args:
        target_frames: Keyframe count to report. When omitted, the module-level
            ``DEFAULT_TARGET_FRAMES`` is used so the message always matches the
            default that is actually applied.

    Returns:
        A Markdown string naming the keyframe target that will be used.
    """
    if target_frames is None:
        target_frames = DEFAULT_TARGET_FRAMES
    return (
        "Could not inspect video metadata on upload. "
        f"Using the default target of **{target_frames} keyframes**."
    )


def manual_video_summary(target_frames: int, metadata: VideoMetadata | None = None) -> str:
    """Describe a manually chosen keyframe limit as Markdown.

    Args:
        target_frames: The keyframe limit selected by the user.
        metadata: Optional video metadata; when given, the clip duration,
            resolution and frame rate are appended to the message.

    Returns:
        A one-sentence Markdown summary.
    """
    prefix = f"Using a manual limit of **{target_frames} keyframes**"
    if metadata is None:
        return prefix + "."

    duration = f"{metadata.duration_seconds:.1f}s"
    resolution = f"{metadata.width}x{metadata.height}"
    frame_rate = f"{metadata.fps:.1f} fps"
    return f"{prefix} for a **{duration}** clip at **{resolution}** and **{frame_rate}**."


def video_summary_text(metadata: VideoMetadata, target_frames: int) -> str:
    """Describe an automatically inferred keyframe count as Markdown.

    Args:
        metadata: Video metadata providing duration, resolution and frame rate.
        target_frames: The keyframe count chosen for this clip.

    Returns:
        A one-sentence Markdown summary.
    """
    selection = f"Auto-selecting **{target_frames} keyframes**"
    duration = f"{metadata.duration_seconds:.1f}s"
    resolution = f"{metadata.width}x{metadata.height}"
    frame_rate = f"{metadata.fps:.1f} fps"
    return f"{selection} from a **{duration}** clip at **{resolution}** and **{frame_rate}**."


def start_generation(target_frames: int) -> tuple[object, object, str]:
    """Put the UI into its "busy" state right before a conversion starts.

    Args:
        target_frames: Keyframe count mentioned in the progress message.

    Returns:
        A tuple of (generate-button update, clear-button update, preview HTML):
        both buttons are disabled, the generate button is relabelled, and the
        preview shows a progress placeholder.
    """
    busy_generate = gr.update(interactive=False, value="Converting...")
    busy_clear = gr.update(interactive=False)
    progress_card = preview_placeholder_html(
        "Preparing Video for COLMAP",
        f"Normalizing the clip, selecting {target_frames} sharp overlapping keyframes, and running sparse reconstruction.",
    )
    return busy_generate, busy_clear, progress_card


def _status_text(outputs: ConversionOutputs) -> str:
    coverage = 0.0
    if outputs.selected_frames:
        coverage = outputs.registered_frames / outputs.selected_frames

    return (
        f"Prepared **{outputs.scene_name}** from a **{outputs.duration_seconds:.1f}s** clip. "
        f"Selected **{outputs.selected_frames}** keyframes, COLMAP registered **{outputs.registered_frames}**, "
        f"and the reconstruction quality is **{outputs.quality_label}** "
        f"({math.floor(coverage * 100)}% registration)."
    )


def run_conversion(
    video_path: str | None,
    target_frames: int,
    sampling_profile: str,
    max_edge: str,
) -> tuple[object, object, object, str]:
    """Convert the uploaded video and return updates for the result widgets.

    Args:
        video_path: Path of the uploaded video, or a falsy value if none.
        target_frames: Keyframe count forwarded to the converter.
        sampling_profile: Forwarded to the converter as ``profile_key``.
        max_edge: Numeric string, converted with ``int`` and forwarded as
            ``max_image_edge``.

    Returns:
        A tuple of (archive file update, report file update, contact sheet
        update, status Markdown).

    Raises:
        gr.Error: If no video was provided, or if the conversion fails; other
            exception types are wrapped with their type name and message.
    """
    if not video_path:
        raise gr.Error("Upload a video first.")

    try:
        conversion_args = {
            "video_path": video_path,
            "target_frames": target_frames,
            "profile_key": sampling_profile,
            "max_image_edge": int(max_edge),
        }
        result = convert_video_to_colmap_archive(**conversion_args)

        archive_update = gr.update(value=str(result.archive_path), visible=True, interactive=True)
        report_update = gr.update(value=str(result.report_path), visible=True, interactive=True)
        sheet_update = gr.update(value=str(result.contact_sheet_path), visible=True)
        status = _status_text(result)
    except gr.Error:
        # Already user-facing; pass through unchanged.
        raise
    except Exception as exc:
        raise gr.Error(f"Conversion failed: {type(exc).__name__}: {exc}") from exc

    return archive_update, report_update, sheet_update, status


def clear_all() -> tuple[None, object, object, object, str, str, int, object]:
    """Return the values that reset the whole form to its initial state.

    Returns:
        A tuple of (cleared video input, hidden archive download, hidden report
        download, hidden contact sheet, empty status text, default summary,
        default target frame count, dropdown update selecting "Auto").
    """
    # Three independent update objects, one per result widget.
    archive_reset, report_reset, sheet_reset = (
        gr.update(value=None, visible=False) for _ in range(3)
    )
    summary = default_video_summary()
    mode_reset = gr.update(value="Auto")
    return (
        None,
        archive_reset,
        report_reset,
        sheet_reset,
        "",
        summary,
        DEFAULT_TARGET_FRAMES,
        mode_reset,
    )


def update_target_settings(video_path: str | None, target_mode: str) -> tuple[object, object, str, int]:
    """Recompute button states, the summary text and the target frame count.

    Args:
        video_path: Path of the uploaded video, or a falsy value if none.
        target_mode: Either ``"Auto"`` or a numeric string giving a manual
            keyframe limit.

    Returns:
        A tuple of (generate-button update, clear-button update, summary
        Markdown, resolved target frame count). Both buttons are interactive
        only when a video is present.
    """
    auto_mode = target_mode == "Auto"
    video_present = bool(video_path)
    generate_update = gr.update(interactive=video_present, value="Build COLMAP Archive")
    clear_update = gr.update(interactive=video_present)

    if not video_path:
        # Nothing to inspect: report the default or the manual choice.
        if auto_mode:
            frames, summary = DEFAULT_TARGET_FRAMES, default_video_summary()
        else:
            frames = int(target_mode)
            summary = manual_video_summary(frames)
        return generate_update, clear_update, summary, frames

    try:
        metadata = read_video_metadata(Path(video_path))
        if auto_mode:
            frames = infer_target_frames(metadata)
            summary = video_summary_text(metadata, frames)
        else:
            frames = int(target_mode)
            summary = manual_video_summary(frames, metadata)
    except Exception:
        # Best effort: a failed inspection must not block the upload, so fall
        # back to a summary that does not need metadata.
        frames = DEFAULT_TARGET_FRAMES if auto_mode else int(target_mode)
        summary = fallback_video_summary() if auto_mode else manual_video_summary(frames)

    return generate_update, clear_update, summary, frames


def build_demo() -> gr.Blocks:
    """Assemble the Gradio Blocks UI and wire up its event handlers.

    The layout is a single row with a controls column (upload, keyframe limit,
    sampling profile, max frame edge, action buttons, downloads, status) and a
    wider preview column (placeholder HTML plus the contact-sheet image).

    Returns:
        The configured ``gr.Blocks`` app with ``queue(max_size=4)`` applied.
        The caller is responsible for calling ``launch()``.
    """
    with gr.Blocks(
        css=CSS,
        title="Video to COLMAP for tttLRM",
        theme=gr.themes.Origin(),
    ) as demo:
        gr.Markdown("## Video to COLMAP for tttLRM")
        gr.Markdown(
            "Upload a single video. The Space will pick sharp overlapping keyframes, run COLMAP, and export a raw scene archive ready for the `tttLRM` Space."
        )

        with gr.Row(elem_id="main-row", equal_height=True):
            # Left column: inputs, settings, action buttons and result downloads.
            with gr.Column(scale=3, min_width=320, elem_id="controls-panel"):
                video_in = gr.File(
                    label="Input Video",
                    type="filepath",
                    file_types=[".mp4", ".mov", ".webm", ".mkv", ".avi"],
                )
                # Holds the resolved integer keyframe count; the dropdown below
                # only holds "Auto" or a numeric string.
                target_frames_state = gr.State(value=DEFAULT_TARGET_FRAMES)
                target_mode = gr.Dropdown(
                    label="Max Angles / Keyframes",
                    choices=TARGET_FRAME_CHOICES,
                    value="Auto",
                )
                video_summary = gr.Markdown(default_video_summary(), elem_id="video-summary")
                sampling_profile = gr.Dropdown(
                    label="Sampling Profile",
                    choices=["balanced", "dense", "sparse"],
                    value="balanced",
                )
                max_edge = gr.Dropdown(
                    label="Max Frame Edge",
                    choices=["960", "1280", "1600"],
                    value="1280",
                )
                # Both buttons start disabled; update_target_settings enables
                # them once a video is present.
                with gr.Row():
                    generate_btn = gr.Button("Build COLMAP Archive", variant="primary", interactive=False)
                    clear_btn = gr.Button("Clear", interactive=False)
                # Result widgets stay hidden until run_conversion fills them.
                archive_download = gr.File(label="Download Raw COLMAP Archive", visible=False)
                report_download = gr.File(label="Download Reconstruction Report", visible=False)
                status_text = gr.Markdown(elem_id="status-text")

            # Right column: placeholder card and the contact sheet of selected frames.
            with gr.Column(scale=7, min_width=520):
                preview_html = gr.HTML(
                    value=preview_placeholder_html(
                        "Keyframe Selection Preview",
                        "After conversion, the selected frames contact sheet will appear here so you can check overlap and viewpoint coverage.",
                    ),
                    elem_id="preview-panel",
                )
                contact_sheet = gr.Image(label="Selected Keyframes", visible=False, type="filepath")

        # Changing either the uploaded video or the keyframe mode re-runs the
        # same handler, which refreshes the buttons, the summary and the
        # stored target frame count.
        video_in.change(
            update_target_settings,
            inputs=[video_in, target_mode],
            outputs=[generate_btn, clear_btn, video_summary, target_frames_state],
            queue=False,
        )
        target_mode.change(
            update_target_settings,
            inputs=[video_in, target_mode],
            outputs=[generate_btn, clear_btn, video_summary, target_frames_state],
            queue=False,
        )
        # Generate: (1) switch the UI to its busy state, (2) run the
        # conversion, (3) re-enable the buttons and show the "complete" card.
        # Only step 2 omits queue=False.
        # NOTE(review): Gradio documents `.then()` as running whether or not
        # the previous step raised, so step 3 presumably shows the "Complete"
        # card even after a failed conversion. Consider `.success()` for the
        # card while keeping the button reset unconditional. Confirm against
        # the installed Gradio version.
        generate_btn.click(
            start_generation,
            inputs=[target_frames_state],
            outputs=[generate_btn, clear_btn, preview_html],
            queue=False,
        ).then(
            run_conversion,
            inputs=[video_in, target_frames_state, sampling_profile, max_edge],
            outputs=[archive_download, report_download, contact_sheet, status_text],
        ).then(
            lambda: (
                gr.update(interactive=True, value="Build COLMAP Archive"),
                gr.update(interactive=True),
                preview_placeholder_html(
                    "Keyframe Selection Complete",
                    "Review the contact sheet below and download the raw COLMAP archive for the `tttLRM` Space.",
                ),
            ),
            outputs=[generate_btn, clear_btn, preview_html],
            queue=False,
        )
        # Clear: reset inputs and results, then disable both buttons and
        # restore the initial preview placeholder.
        clear_btn.click(
            clear_all,
            outputs=[video_in, archive_download, report_download, contact_sheet, status_text, video_summary, target_frames_state, target_mode],
            queue=False,
        ).then(
            lambda: (
                gr.update(interactive=False),
                gr.update(interactive=False),
                preview_placeholder_html(
                    "Keyframe Selection Preview",
                    "After conversion, the selected frames contact sheet will appear here so you can check overlap and viewpoint coverage.",
                ),
            ),
            outputs=[generate_btn, clear_btn, preview_html],
            queue=False,
        )

    # Enable request queueing with max_size=4.
    demo.queue(max_size=4)
    return demo


# Build and launch the app only when this file is executed as a script.
if __name__ == "__main__":
    build_demo().launch()